date:20100126

Re: [Qemu-devel] [PATCH] Add definitions for current cpu models..

2010-01-26 Thread Gerd Hoffmann


On 01/25/10 23:35, Dor Laor wrote:

On 01/25/2010 04:21 PM, Anthony Liguori wrote:

Another way to look at this is that implementing a somewhat arbitrary
policy within QEMU's .c files is something we should try to avoid.
Implementing arbitrary policy in our default config file is a fine thing
to do. Default configs are suggested configurations that are modifiable
by a user. Something baked into QEMU is something that ought to work for

>

If we get the models right, users and mgmt stacks won't need to define
them. It seems like an almost impossible task for us, but mgmt stacks/users
won't do a better job; the opposite, I guess. The configs are great, I
have no argument against them; my case is that if we can pin down some
definitions, it's better that they live in the code, like the above models.
It might even help to get the same cpus across the various vendors,
otherwise we might end up with IBM's core2duo, RH's core2duo, Suse's,..


I agree.  Looking at this thread and the config file idea, it feels a
bit like "we have a hard time agreeing on some sensible default cpu
types, so let's make this configurable so we don't have to", which is a
bad thing IMHO.


cheers,
  Gerd

[Qemu-devel] [PATCH v3 0/5] Reduce down time during migration without shared storage

2010-01-26 Thread Liran Schour

This series of patches reduces the down time of the guest during a migration
without shared storage. It does that by starting to transfer dirty blocks in
the iterative phase. In the current code, the transfer of dirty blocks begins
only during the full phase, while the guest is suspended. Therefore the guest
is suspended for a time linear in the amount of data that was written to disk
during migration.

Changes from v2: - don't duplicate code: use qemu_get_clock_ns() from Paolo
   Bonzini's patch.
 - Coding style issues
 - Remove unused constants
Changes from v1: - infer storage performance by get_clock()
 - remove dirty max iterations, user is responsible for 
   migration convergence
 - remove trailing whitespaces
 - minor cleanups

 block-migration.c |  235 +---
 block.c   |   16 +++-
 block.h   |1 +
 block_int.h   |1 +
 qemu-timer.h  |1 +
 vl.c  |   21 +-
 6 files changed, 203 insertions(+), 72 deletions(-)

Signed-off-by: Liran Schour

[Qemu-devel] [PATCH v3 1/5] Remove unused code

2010-01-26 Thread Liran Schour

blk_mig_save_bulked_block is never called with sync flag. Remove the sync
flag. Calculate bulk completion during blk_mig_save_bulked_block.
Remove unused constants.

Signed-off-by: Liran Schour 
---
 block-migration.c |   61 +++-
 1 files changed, 18 insertions(+), 43 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 258a88a..93d86d9 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -26,9 +26,6 @@
 #define BLK_MIG_FLAG_PROGRESS   0x04
 
 #define MAX_IS_ALLOCATED_SEARCH 65536
-#define MAX_BLOCKS_READ 1
-#define BLOCKS_READ_CHANGE 100
-#define INITIAL_BLOCKS_READ 100
 
 //#define DEBUG_BLK_MIGRATION
 
@@ -72,6 +69,7 @@ typedef struct BlkMigState {
 int transferred;
 int64_t total_sector_sum;
 int prev_progress;
+int bulk_completed;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -138,7 +136,7 @@ static void blk_mig_read_cb(void *opaque, int ret)
 }
 
 static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
-BlkMigDevState *bmds, int is_async)
+BlkMigDevState *bmds)
 {
 int64_t total_sectors = bmds->total_sectors;
 int64_t cur_sector = bmds->cur_sector;
@@ -175,27 +173,16 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
 blk->bmds = bmds;
 blk->sector = cur_sector;
 
-if (is_async) {
-blk->iov.iov_base = blk->buf;
-blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
-qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
-
-blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
-nr_sectors, blk_mig_read_cb, blk);
-if (!blk->aiocb) {
-goto error;
-}
-block_mig_state.submitted++;
-} else {
-if (bdrv_read(bs, cur_sector, blk->buf, nr_sectors) < 0) {
-goto error;
-}
-blk_send(f, blk);
+blk->iov.iov_base = blk->buf;
+blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
-qemu_free(blk->buf);
-qemu_free(blk);
+blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
+nr_sectors, blk_mig_read_cb, blk);
+if (!blk->aiocb) {
+goto error;
 }
-
+block_mig_state.submitted++;
 bdrv_reset_dirty(bs, cur_sector, nr_sectors);
 bmds->cur_sector = cur_sector + nr_sectors;
 
@@ -229,6 +216,7 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
 block_mig_state.transferred = 0;
 block_mig_state.total_sector_sum = 0;
 block_mig_state.prev_progress = -1;
+block_mig_state.bulk_completed = 0;
 
 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
 if (bs->type == BDRV_TYPE_HD) {
@@ -260,7 +248,7 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
 }
 }
 
-static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f, int is_async)
+static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
 {
 int64_t completed_sector_sum = 0;
 BlkMigDevState *bmds;
@@ -269,7 +257,7 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile 
*f, int is_async)
 
 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
 if (bmds->bulk_completed == 0) {
-if (mig_save_device_bulk(mon, f, bmds, is_async) == 1) {
+if (mig_save_device_bulk(mon, f, bmds) == 1) {
 /* completed bulk section for this device */
 bmds->bulk_completed = 1;
 }
@@ -362,19 +350,7 @@ static void flush_blks(QEMUFile* f)
 
 static int is_stage2_completed(void)
 {
-BlkMigDevState *bmds;
-
-if (block_mig_state.submitted > 0) {
-return 0;
-}
-
-QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-if (bmds->bulk_completed == 0) {
-return 0;
-}
-}
-
-return 1;
+return (block_mig_state.submitted == 0 && block_mig_state.bulk_completed);
 }
 
 static void blk_mig_cleanup(Monitor *mon)
@@ -432,8 +408,9 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int 
stage, void *opaque)
 while ((block_mig_state.submitted +
 block_mig_state.read_done) * BLOCK_SIZE <
qemu_file_get_rate_limit(f)) {
-if (blk_mig_save_bulked_block(mon, f, 1) == 0) {
-/* no more bulk blocks for now */
+if (blk_mig_save_bulked_block(mon, f) == 0) {
+/* finished saving bulk on all devices */
+block_mig_state.bulk_completed = 1;
 break;
 }
 }
@@ -446,9 +423,7 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int 
stage, void *opaque)
 }
 
 if (stage == 3) {
-while (blk_mig_save_bulked_block(mon, f, 0) != 0) {
-/* empty */
-}
+/* we know for sure that save bulk is completed */
 
 blk_mig_save_dirty_blocks(mon, f);
 blk_mig_cleanup(mon);
-- 
1.6.0.4

[Qemu-devel] [PATCH v3 5/5] Try not to exceed max downtime on stage3

2010-01-26 Thread Liran Schour

Move to stage3 only when remaining work can be done below max downtime.
Use qemu_get_clock_ns for measuring read performance.

Signed-off-by: Liran Schour 
---
 block-migration.c |   79 +++--
 1 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index d8755d1..0e63596 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -15,8 +15,10 @@
 #include "block_int.h"
 #include "hw/hw.h"
 #include "qemu-queue.h"
+#include "qemu-timer.h"
 #include "monitor.h"
 #include "block-migration.h"
+#include "migration.h"
 #include 
 
 #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
@@ -57,6 +59,7 @@ typedef struct BlkMigBlock {
 QEMUIOVector qiov;
 BlockDriverAIOCB *aiocb;
 int ret;
+int64_t time;
 QSIMPLEQ_ENTRY(BlkMigBlock) entry;
 } BlkMigBlock;
 
@@ -71,7 +74,8 @@ typedef struct BlkMigState {
 int64_t total_sector_sum;
 int prev_progress;
 int bulk_completed;
-int dirty_iterations;
+long double total_time;
+int reads;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -124,12 +128,28 @@ uint64_t blk_mig_bytes_total(void)
 return sum << BDRV_SECTOR_BITS;
 }
 
+static inline void add_avg_read_time(int64_t time)
+{
+block_mig_state.reads++;
+block_mig_state.total_time += time;
+}
+
+static inline long double compute_read_bwidth(void)
+{
+assert(block_mig_state.total_time != 0);
+return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
+}
+
 static void blk_mig_read_cb(void *opaque, int ret)
 {
 BlkMigBlock *blk = opaque;
 
 blk->ret = ret;
 
+blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
+
+add_avg_read_time(blk->time);
+
 QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
 
 block_mig_state.submitted--;
@@ -179,6 +199,8 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
+blk->time = qemu_get_clock_ns(rt_clock);
+
 blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
 nr_sectors, blk_mig_read_cb, blk);
 if (!blk->aiocb) {
@@ -220,6 +242,8 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
 block_mig_state.total_sector_sum = 0;
 block_mig_state.prev_progress = -1;
 block_mig_state.bulk_completed = 0;
+block_mig_state.total_time = 0;
+block_mig_state.reads = 0;
 
 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
 if (bs->type == BDRV_TYPE_HD) {
@@ -314,11 +338,13 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile 
*f,
 blk->bmds = bmds;
 blk->sector = sector;
 
-if(is_async) {
+if (is_async) {
 blk->iov.iov_base = blk->buf;
 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
+   blk->time = qemu_get_clock_ns(rt_clock);
+
 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
 nr_sectors, blk_mig_read_cb, blk);
 if (!blk->aiocb) {
@@ -345,7 +371,7 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
 
 return (bmds->cur_dirty >= bmds->total_sectors);
 
- error:
+error:
 monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
 qemu_file_set_error(f);
 qemu_free(blk->buf);
@@ -359,7 +385,7 @@ static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile 
*f, int is_async)
 int ret = 0;
 
 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-if(mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
+if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
 ret = 1;
 break;
 }
@@ -400,9 +426,42 @@ static void flush_blks(QEMUFile* f)
 block_mig_state.transferred);
 }
 
+static int64_t get_remaining_dirty(void)
+{
+BlkMigDevState *bmds;
+int64_t dirty = 0;
+
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+dirty += bdrv_get_dirty_count(bmds->bs);
+}
+
+return dirty * BLOCK_SIZE;
+}
+
 static int is_stage2_completed(void)
 {
-return (block_mig_state.submitted == 0 && block_mig_state.bulk_completed);
+int64_t remaining_dirty;
+long double bwidth;
+
+if (block_mig_state.bulk_completed == 1) {
+
+remaining_dirty = get_remaining_dirty();
+   if (remaining_dirty == 0) {
+   return 1;
+   }
+
+   bwidth = compute_read_bwidth();
+
+   if ((remaining_dirty / bwidth) <=
+migrate_max_downtime()) {
+/* finish stage2 because we think that we can finish remaing work
+   below max_downtime */
+
+return 1;
+}
+}
+
+return 0;
 }
 
 static void blk_mig_cleanup(Monitor *mon)
@@ -458,7 +51

[Qemu-devel] [PATCH v3 4/5] Count dirty blocks and expose an API to get dirty count

2010-01-26 Thread Liran Schour

This will manage a dirty counter for each device and will allow upper
layers to get the dirty count.

Signed-off-by: Liran Schour 
---
 block.c |   16 ++--
 block.h |1 +
 block_int.h |1 +
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 30ae2b1..a6381ad 100644
--- a/block.c
+++ b/block.c
@@ -653,9 +653,15 @@ static void set_dirty_bitmap(BlockDriverState *bs, int64_t 
sector_num,
 bit = start % (sizeof(unsigned long) * 8);
 val = bs->dirty_bitmap[idx];
 if (dirty) {
-val |= 1 << bit;
+if (!(val & (1 << bit))) {
+bs->dirty_count++;
+val |= 1 << bit;
+}
 } else {
-val &= ~(1 << bit);
+if (val & (1 << bit)) {
+bs->dirty_count--;
+val &= ~(1 << bit);
+}
 }
 bs->dirty_bitmap[idx] = val;
 }
@@ -2116,6 +2122,7 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int 
enable)
 {
 int64_t bitmap_size;
 
+bs->dirty_count = 0;
 if (enable) {
 if (!bs->dirty_bitmap) {
 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
@@ -2150,3 +2157,8 @@ void bdrv_reset_dirty(BlockDriverState *bs, int64_t 
cur_sector,
 {
 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
 }
+
+int64_t bdrv_get_dirty_count(BlockDriverState *bs)
+{
+return bs->dirty_count;
+}
diff --git a/block.h b/block.h
index fa51ddf..1012303 100644
--- a/block.h
+++ b/block.h
@@ -201,4 +201,5 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int 
enable);
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
   int nr_sectors);
+int64_t bdrv_get_dirty_count(BlockDriverState *bs);
 #endif
diff --git a/block_int.h b/block_int.h
index 9a3b2e0..8d5d9bc 100644
--- a/block_int.h
+++ b/block_int.h
@@ -172,6 +172,7 @@ struct BlockDriverState {
 int type;
 char device_name[32];
 unsigned long *dirty_bitmap;
+int64_t dirty_count;
 BlockDriverState *next;
 void *private;
 };
-- 
1.6.0.4

[Qemu-devel] [PATCH v3 2/5] add qemu_get_clock_ns

2010-01-26 Thread Liran Schour

From: Paolo Bonzini 

Some places use get_clock directly because they want to access the
rt_clock with nanosecond precision.  Add a function to do exactly that
instead of using internal interfaces.

Signed-off-by: Paolo Bonzini 
---
 qemu-timer.h |1 +
 vl.c |   21 +++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/qemu-timer.h b/qemu-timer.h
index e7eaa04..c17b4e6 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -25,6 +25,7 @@ extern QEMUClock *vm_clock;
 extern QEMUClock *host_clock;
 
 int64_t qemu_get_clock(QEMUClock *clock);
+int64_t qemu_get_clock_ns(QEMUClock *clock);
 
 QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque);
 void qemu_free_timer(QEMUTimer *ts);
diff --git a/vl.c b/vl.c
index e881e45..c5cd462 100644
--- a/vl.c
+++ b/vl.c
@@ -1131,6 +1131,23 @@ int64_t qemu_get_clock(QEMUClock *clock)
 }
 }
 
+int64_t qemu_get_clock_ns(QEMUClock *clock)
+{
+switch(clock->type) {
+case QEMU_CLOCK_REALTIME:
+return get_clock();
+default:
+case QEMU_CLOCK_VIRTUAL:
+if (use_icount) {
+return cpu_get_icount();
+} else {
+return cpu_get_clock();
+}
+case QEMU_CLOCK_HOST:
+return get_clock_realtime();
+}
+}
+
 static void init_clocks(void)
 {
 init_get_clock();
@@ -3063,7 +3080,7 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int 
stage, void *opaque)
 }
 
 bytes_transferred_last = bytes_transferred;
-bwidth = get_clock();
+bwidth = qemu_get_clock_ns(rt_clock);
 
 while (!qemu_file_rate_limit(f)) {
 int ret;
@@ -3074,7 +3091,7 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int 
stage, void *opaque)
 break;
 }
 
-bwidth = get_clock() - bwidth;
+bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
 bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
 
 /* if we haven't transferred anything this round, force expected_time to a
-- 
1.6.0.4

Re: [Qemu-devel] Re: [PATCH 1/3] Support --sysconfdir in configure to specify path to configuration files (v3)

2010-01-26 Thread Avi Kivity


On 01/25/2010 11:05 PM, Paolo Bonzini wrote:



I'm not sure about the choice for Windows. Do we want possibly a
dozen of .conf files all in the same directory as the binaries, or
maybe it's better to set sysconfdir = ${prefix}/conf,
confdir=${sysconfdir} on Windows?


I honestly don't know. What's the normal thing to do with Windows?


The registry, I think.


The registry would be used indeed to get the path or to override 
defaults.  However, what would be the default value (written in the 
registry by the installer, or used by the program if the registry 
value is absent)?


You could use the registry to hold the defaults, not a path to the defaults.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Alexander Graf


On 26.01.2010, at 07:49, Chris Wright wrote:

> Please send in any agenda items you are interested in covering.

KVM Hardware Inquiry Tool

One of the things I have on my todo list is a tool you can run on your machine 
that tells you which virtualization features it supports. Imaginary output of 
such a tool:

--

KVM Supported: yes
NPT/EPT: yes
Device Assignment: no

Expected Virtual CPU Speed: 95%

--

That way users can easily determine what to expect when they run KVM on a 
machine without needing to know about CPUID flags that don't even get exposed 
in /proc/cpuinfo or grepping dmesg.

My main question on this one is how to best implement it.

Should this be part of qemu? We'll need some architecture specific backend 
code, so leveraging the structure might be helpful.
Should this be a separate script? That'd mean installing one more application 
that distros might name differently :(.
Does it even have chances to get accepted upstream?


Alex

Re: [Qemu-devel] [PATCH] block: prevent multiwrite_merge from creating too large iovecs

2010-01-26 Thread Christoph Hellwig

On Wed, Jan 20, 2010 at 12:37:51PM +0100, Kevin Wolf wrote:
> To underline that it's a backend/platform dependent thing: Your patch
> breaks the mingw build for me.

Actually that's because mingw is the usual piece of crap and doesn't
actually have any of the vector support you can expect from a normal
Unix system.

I can either throw in an #ifdef IOV_MAX around the check or fake one up
for mingw.  Does any of the maintainers have a preference for either
variant?

[Qemu-devel] [PATCH v2][uqmaster] kvm: Flush coalesced MMIO buffer periodly

2010-01-26 Thread Sheng Yang

The default behaviour of coalesced MMIO is to cache the writes in a buffer until:
1. The buffer is full.
2. Or there is an exit to QEmu for other reasons.

But this can result in writes being delivered very late under some conditions:
1. Each write to the MMIO region is small.
2. The interval between writes is big.
3. There is no need for input or for accessing other devices frequently.

This issue was observed in an experimental embedded system. The test image
simply prints "test" every second. The output in QEmu meets expectations,
but the output in KVM is delayed for seconds.

Per Avi's suggestion, I hooked flushing of the coalesced MMIO buffer into the
VGA update handler. This way, we don't need an explicit vcpu exit to QEmu to
handle this issue.

Signed-off-by: Sheng Yang 
---
 cpu-all.h |2 ++
 exec.c|6 ++
 kvm-all.c |   21 +
 kvm.h |1 +
 vl.c  |2 ++
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 57b69f8..1ccc9a8 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -915,6 +915,8 @@ void qemu_register_coalesced_mmio(target_phys_addr_t addr, 
ram_addr_t size);
 
 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
 
+void qemu_flush_coalesced_mmio_buffer(void);
+
 /***/
 /* host CPU ticks (if available) */
 
diff --git a/exec.c b/exec.c
index 1190591..6875370 100644
--- a/exec.c
+++ b/exec.c
@@ -2406,6 +2406,12 @@ void qemu_unregister_coalesced_mmio(target_phys_addr_t 
addr, ram_addr_t size)
 kvm_uncoalesce_mmio_region(addr, size);
 }
 
+void qemu_flush_coalesced_mmio_buffer(void)
+{
+if (kvm_enabled())
+kvm_flush_coalesced_mmio_buffer();
+}
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
 RAMBlock *new_block;
diff --git a/kvm-all.c b/kvm-all.c
index 15ec38e..889fc42 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -59,6 +59,7 @@ struct KVMState
 int vmfd;
 int regs_modified;
 int coalesced_mmio;
+struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 int broken_set_mem_region;
 int migration_log;
 int vcpu_events;
@@ -200,6 +201,12 @@ int kvm_init_vcpu(CPUState *env)
 goto err;
 }
 
+#ifdef KVM_CAP_COALESCED_MMIO
+if (s->coalesced_mmio && !s->coalesced_mmio_ring)
+s->coalesced_mmio_ring = (void *) env->kvm_run +
+   s->coalesced_mmio * PAGE_SIZE;
+#endif
+
 ret = kvm_arch_init_vcpu(env);
 if (ret == 0) {
 qemu_register_reset(kvm_reset_vcpu, env);
@@ -466,10 +473,10 @@ int kvm_init(int smp_cpus)
 goto err;
 }
 
+s->coalesced_mmio = 0;
+s->coalesced_mmio_ring = NULL;
 #ifdef KVM_CAP_COALESCED_MMIO
 s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
-#else
-s->coalesced_mmio = 0;
 #endif
 
 s->broken_set_mem_region = 1;
@@ -544,14 +551,12 @@ static int kvm_handle_io(uint16_t port, void *data, int 
direction, int size,
 return 1;
 }
 
-static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
+void kvm_flush_coalesced_mmio_buffer(void)
 {
 #ifdef KVM_CAP_COALESCED_MMIO
 KVMState *s = kvm_state;
-if (s->coalesced_mmio) {
-struct kvm_coalesced_mmio_ring *ring;
-
-ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
+if (s->coalesced_mmio_ring) {
+struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
 while (ring->first != ring->last) {
 struct kvm_coalesced_mmio *ent;
 
@@ -609,7 +614,7 @@ int kvm_cpu_exec(CPUState *env)
 abort();
 }
 
-kvm_run_coalesced_mmio(env, run);
+kvm_flush_coalesced_mmio_buffer();
 
 ret = 0; /* exit loop */
 switch (run->exit_reason) {
diff --git a/kvm.h b/kvm.h
index 1c93ac5..59cba18 100644
--- a/kvm.h
+++ b/kvm.h
@@ -53,6 +53,7 @@ void kvm_setup_guest_memory(void *start, size_t size);
 
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
+void kvm_flush_coalesced_mmio_buffer(void);
 
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
   target_ulong len, int type);
diff --git a/vl.c b/vl.c
index 2b0b653..1f0c536 100644
--- a/vl.c
+++ b/vl.c
@@ -3193,6 +3193,7 @@ static void gui_update(void *opaque)
 DisplayState *ds = opaque;
 DisplayChangeListener *dcl = ds->listeners;
 
+qemu_flush_coalesced_mmio_buffer();
 dpy_refresh(ds);
 
 while (dcl != NULL) {
@@ -3208,6 +3209,7 @@ static void nographic_update(void *opaque)
 {
 uint64_t interval = GUI_REFRESH_INTERVAL;
 
+qemu_flush_coalesced_mmio_buffer();
 qemu_mod_timer(nographic_timer, interval + qemu_get_clock(rt_clock));
 }
 
-- 
1.5.4.5

[Qemu-devel] Re: [PATCH v2][uqmaster] kvm: Flush coalesced MMIO buffer periodly

2010-01-26 Thread Alexander Graf


On 26.01.2010, at 10:41, Sheng Yang wrote:

> The default action of coalesced MMIO is, cache the writing in buffer, until:
> 1. The buffer is full.
> 2. Or the exit to QEmu due to other reasons.
> 
> But this would result in a very late writing in some condition.
> 1. The each time write to MMIO content is small.
> 2. The writing interval is big.
> 3. No need for input or accessing other devices frequently.
> 
> This issue was observed in a experimental embbed system. The test image
> simply print "test" every 1 seconds. The output in QEmu meets expectation,
> but the output in KVM is delayed for seconds.
> 
> Per Avi's suggestion, I hooked flushing coalesced MMIO buffer in VGA update
> handler. By this way, We don't need vcpu explicit exit to QEmu to
> handle this issue.
> 
> Signed-off-by: Sheng Yang 
> ---
> cpu-all.h |2 ++
> exec.c|6 ++
> kvm-all.c |   21 +
> kvm.h |1 +
> vl.c  |2 ++
> 5 files changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/cpu-all.h b/cpu-all.h
> index 57b69f8..1ccc9a8 100644
> --- a/cpu-all.h
> +++ b/cpu-all.h
> @@ -915,6 +915,8 @@ void qemu_register_coalesced_mmio(target_phys_addr_t 
> addr, ram_addr_t size);
> 
> void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
> 
> +void qemu_flush_coalesced_mmio_buffer(void);
> +
> /***/
> /* host CPU ticks (if available) */
> 
> diff --git a/exec.c b/exec.c
> index 1190591..6875370 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2406,6 +2406,12 @@ void qemu_unregister_coalesced_mmio(target_phys_addr_t 
> addr, ram_addr_t size)
> kvm_uncoalesce_mmio_region(addr, size);
> }
> 
> +void qemu_flush_coalesced_mmio_buffer(void)
> +{
> +if (kvm_enabled())
> +kvm_flush_coalesced_mmio_buffer();
> +}
> +
> ram_addr_t qemu_ram_alloc(ram_addr_t size)
> {
> RAMBlock *new_block;
> diff --git a/kvm-all.c b/kvm-all.c
> index 15ec38e..889fc42 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -59,6 +59,7 @@ struct KVMState
> int vmfd;
> int regs_modified;
> int coalesced_mmio;
> +struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;

I guess this needs to be guarded by an #ifdef?


Alex

[Qemu-devel] Re: [PATCH v2][uqmaster] kvm: Flush coalesced MMIO buffer periodly

2010-01-26 Thread Sheng Yang

On Tue, Jan 26, 2010 at 10:59:17AM +0100, Alexander Graf wrote:
> 
> On 26.01.2010, at 10:41, Sheng Yang wrote:
> 
> > --- a/kvm-all.c
> > +++ b/kvm-all.c
> > @@ -59,6 +59,7 @@ struct KVMState
> > int vmfd;
> > int regs_modified;
> > int coalesced_mmio;
> > +struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
> 
> I guess this needs to be guarded by an #ifdef?

Oh, yes. Thanks for reminder. :)

-- 
regards
Yang, Sheng

[Qemu-devel] [PATCH v3][uqmaster] kvm: Flush coalesced MMIO buffer periodly

2010-01-26 Thread Sheng Yang

The default behaviour of coalesced MMIO is to cache the writes in a buffer until:
1. The buffer is full.
2. Or there is an exit to QEmu for other reasons.

But this can result in writes being delivered very late under some conditions:
1. Each write to the MMIO region is small.
2. The interval between writes is big.
3. There is no need for input or for accessing other devices frequently.

This issue was observed in an experimental embedded system. The test image
simply prints "test" every second. The output in QEmu meets expectations,
but the output in KVM is delayed for seconds.

Per Avi's suggestion, I hooked flushing of the coalesced MMIO buffer into the
VGA update handler. This way, we don't need an explicit vcpu exit to QEmu to
handle this issue.

Signed-off-by: Sheng Yang 
---
 cpu-all.h |2 ++
 exec.c|6 ++
 kvm-all.c |   23 +++
 kvm.h |1 +
 vl.c  |2 ++
 5 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 57b69f8..1ccc9a8 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -915,6 +915,8 @@ void qemu_register_coalesced_mmio(target_phys_addr_t addr, 
ram_addr_t size);
 
 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
 
+void qemu_flush_coalesced_mmio_buffer(void);
+
 /***/
 /* host CPU ticks (if available) */
 
diff --git a/exec.c b/exec.c
index 1190591..6875370 100644
--- a/exec.c
+++ b/exec.c
@@ -2406,6 +2406,12 @@ void qemu_unregister_coalesced_mmio(target_phys_addr_t 
addr, ram_addr_t size)
 kvm_uncoalesce_mmio_region(addr, size);
 }
 
+void qemu_flush_coalesced_mmio_buffer(void)
+{
+if (kvm_enabled())
+kvm_flush_coalesced_mmio_buffer();
+}
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
 RAMBlock *new_block;
diff --git a/kvm-all.c b/kvm-all.c
index 15ec38e..f8350c9 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -59,6 +59,9 @@ struct KVMState
 int vmfd;
 int regs_modified;
 int coalesced_mmio;
+#ifdef KVM_CAP_COALESCED_MMIO
+struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+#endif
 int broken_set_mem_region;
 int migration_log;
 int vcpu_events;
@@ -200,6 +203,12 @@ int kvm_init_vcpu(CPUState *env)
 goto err;
 }
 
+#ifdef KVM_CAP_COALESCED_MMIO
+if (s->coalesced_mmio && !s->coalesced_mmio_ring)
+s->coalesced_mmio_ring = (void *) env->kvm_run +
+   s->coalesced_mmio * PAGE_SIZE;
+#endif
+
 ret = kvm_arch_init_vcpu(env);
 if (ret == 0) {
 qemu_register_reset(kvm_reset_vcpu, env);
@@ -466,10 +475,10 @@ int kvm_init(int smp_cpus)
 goto err;
 }
 
+s->coalesced_mmio = 0;
 #ifdef KVM_CAP_COALESCED_MMIO
 s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
-#else
-s->coalesced_mmio = 0;
+s->coalesced_mmio_ring = NULL;
 #endif
 
 s->broken_set_mem_region = 1;
@@ -544,14 +553,12 @@ static int kvm_handle_io(uint16_t port, void *data, int 
direction, int size,
 return 1;
 }
 
-static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
+void kvm_flush_coalesced_mmio_buffer(void)
 {
 #ifdef KVM_CAP_COALESCED_MMIO
 KVMState *s = kvm_state;
-if (s->coalesced_mmio) {
-struct kvm_coalesced_mmio_ring *ring;
-
-ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
+if (s->coalesced_mmio_ring) {
+struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
 while (ring->first != ring->last) {
 struct kvm_coalesced_mmio *ent;
 
@@ -609,7 +616,7 @@ int kvm_cpu_exec(CPUState *env)
 abort();
 }
 
-kvm_run_coalesced_mmio(env, run);
+kvm_flush_coalesced_mmio_buffer();
 
 ret = 0; /* exit loop */
 switch (run->exit_reason) {
diff --git a/kvm.h b/kvm.h
index 1c93ac5..59cba18 100644
--- a/kvm.h
+++ b/kvm.h
@@ -53,6 +53,7 @@ void kvm_setup_guest_memory(void *start, size_t size);
 
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
+void kvm_flush_coalesced_mmio_buffer(void);
 
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
   target_ulong len, int type);
diff --git a/vl.c b/vl.c
index 2b0b653..1f0c536 100644
--- a/vl.c
+++ b/vl.c
@@ -3193,6 +3193,7 @@ static void gui_update(void *opaque)
 DisplayState *ds = opaque;
 DisplayChangeListener *dcl = ds->listeners;
 
+qemu_flush_coalesced_mmio_buffer();
 dpy_refresh(ds);
 
 while (dcl != NULL) {
@@ -3208,6 +3209,7 @@ static void nographic_update(void *opaque)
 {
 uint64_t interval = GUI_REFRESH_INTERVAL;
 
+qemu_flush_coalesced_mmio_buffer();
 qemu_mod_timer(nographic_timer, interval + qemu_get_clock(rt_clock));
 }
 
-- 
1.5.4.5

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Luiz Capitulino

On Mon, 25 Jan 2010 17:38:38 +0200
"Michael S. Tsirkin"  wrote:

> On Mon, Jan 25, 2010 at 01:27:19PM -0200, Luiz Capitulino wrote:
> > On Mon, 25 Jan 2010 15:35:53 +0100
> > Markus Armbruster  wrote:
> > 
> > > Luiz Capitulino  writes:
> > > 
> > > > On Mon, 25 Jan 2010 12:09:06 +0200
> > > > "Michael S. Tsirkin"  wrote:
> > > [...]
> > > >> 
> > > >> Finally,
> > > >> don't we want unsigned values in protocol?
> > > >
> > > >  JSON doesn't support them.
> > > 
> > > Uh, where does the RFC say that?
> > 
> >  I see that my comment was misleading.
> > 
> >  In JSON we don't have unsigned types, we have only a type
> > called 'number' to represent them all.
> > 
> >  Unsigneds should be handled correctly, except for uint64_t which
> > is cast to int64_t.
> > 
> >  Michael, does this answer your question?
> > Is there any
> > issue with the handling of unsigneds I'm not aware about?
> 
> The issue I see isn't related to unsigned.  Apparently we currently
> accept values such as 'a' as valid strings. Since this is not valid json
> we probably should reject it just in case we will want to switch to
> another json library, otherwise clients might come to depend on
> non-standard behaviour.

 This extension is only used internally by QEMU and we find it
very convenient otherwise we would have to escape strings in
dicts and lists, which is error prone and time consuming.

Re: [Qemu-devel] [RFC 00/11]: QMP feature negotiation support

2010-01-26 Thread Luiz Capitulino

On Mon, 25 Jan 2010 15:33:40 +0100
Markus Armbruster  wrote:

> Anthony Liguori  writes:
> 
> > On 01/21/2010 03:09 PM, Luiz Capitulino wrote:
> >> """
> >> {"QMP": {"capabilities": ["async messages"]}}
> >>
> >> { "execute": "query-qmp-mode" }
> >> {"return": {"mode": "handshake"}}
> >>
> >> { "execute": "change", "arguments": { "device": "vnc", "target": 
> >> "password", "arg": "1234" } }
> >> {"error": {"class": "QMPInvalidModeCommad", "desc": "The issued command is 
> >> invalid in this mode", "data": {}}}
> >>
> >> { "execute": "async_msg_enable", "arguments": { "name": "STOP" } }
> >> {"return": {}}
> >>
> >
> > Maybe:
> >
> > enable-capability "async messages"
> > disable-capability "async messages"
> >
> > I think that's a bit more obvious and it means that a client doesn't
> > have to maintain a mapping of features -> enable functions.  It's also
> > strange to use an enable command to disable something.
> 
> Agree on both counts.  But why two commands?  Why not simply "capability
> NAME VALUE"?  Works even for non-boolean capabilities.  I'm not
> predicting we'll need such capabilities.

 I slightly prefer two commands because that's probably how I'd
write it in a program (ie. two functions), also enabling/disabling
a group of features is a bit easier too, as we can use an array:

capability_enable [ "foo", "bar" ]

 Now, having only one command is not terribly difficult, but we would
have to accept an array of objects, like:

[ { "name": "foo", "enabled": true }, { "name": "bar", "enabled": true } ]

[Qemu-devel] [PATCH v3 3/5] Tranfer dirty blocks during iterative phase

2010-01-26 Thread Liran Schour

Start transferring dirty blocks during the iterative stage. That will
reduce the time that the guest is suspended.

Signed-off-by: Liran Schour 
---
 block-migration.c |  135 +++--
 1 files changed, 99 insertions(+), 36 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 93d86d9..d8755d1 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -42,6 +42,7 @@ typedef struct BlkMigDevState {
 int bulk_completed;
 int shared_base;
 int64_t cur_sector;
+int64_t cur_dirty;
 int64_t completed_sectors;
 int64_t total_sectors;
 int64_t dirty;
@@ -70,6 +71,7 @@ typedef struct BlkMigState {
 int64_t total_sector_sum;
 int prev_progress;
 int bulk_completed;
+int dirty_iterations;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -183,6 +185,7 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
 goto error;
 }
 block_mig_state.submitted++;
+
 bdrv_reset_dirty(bs, cur_sector, nr_sectors);
 bmds->cur_sector = cur_sector + nr_sectors;
 
@@ -281,39 +284,88 @@ static int blk_mig_save_bulked_block(Monitor *mon, 
QEMUFile *f)
 return ret;
 }
 
-#define MAX_NUM_BLOCKS 4
-
-static void blk_mig_save_dirty_blocks(Monitor *mon, QEMUFile *f)
+static void blk_mig_reset_dirty_cursor(void)
 {
 BlkMigDevState *bmds;
-BlkMigBlock blk;
+
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+bmds->cur_dirty = 0;
+}
+}
+
+static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
+ BlkMigDevState *bmds, int is_async)
+{
+BlkMigBlock *blk;
+int64_t total_sectors = bmds->total_sectors;
 int64_t sector;
+int nr_sectors;
 
-blk.buf = qemu_malloc(BLOCK_SIZE);
+for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
+if (bdrv_get_dirty(bmds->bs, sector)) {
 
-QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-for (sector = 0; sector < bmds->cur_sector;) {
-if (bdrv_get_dirty(bmds->bs, sector)) {
-if (bdrv_read(bmds->bs, sector, blk.buf,
-  BDRV_SECTORS_PER_DIRTY_CHUNK) < 0) {
-monitor_printf(mon, "Error reading sector %" PRId64 "\n",
-   sector);
-qemu_file_set_error(f);
-qemu_free(blk.buf);
-return;
+if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
+nr_sectors = total_sectors - sector;
+} else {
+nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+}
+blk = qemu_malloc(sizeof(BlkMigBlock));
+blk->buf = qemu_malloc(BLOCK_SIZE);
+blk->bmds = bmds;
+blk->sector = sector;
+
+if(is_async) {
+blk->iov.iov_base = blk->buf;
+blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+
+blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
+nr_sectors, blk_mig_read_cb, blk);
+if (!blk->aiocb) {
+goto error;
+}
+block_mig_state.submitted++;
+} else {
+if (bdrv_read(bmds->bs, sector, blk->buf,
+  nr_sectors) < 0) {
+goto error;
 }
-blk.bmds = bmds;
-blk.sector = sector;
-blk_send(f, &blk);
+blk_send(f, blk);
 
-bdrv_reset_dirty(bmds->bs, sector,
- BDRV_SECTORS_PER_DIRTY_CHUNK);
+qemu_free(blk->buf);
+qemu_free(blk);
 }
-sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
+break;
 }
+sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
+bmds->cur_dirty = sector;
 }
 
-qemu_free(blk.buf);
+return (bmds->cur_dirty >= bmds->total_sectors);
+
+ error:
+monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
+qemu_file_set_error(f);
+qemu_free(blk->buf);
+qemu_free(blk);
+return 0;
+}
+
+static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
+{
+BlkMigDevState *bmds;
+int ret = 0;
+
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+if(mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
+ret = 1;
+break;
+}
+}
+
+return ret;
 }
 
 static void flush_blks(QEMUFile* f)
@@ -404,28 +456,39 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int 
stage, void *opaque)
 return 0;
 }
 
-/* control the rate of transfer */
-while ((block_mig_state.submitted +
-block_mig_state.re

[Qemu-devel] [PATCH v3 3/5] Tranfer dirty blocks during iterative phase

2010-01-26 Thread Liran Schour

Start transferring dirty blocks during the iterative stage. That will
reduce the time that the guest will be suspended.

Signed-off-by: Liran Schour 
---
 block-migration.c |  135 +++--
 1 files changed, 99 insertions(+), 36 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 93d86d9..d8755d1 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -42,6 +42,7 @@ typedef struct BlkMigDevState {
 int bulk_completed;
 int shared_base;
 int64_t cur_sector;
+int64_t cur_dirty;
 int64_t completed_sectors;
 int64_t total_sectors;
 int64_t dirty;
@@ -70,6 +71,7 @@ typedef struct BlkMigState {
 int64_t total_sector_sum;
 int prev_progress;
 int bulk_completed;
+int dirty_iterations;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -183,6 +185,7 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
 goto error;
 }
 block_mig_state.submitted++;
+
 bdrv_reset_dirty(bs, cur_sector, nr_sectors);
 bmds->cur_sector = cur_sector + nr_sectors;
 
@@ -281,39 +284,88 @@ static int blk_mig_save_bulked_block(Monitor *mon, 
QEMUFile *f)
 return ret;
 }
 
-#define MAX_NUM_BLOCKS 4
-
-static void blk_mig_save_dirty_blocks(Monitor *mon, QEMUFile *f)
+static void blk_mig_reset_dirty_cursor(void)
 {
 BlkMigDevState *bmds;
-BlkMigBlock blk;
+
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+bmds->cur_dirty = 0;
+}
+}
+
+static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
+ BlkMigDevState *bmds, int is_async)
+{
+BlkMigBlock *blk;
+int64_t total_sectors = bmds->total_sectors;
 int64_t sector;
+int nr_sectors;
 
-blk.buf = qemu_malloc(BLOCK_SIZE);
+for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
+if (bdrv_get_dirty(bmds->bs, sector)) {
 
-QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-for (sector = 0; sector < bmds->cur_sector;) {
-if (bdrv_get_dirty(bmds->bs, sector)) {
-if (bdrv_read(bmds->bs, sector, blk.buf,
-  BDRV_SECTORS_PER_DIRTY_CHUNK) < 0) {
-monitor_printf(mon, "Error reading sector %" PRId64 "\n",
-   sector);
-qemu_file_set_error(f);
-qemu_free(blk.buf);
-return;
+if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
+nr_sectors = total_sectors - sector;
+} else {
+nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+}
+blk = qemu_malloc(sizeof(BlkMigBlock));
+blk->buf = qemu_malloc(BLOCK_SIZE);
+blk->bmds = bmds;
+blk->sector = sector;
+
+if(is_async) {
+blk->iov.iov_base = blk->buf;
+blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+
+blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
+nr_sectors, blk_mig_read_cb, blk);
+if (!blk->aiocb) {
+goto error;
+}
+block_mig_state.submitted++;
+} else {
+if (bdrv_read(bmds->bs, sector, blk->buf,
+  nr_sectors) < 0) {
+goto error;
 }
-blk.bmds = bmds;
-blk.sector = sector;
-blk_send(f, &blk);
+blk_send(f, blk);
 
-bdrv_reset_dirty(bmds->bs, sector,
- BDRV_SECTORS_PER_DIRTY_CHUNK);
+qemu_free(blk->buf);
+qemu_free(blk);
 }
-sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
+break;
 }
+sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
+bmds->cur_dirty = sector;
 }
 
-qemu_free(blk.buf);
+return (bmds->cur_dirty >= bmds->total_sectors);
+
+ error:
+monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
+qemu_file_set_error(f);
+qemu_free(blk->buf);
+qemu_free(blk);
+return 0;
+}
+
+static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
+{
+BlkMigDevState *bmds;
+int ret = 0;
+
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+if(mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
+ret = 1;
+break;
+}
+}
+
+return ret;
 }
 
 static void flush_blks(QEMUFile* f)
@@ -404,28 +456,39 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int 
stage, void *opaque)
 return 0;
 }
 
-/* control the rate of transfer */
-while ((block_mig_state.submitted +
-block_mig_state.re

[Qemu-devel] Re: [PATCH] New API for asynchronous monitor commands (V2)

2010-01-26 Thread Luiz Capitulino

On Mon, 25 Jan 2010 12:18:44 -0600
Adam Litke  wrote:

> Changes since V1:
>  - Miscellaneous code cleanups (Thanks Luiz)
> 
> Qemu has a number of commands that can operate asynchronously (savevm, 
> migrate,
> etc) and it will be getting more.  For these commands, the user monitor needs
> to be suspended, but QMP monitors could continue to accept other commands.
> This patch introduces a new command API that isolates the details of handling
> different monitor types from the actual command execution.
> 
> A monitor command can use this API by implementing the mhandler.cmd_async
> handler (or info_async if appropriate).  This function is responsible for
> submitting the command and does not return any data although it may raise
> errors.  When the command completes, the QMPCompletion callback should be
> invoked with its opaque data and the command result.
> 
> The process for submitting and completing an asynchronous command is different
> for QMP and user monitors.  A user monitor must be suspended at submit time 
> and
> resumed at completion time.  The user_print() function must be passed to the
> QMPCompletion callback so the result can be displayed properly.  QMP monitors
> are simpler.  No submit time setup is required.  When the command completes,
> monitor_protocol_emitter() writes the result in JSON format.

 s/QMPCompletion/MonitorCompletion

 Otherwise looks good to me.

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Markus Armbruster

Luiz Capitulino  writes:

> On Mon, 25 Jan 2010 17:38:38 +0200
> "Michael S. Tsirkin"  wrote:
[...]
>> The issue I see isn't related to unsigned.  Apparently we currently
>> accept values such as 'a' as valid strings. Since this is not valid json
>> we probably should reject it just in case we will want to switch to
>> another json library, otherwise clients might come to depend on
>> non-standard behaviour.
>
>  This extension is only used internally by QEMU

Let me elaborate: when a QMP client sends us 'a' over the wire, the
parser rejects that as an error.  At least that's what we've been
promised when the extension was discussed.

> and we find it
> very convenient otherwise we would have to escape strings in
> dicts and lists, which is error prone and time consuming.

I doubt it would be error prone, but it would sure be annoying and hard
to read.  The readability argument is what convinced me.

[Qemu-devel] Re: [PATCH v3][uqmaster] kvm: Flush coalesced MMIO buffer periodly

2010-01-26 Thread Marcelo Tosatti

On Tue, Jan 26, 2010 at 07:21:16PM +0800, Sheng Yang wrote:
> The default action of coalesced MMIO is, cache the writing in buffer, until:
> 1. The buffer is full.
> 2. Or the exit to QEmu due to other reasons.
> 
> But this would result in a very late writing in some condition.
> 1. The each time write to MMIO content is small.
> 2. The writing interval is big.
> 3. No need for input or accessing other devices frequently.
> 
> This issue was observed in an experimental embedded system. The test image
> simply prints "test" every second. The output in QEmu meets expectation,
> but the output in KVM is delayed for seconds.
> 
> Per Avi's suggestion, I hooked flushing coalesced MMIO buffer in VGA update
> handler. By this way, We don't need vcpu explicit exit to QEmu to
> handle this issue.
> 
> Signed-off-by: Sheng Yang 

Applied, thanks.

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 05:43 AM, Luiz Capitulino wrote:

The issue I see isn't related to unsigned.  Apparently we currently
accept values such as 'a' as valid strings. Since this is not valid json
we probably should reject it just in case we will want to switch to
another json library, otherwise clients might come to depend on
non-standard behaviour.
 

  This extension is only used internally by QEMU and we find it
very convenient otherwise we would have to escape strings in
dicts and lists, which is error prone and time consuming.
   


Actually, I was reading the JSON RFC last night and came across:

  "A JSON parser transforms a JSON text into another representation. A
   JSON parser MUST accept all texts that conform to the JSON grammar.
   A JSON parser MAY accept non-JSON forms or extensions."

So we are fully JSON compliant in our current implementation.

Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Michael S. Tsirkin

On Tue, Jan 26, 2010 at 06:47:12AM -0600, Anthony Liguori wrote:
> On 01/26/2010 05:43 AM, Luiz Capitulino wrote:
>>> The issue I see isn't related to unsigned.  Apparently we currently
>>> accept values such as 'a' as valid strings. Since this is not valid json
>>> we probably should reject it just in case we will want to switch to
>>> another json library, otherwise clients might come to depend on
>>> non-standard behaviour.
>>>  
>>   This extension is only used internally by QEMU and we find it
>> very convenient otherwise we would have to escape strings in
>> dicts and lists, which is error prone and time consuming.
>>
>
> Actually, I was reading the JSON RFC last night and came across:
>
>   "A JSON parser transforms a JSON text into another representation. A
>JSON parser MUST accept all texts that conform to the JSON grammar.
>A JSON parser MAY accept non-JSON forms or extensions."
>
> So we are fully JSON compliant in our current implementation.
>
> Regards,
>
> Anthony Liguori

Yes, I agree we are compliant.
But I also think we should be strict and reject non-JSON
input just so that clients do not come to depend on it.

-- 
MST

Re: [Qemu-devel] [PATCH] Add definitions for current cpu models..

2010-01-26 Thread Anthony Liguori


On 01/26/2010 02:26 AM, Gerd Hoffmann wrote:

On 01/25/10 23:35, Dor Laor wrote:

On 01/25/2010 04:21 PM, Anthony Liguori wrote:

Another way to look at this is that implementing a somewhat arbitrary
policy within QEMU's .c files is something we should try to avoid.
Implementing arbitrary policy in our default config file is a fine 
thing

to do. Default configs are suggested configurations that are modifiable
by a user. Something baked into QEMU is something that ought to work 
for

>

If we get the models right, users and mgmt stacks won't need to define
them. It seems like almost impossible task for us, mgmt stack/users
won't do a better job, the opposite I guess. The configs are great, I
have no argument against them, my case is that if we can pin down some
definitions, it's better that they live in the code, like the above models.
It might even help to get the same cpus across the various vendors,
otherwise we might end up with IBM's core2duo, RH's core2duo, Suse's,..


I agree.  When looking at this thread and config file idea it feels a 
bit like "we have a hard time to agree on some sensible default cpu 
types, so lets make this configurable so we don't have to".  Which is 
a bad thing IMHO.


There's no sensible default.  If a user only has Nehalem-EX class 
processors and Westmeres, why would they want to limit themselves to 
just Nehalem?  For an organization that already uses and understands the 
VMware grouping, is it wrong for them to want to just use VMware-style 
grouping?


This feature is purely data driven.  There is no code involved.  Any 
time a feature is purely data driven and there isn't a clear right and 
wrong solution, a configuration file is a natural solution IMHO.


I think the only real question is whether it belongs in the default 
config or a dedicated configuration file but honestly that's just a 
statement of convention.


Regards,

Anthony Liguori


cheers,
  Gerd

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 06:37 AM, Markus Armbruster wrote:

  This extension is only used internally by QEMU
 

Let me elaborate: when a QMP client sends us 'a' over the wire, the
parser rejects that as an error.  At least that's what we've been
promised when the extension was discussed.
   


No, that's never been the case.  I don't see the point.  JSON allows 
it.  If a client comes to depend on it, so what?


Regards,

Anthony Liguori

Re: [Qemu-devel] [RFC 00/11]: QMP feature negotiation support

2010-01-26 Thread Jamie Lokier

Luiz Capitulino wrote:
> capability_enable [ "foo", "bar" ]
> 
>  Now, only one command is not terribly difficult, but we would
> have to accept an array of objects, like:
> 
> [ { "name": "foo", "enabled": true }, { "name": "bar", "enabled": true } ]

That looks like XML-itis.

Why not { "foo": true, "bar": true }?

-- Jamie

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 06:46 AM, Michael S. Tsirkin wrote:

Yes, I agree we are compliant.
But I also think we should be strict and reject non-JSON
input just so that clients do not come to depend on it.
   


If we can make JSON better while preserving compatibility and adhering 
to the spec, why wouldn't we?


For instance, at some point in time, we're going to have to do 
something about floating point representation.  We have the ability to 
negotiate these capabilities at run-time.


Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Michael S. Tsirkin

On Tue, Jan 26, 2010 at 06:56:10AM -0600, Anthony Liguori wrote:
> On 01/26/2010 06:37 AM, Markus Armbruster wrote:
>>>   This extension is only used internally by QEMU
>>>  
>> Let me elaborate: when a QMP client sends us 'a' over the wire, the
>> parser rejects that as an error.  At least that's what we've been
>> promised when the extension was discussed.
>>
>
> No, that's never been the case.  I don't see the point.  JSON allows it.  
> If a client comes to depend on it, so what?
>
> Regards,
>
> Anthony Liguori


Then we'll have to support it forever. Asking clients to only depend on
valid JSON will make sure we can use json library in the future, as well
as allow easier debugging etc.

-- 
MST

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Michael S. Tsirkin

On Tue, Jan 26, 2010 at 06:58:32AM -0600, Anthony Liguori wrote:
> On 01/26/2010 06:46 AM, Michael S. Tsirkin wrote:
>> Yes, I agree we are compliant.
>> But I also think we should be strict and reject non-JSON
>> input just so that clients do not come to depend on it.
>>
>
> If we can make JSON better while preserving compatibility and adhering  
> to the spec, why wouldn't we?

Adding '' seems very little gain. The pain point would be
supporting multiple syntax variants, and inability to use
external tools to parse such traffic.

> For instance, at some point in time, we're going to do have to do  
> something about floating point representation.

What's the issue? There's '.' and there's 'e' ...
And maybe we won't need floating point ever ...

> We have the ability to  negotiate these capabilities at run-time.
>
> Regards,
>
> Anthony Liguori

If there's an important capability this might make sense.

-- 
MST

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 06:56 AM, Michael S. Tsirkin wrote:

Then we'll have to support it forever. Asking clients to only depend on
valid JSON will make sure we can use json library in the future, as well
as allow easier debugging etc.
   


As I mentioned in IRC, I'm not opposed to making this feature of the 
parser not exposed when dealing with external clients.


But I do believe that we're going to have to extend JSON down the road.  
This particular extension is unimportant so I don't mind limiting its 
visibility.


Regards,

Anthony Liguori

Re: [Qemu-devel] [PATCH] block: prevent multiwrite_merge from creating too large iovecs

2010-01-26 Thread Anthony Liguori


On 01/26/2010 03:21 AM, Christoph Hellwig wrote:

On Wed, Jan 20, 2010 at 12:37:51PM +0100, Kevin Wolf wrote:
   

To underline that it's a backend/platform dependent thing: Your patch
breaks the mingw build for me.
 

Actually that's because mingw is the usual piece of crap and doesn't
actually have any of the vector support you can expect from a normal
Unix system.

I can either throw in an #ifdef IOV_MAX around the check or fake one up
for mingw.  Does any of the maintainers have a preference for either
variant?
   


grep for CONFIG_IOVEC in qemu-common.h and add a #define IOV_MAX.

mingw doesn't have iovec so we introduce a compat version.

Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Daniel P. Berrange

On Tue, Jan 26, 2010 at 06:58:32AM -0600, Anthony Liguori wrote:
> On 01/26/2010 06:46 AM, Michael S. Tsirkin wrote:
> >Yes, I agree we are compliant.
> >But I also think we should be strict and reject non-JSON
> >input just so that clients do not come to depend on it.
> >   
> 
> If we can make JSON better while preserving compatibility and adhering 
> to the spec, why wouldn't we?
> 
> For instance, at some point in time, we're going to do have to do 
> something about floating point representation.  We have the ability to 
> negotiate these capabilities at run-time.

Even if we can negotiate extensions at the protocol level, we need to be
careful about how we actually use them. The client is likely going to be
using whatever standard JSON client comes with their language/environment
and will not necessarily have the ability to change that to make use of the
QEMU specific extension. We don't want to end up with QEMU having a nice
JSON extension for some core feature, but none of the clients being able
to use it in practice.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Anthony Liguori


On 01/26/2010 03:09 AM, Alexander Graf wrote:

On 26.01.2010, at 07:49, Chris Wright wrote:

   

Please send in any agenda items you are interested in covering.
 

KVM Hardware Inquiry Tool
   


Avi beat you to it ;-)  See vmxcap in the tree.


One of the things I have on my todo list is a tool you can run on your machine 
that tells you which virtualization features it supports. Imaginary output of 
such a tool:

--

KVM Supported: yes
NPT/EPT: yes
Device Assignment: no

Expected Virtual CPU Speed: 95%
   


I would suggest exercising caution in making such a broad performance 
statement.  It's never going to be that simple.


Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 07:01 AM, Michael S. Tsirkin wrote:

For instance, at some point in time, we're going to do have to do
something about floating point representation.
 

What's the issue? There's '.' and there's 'e' ...
And maybe we won't need floating point ever ...
   


You cannot represent an IEEE754 floating point value because it lacks 
important representations like NaN and infinity.


Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 07:08 AM, Daniel P. Berrange wrote:

On Tue, Jan 26, 2010 at 06:58:32AM -0600, Anthony Liguori wrote:
   

On 01/26/2010 06:46 AM, Michael S. Tsirkin wrote:
 

Yes, I agree we are compliant.
But I also think we should be strict and reject non-JSON
input just so that clients do not come to depend on it.

   

If we can make JSON better while preserving compatibility and adhering
to the spec, why wouldn't we?

For instance, at some point in time, we're going to do have to do
something about floating point representation.  We have the ability to
negotiate these capabilities at run-time.
 

Even if we can negotiate extensions at the protocol level, we need to be
careful about how we actually use them. The client is likely going to be
using whatever standard JSON client comes with their language/environment
and will not necessarily have the ability to change that to make use of the
QEMU specific extension. We don't want to end up with QEMU having a nice
JSON extension for some core feature, but none of the clients being able
to use it in practice.
   


Agreed.

Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Michael S. Tsirkin

On Tue, Jan 26, 2010 at 07:05:01AM -0600, Anthony Liguori wrote:
> On 01/26/2010 06:56 AM, Michael S. Tsirkin wrote:
>> Then we'll have to support it forever. Asking clients to only depend on
>> valid JSON will make sure we can use json library in the future, as well
>> as allow easier debugging etc.
>>
>
> As I mentioned in IRC, I'm not opposed to making this feature of the  
> parser not exposed when dealing with external clients.
>
> But I do believe that we're going to have to extend JSON down the road.   
> This particular extension is unimportant so I don't mind limiting it's  
> visibility.

I agree.

> Regards,
>
> Anthony Liguori

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 03:11 PM, Anthony Liguori wrote:

On 01/26/2010 03:09 AM, Alexander Graf wrote:

On 26.01.2010, at 07:49, Chris Wright wrote:


Please send in any agenda items you are interested in covering.

KVM Hardware Inquiry Tool


Avi beat you to it ;-)  See vmxcap in the tree.


I knew I should have put a disclaimer in there.  Maybe I should make the 
output vary randomly over time?


Anyway we really need a "virtualization stack inquiry tool", since 
capabilities depend on the hardware, kernel, and qemu.


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Alexander Graf

On 26.01.2010, at 14:11, Anthony Liguori wrote:

> On 01/26/2010 03:09 AM, Alexander Graf wrote:
>> On 26.01.2010, at 07:49, Chris Wright wrote:
>> 
>>   
>>> Please send in any agenda items you are interested in covering.
>>> 
>> KVM Hardware Inquiry Tool
>>   
> 
> Avi beat you to it ;-)  See vmxcap in the tree.

Interesting. Though as the name implies it's for VMX. No good for anybody but 
Intel users. I was more thinking of something generic that would also work just 
fine on PPC and S390.

> 
>> One of the things I have on my todo list is a tool you can run on your 
>> machine that tells you which virtualization features it supports. Imaginary 
>> output of such a tool:
>> 
>> --
>> 
>> KVM Supported: yes
>> NPT/EPT: yes
>> Device Assignment: no
>> 
>> Expected Virtual CPU Speed: 95%
>>   
> 
> I would suggest exercising caution in making such a broad performance 
> statement.  It's never going to be that simple.

Well, I think we should tell users something. We are telling them "According to 
performance measurements, when using NPT with a non-IO heavy workload gives you 
> 90% native performance in the VM" today already. At least that's what I 
remembered ;-).

The message should be something really simple so users know what to expect from 
KVM before they actually use it. With all the device assignment questions 
arising that somehow seems to underline my statement.

I'd also like to see some simple help analysis built into this tool. Something 
like "VMX is disabled in the BIOS", "Machine is Device Passthrough capable, but 
it's disabled in the BIOS", "Please pass parameter XXX to the kernel command 
line to activate feature Y".

The main question is where does it belong?

a) built into qemu
b) built as separate tool, but shipped with qemu
c) completely separate

I'm personally leaning towards a. That way we can reuse the detection code and 
give help when an option is used that doesn't work.

Alex

[Qemu-devel] icount and unaligned IO accesses

2010-01-26 Thread Edgar E. Iglesias

Hi,

While emulating a small MMU-less CRIS system I ran into an -icount
related problem. Without icount the emulation runs fine, with icount
I get stuff like:

qemu: fatal: cpu_io_recompile: could not find TB for pc=0x4be7fd

IIUC, there is a recursion bug in the slow_ldx() calls that ends
up clobbering retaddr. Later if the TB is aborted on the IO
access, the code to map retaddr into guest PC fails to even find
a TB because retaddr wrongly points to slow_ldx().

It seems to me like we simply shouldn't be touching retaddr
in slow_ldx().

The following patch fixes the problem for me. slow_st() was
AFAICS already OK.

Comments?

Cheers

commit a4a31d3039e82b7550933e3d8e1f4c6e9a7f8529
Author: Edgar E. Iglesias 
Date:   Tue Jan 26 13:55:55 2010 +0100

softmmu: Dont clobber retaddr in slow_ldx().

When splitting up unaligned IO accesses, ld calls slow_ld which was
clobbering retaddr.

AFAIK the problem only shows up when running emulations with -icount
that may abort TB execution on IO accesses.

Signed-off-by: Edgar E. Iglesias 

diff --git a/softmmu_template.h b/softmmu_template.h
index abf18d2..9185c32 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -161,7 +161,6 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), 
MMUSUFFIX)(target_ulong addr,
 /* IO access */
 if ((addr & (DATA_SIZE - 1)) != 0)
 goto do_unaligned_access;
-retaddr = GETPC();
 addend = env->iotlb[mmu_idx][index];
 res = glue(io_read, SUFFIX)(addend, addr, retaddr);
 } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= 
TARGET_PAGE_SIZE) {

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 03:18 PM, Alexander Graf wrote:


The main question is where does it belong?

a) built into qemu
b) built as separate tool, but shipped with qemu
c) completely separate

I'm personally leaning towards a. That way we can reuse the detection code and 
give help when an option is used that doesn't work.

   


Me too, especially as the whole stack is involved, and qemu is the 
topmost part from our perspective (no doubt libvirt will want to 
integrate that functionality as well).


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Daniel P. Berrange

On Tue, Jan 26, 2010 at 03:24:50PM +0200, Avi Kivity wrote:
> On 01/26/2010 03:18 PM, Alexander Graf wrote:
> >
> >The main question is where does it belong?
> >
> >a) built into qemu
> >b) built as separate tool, but shipped with qemu
> >c) completely separate
> >
> >I'm personally leaning towards a. That way we can reuse the detection code 
> >and give help when an option is used that doesn't work.
> >
> >   
> 
> Me too, especially as the whole stack is involved, and qemu is the 
> topmost part from our perspective (no doubt libvirt will want to 
> integrate that functionality as well).

FYI, libvirt already exposes this kind of functionality. The API call
virConnectGetCapabilities() / command line "virsh capabilities" command
tells you about what the virtualization host is able to support. It can
tell you what architectures are supported, by which binaries. What
machine types are available. Whether KVM or KQEMU acceleration are
present. What CPU model / flags are on the host. What NUMA topology is
available. etc etc 

The data format it outputs is not exactly targeted at direct
end user consumption, though; rather, it's an XML doc aimed at applications.
The virt-manager app tries to use this to inform the user of problems,
such as the host being capable of hardware virt, but it not being enabled.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Alexander Graf


On 26.01.2010, at 14:33, Daniel P. Berrange wrote:

> On Tue, Jan 26, 2010 at 03:24:50PM +0200, Avi Kivity wrote:
>> On 01/26/2010 03:18 PM, Alexander Graf wrote:
>>> 
>>> The main question is where does it belong?
>>> 
>>> a) built into qemu
>>> b) built as separate tool, but shipped with qemu
>>> c) completely separate
>>> 
>>> I'm personally leaning towards a. That way we can reuse the detection code 
>>> and give help when an option is used that doesn't work.
>>> 
>>> 
>> 
>> Me too, especially as the whole stack is involved, and qemu is the 
>> topmost part from our perspective (no doubt libvirt will want to 
>> integrate that functionality as well).
> 
> FYI, libvirt already exposes this kind of functionality. The API call
> virConnectGetCapabilities() / command line "virsh capabilities" command
> tells you about what the virtualization host is able to support. It can
> tell you what architectures are supported, by which binaries. What
> machine types are available. Whether KVM or KQEMU acceleration are
> present. What CPU model / flags are on the host. What NUMA topology is
> available. etc etc 
> 
> The data format it outputs though is not exactly targetted for direct
> end user consumption though, rather its a XML doc aimed at applications
> The virt-manager app tries to use this to inform the user of problems
> such as ability todo hardware virt, but it not being enabled.

Hrm, while I sympathize with the goals of libvirt and all the efforts in it, 
I'd like to see the stock qemu executable stay as user friendly as possible. One 
of qemu's strong points always used to be its really simple CLI.
So IMHO it rather belongs there with libvirt querying qemu than the other way 
around.

Nevertheless, I suppose the code would be a pretty good starting point!

Alex

Re: [Qemu-devel] [PATCH] block: prevent multiwrite_merge from creating too large iovecs

2010-01-26 Thread Christoph Hellwig

On Tue, Jan 26, 2010 at 07:08:20AM -0600, Anthony Liguori wrote:
> >I can either throw in an #ifdef IOV_MAX around the check or fake one up
> >for mingw.  Does any of the maintainers have a preference for either
> >variant?
> >   
> 
> grep for CONFIG_IOVEC in qemu-common.h and add a #define IOV_MAX.
> 
> mingw doesn't have iovec so we introduce a compat version.

Yes, that's what I meant with the second alternative above.

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 03:33 PM, Daniel P. Berrange wrote:



Me too, especially as the whole stack is involved, and qemu is the
topmost part from our perspective (no doubt libvirt will want to
integrate that functionality as well).
 

FYI, libvirt already exposes this kind of functionality. The API call
virConnectGetCapabilities() / command line "virsh capabilities" command
tells you about what the virtualization host is able to support. It can
tell you what architectures are supported, by which binaries. What
machine types are available. Whether KVM or KQEMU acceleration are
present. What CPU model / flags are on the host. What NUMA topology is
available. etc etc

   


Great.  Note that for a cpu flag to be usable in a guest, it needs to be 
supported by both kvm.ko and qemu, so reporting /proc/cpuinfo is 
insufficient.  There are also synthetic cpu flags (kvm paravirt 
features, x2apic) that aren't present in /proc/cpuinfo.



--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [RFC 00/11]: QMP feature negotiation support

2010-01-26 Thread Luiz Capitulino

On Tue, 26 Jan 2010 12:57:54 +
Jamie Lokier  wrote:

> Luiz Capitulino wrote:
> > capability_enable [ "foo", "bar" ]
> > 
> >  Now, only one command is not terrible difficult, but we would
> > have to accept an array of objects, like:
> > 
> > [ { "name": "foo", "enabled": true }, { "name": "bar", "enabled": true } ]
> 
> That looks like XML-itis.

 This is valid json, we already output data in this format and will
likely accept it in other commands.

> Why not { "foo": true, "bar": true }?

 Possible, but if we use a dict then I would prefer the previous format,
because it can be extended in a compatible way (while a single list and
yours don't).

[Qemu-devel] [PATCH v2] block: avoid creating too large iovecs in multiwrite_merge

2010-01-26 Thread Christoph Hellwig

If we go over the maximum number of iovecs supported by the syscall we get
back EINVAL from the kernel, which translates to I/O errors for the guest.

Add an IOV_MAX definition for platforms that don't have it.  For now we use
the same 1024 define that's used on Linux and various other platforms,
but until the windows block backend implements some kind of vectored I/O
it doesn't matter.

Signed-off-by: Christoph Hellwig 

Index: qemu/block.c
===
--- qemu.orig/block.c   2010-01-26 10:59:39.757004445 +0100
+++ qemu/block.c2010-01-26 11:01:38.056023231 +0100
@@ -1689,6 +1689,10 @@ static int multiwrite_merge(BlockDriverS
 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
 }
 
+if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+merge = 0;
+}
+
 if (merge) {
 size_t size;
 QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
Index: qemu/qemu-common.h
===
--- qemu.orig/qemu-common.h 2010-01-26 14:41:40.894254285 +0100
+++ qemu/qemu-common.h  2010-01-26 14:42:27.267275698 +0100
@@ -54,6 +54,10 @@ struct iovec {
 void *iov_base;
 size_t iov_len;
 };
+/*
+ * Use the same value as Linux for now.
+ */
+#define IOV_MAX 1024
 #else
 #include <sys/uio.h>
 #endif

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Avi Kivity


On 01/26/2010 02:47 PM, Anthony Liguori wrote:

On 01/26/2010 05:43 AM, Luiz Capitulino wrote:

The issue I see isn't related to unsigned.  Apparently we currently
accept values such as 'a' as valid strings. Since this is not valid 
json

we probably should reject it just in case we will want to switch to
another json library, otherwise clients might come to depend on
non-standard behaviour.

  This extension is only used internally by QEMU and we find it
very convenient otherwise we would have to escape strings in
dicts and lists, which is error prone and time consuming.


Actually, I was reading the JSON RFC last night and came across:

  "A JSON parser transforms a JSON text into another representation. A
   JSON parser MUST accept all texts that conform to the JSON grammar.
   A JSON parser MAY accept non-JSON forms or extensions."

So we are fully JSON compliant in our current implementation.



The risk is that if we support a private extension (like '') and then 
json is officially extended to support a conflicting or similar syntax 
with a different meaning, then we cannot advance to the next revision of 
json without breaking compatibility.


In the case of '', the odds of a such a clash are very low, but 
nevertheless I think we should refrain from doing so.  Being strict is good.


--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 07:55 AM, Avi Kivity wrote:
The risk is that if we support a private extension (like '') and then 
json is officially extended to support a conflicting or similar syntax 
with a different meaning, then we cannot advance to the next revision 
of json without breaking compatibility.


The paragraph I quoted from the RFC seems to suggest that the authors of 
JSON boxed themselves in with respect to extending JSON.  The reason 
being that a conforming implementation is given free reign to extend 
with "non-JSON forms or extensions".  That would seem to prevent any 
extension.


Keep in mind, JSON is a proper subset of ECMAScript which means the 
likelihood of extension going outside of ECMAScript would be extremely 
unlikely.  I don't expect JSON is ever going to change.


Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Avi Kivity


On 01/26/2010 04:05 PM, Anthony Liguori wrote:

On 01/26/2010 07:55 AM, Avi Kivity wrote:
The risk is that if we support a private extension (like '') and then 
json is officially extended to support a conflicting or similar 
syntax with a different meaning, then we cannot advance to the next 
revision of json without breaking compatibility.


The paragraph I quoted from the RFC seems to suggest that the authors 
of JSON boxed themselves in with respect to extending JSON.  The 
reason being that a conforming implementation is given free reign to 
extend with "non-JSON forms or extensions".  That would seem to 
prevent any extension.


A json generator is required to generate conforming text.  So there are 
three choices:


- reject 's
- unofficially accept 's, nonconforming generators break if json 
changes, not our problem

- officially accept 's, look stupid when json changes



Keep in mind, JSON is a proper subset of ECMAScript which means the 
likelihood of extension going outside of ECMAScript would be extremely 
unlikely.  I don't expect JSON is ever going to change.


Who knows?  Let's not take unnecessary risks.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Anthony Liguori


On 01/26/2010 07:24 AM, Avi Kivity wrote:

On 01/26/2010 03:18 PM, Alexander Graf wrote:


The main question is where does it belong?

a) built into qemu
b) built as separate tool, but shipped with qemu
c) completely separate

I'm personally leaning towards a. That way we can reuse the detection 
code and give help when an option is used that doesn't work.




Me too, especially as the whole stack is involved, and qemu is the 
topmost part from our perspective (no doubt libvirt will want to 
integrate that functionality as well).


I'm not sure I agree.  It would use no code from qemu and really benefit 
in no way from being part of qemu.  I don't feel that strongly about it 
though.


Regards,

Anthony Liguori

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 04:13 PM, Anthony Liguori wrote:
Me too, especially as the whole stack is involved, and qemu is the 
topmost part from our perspective (no doubt libvirt will want to 
integrate that functionality as well).



I'm not sure I agree.  It would use no code from qemu and really 
benefit in no way from being part of qemu.  I don't feel that strongly 
about it though.




It would need to know which cpuid bits qemu supports.  Only qemu knows that.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] [PATCH] configure: verify stdio.h

2010-01-26 Thread Michael S. Tsirkin

Verify that stdio.h supports %ll and %z.
Some mingw variants don't unless requested explicitly (see
http://www.mail-archive.com/mingw-w64-pub...@lists.sourceforge.net/msg00417.html);
detect them early.

Signed-off-by: Michael S. Tsirkin 
---
 configure |   25 +
 1 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/configure b/configure
index 5631bbb..7bbe4bc 100755
--- a/configure
+++ b/configure
@@ -123,6 +123,31 @@ else
 exit 1
 fi
 
+# Check that stdio.h compiler is sane: some
+# mingw variants do not support %z %l that we rely on
+cat > $TMPC <<EOF
+#include <stdio.h>
+size_t z = 1;
+long long ll = 2;
+int main(void) {
+  printf("z=%zd;ll=%lld;\n", z, ll);
+  return 0;
+}
+EOF
+
+if compile_prog && ($TMPE | grep "z=1;ll=2;" > /dev/null); then
+  : C compiler works ok
+else
+echo "ERROR: \"$cc\" does not have a working stdio.h"
+exit 1
+fi
+
+cat > $TMPC << EOF
+#include <unistd.h>
+int main(void) { return fdatasync(0); }
+EOF
+
 check_define() {
 cat > $TMPC <<EOF

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Anthony Liguori


On 01/26/2010 08:15 AM, Avi Kivity wrote:

On 01/26/2010 04:13 PM, Anthony Liguori wrote:
Me too, especially as the whole stack is involved, and qemu is the 
topmost part from our perspective (no doubt libvirt will want to 
integrate that functionality as well).



I'm not sure I agree.  It would use no code from qemu and really 
benefit in no way from being part of qemu.  I don't feel that 
strongly about it though.




It would need to know which cpuid bits qemu supports.  Only qemu knows 
that.


I'm not sure I understand why.  Can you elaborate?

Regards,

Anthony Liguori

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 04:22 PM, Anthony Liguori wrote:

On 01/26/2010 08:15 AM, Avi Kivity wrote:

On 01/26/2010 04:13 PM, Anthony Liguori wrote:
Me too, especially as the whole stack is involved, and qemu is the 
topmost part from our perspective (no doubt libvirt will want to 
integrate that functionality as well).



I'm not sure I agree.  It would use no code from qemu and really 
benefit in no way from being part of qemu.  I don't feel that 
strongly about it though.




It would need to know which cpuid bits qemu supports.  Only qemu 
knows that.


I'm not sure I understand why.  Can you elaborate?



If qemu doesn't recognize -cpu qemu64,+nx, then no amount of hardware 
and kvm.ko support will allow the user to enable nx in a guest.


--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [RFC 00/11]: QMP feature negotiation support

2010-01-26 Thread Daniel P. Berrange

On Tue, Jan 26, 2010 at 12:57:54PM +, Jamie Lokier wrote:
> Luiz Capitulino wrote:
> > capability_enable [ "foo", "bar" ]
> > 
> >  Now, only one command is not terrible difficult, but we would
> > have to accept an array of objects, like:
> > 
> > [ { "name": "foo", "enabled": true }, { "name": "bar", "enabled": true } ]
> 
> That looks like XML-itis.
> 
> Why not { "foo": true, "bar": true }?

It depends on whether we think we're going to need to add more metadata
beyond just the enabled/disabled status. If we did want to add a further
item against foo & bar, then having the array of hashes makes that 
extension easier because you can easily add more key/value pairs to
each.


Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Anthony Liguori


On 01/26/2010 08:26 AM, Avi Kivity wrote:

On 01/26/2010 04:22 PM, Anthony Liguori wrote:

On 01/26/2010 08:15 AM, Avi Kivity wrote:

On 01/26/2010 04:13 PM, Anthony Liguori wrote:
Me too, especially as the whole stack is involved, and qemu is the 
topmost part from our perspective (no doubt libvirt will want to 
integrate that functionality as well).



I'm not sure I agree.  It would use no code from qemu and really 
benefit in no way from being part of qemu.  I don't feel that 
strongly about it though.




It would need to know which cpuid bits qemu supports.  Only qemu 
knows that.


I'm not sure I understand why.  Can you elaborate?



If qemu doesn't recognize -cpu qemu64,+nx, then no amount of hardware 
and kvm.ko support will allow the user to enable nx in a guest.


Does -cpu host filter out flags that we don't know about?  I'm pretty 
sure it doesn't.  Since we're planning on moving to -cpu host by default 
for KVM, does it really matter?


Oh, I was under the impression that the tool was meant to be software 
agnostic.  IOW, here are all the virt features your hardware supports.


Regards,

Anthony Liguori

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 04:32 PM, Anthony Liguori wrote:
It would need to know which cpuid bits qemu supports.  Only qemu 
knows that.


I'm not sure I understand why.  Can you elaborate?



If qemu doesn't recognize -cpu qemu64,+nx, then no amount of hardware 
and kvm.ko support will allow the user to enable nx in a guest.



Does -cpu host filter out flags that we don't know about?  I'm pretty 
sure it doesn't.  Since we're planning on moving to -cpu host by 
default for KVM, does it really matter?


People who use discovery tools are probably setting up a migration 
cluster.  They aren't going to use -cpu host.




Oh, I was under the impression that the tool was meant to be software 
agnostic.  IOW, here are all the virt features your hardware supports.


That's /proc/cpuinfo, we should just extend it, maybe that's what Alex 
meant, but I'd like to see something more capable.


--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] Re: [PATCH] win32: use PRId64 instead of %lld

2010-01-26 Thread Anthony Liguori


On 01/26/2010 08:12 AM, Avi Kivity wrote:

On 01/26/2010 04:05 PM, Anthony Liguori wrote:

On 01/26/2010 07:55 AM, Avi Kivity wrote:
The risk is that if we support a private extension (like '') and 
then json is officially extended to support a conflicting or similar 
syntax with a different meaning, then we cannot advance to the next 
revision of json without breaking compatibility.


The paragraph I quoted from the RFC seems to suggest that the authors 
of JSON boxed themselves in with respect to extending JSON.  The 
reason being that a conforming implementation is given free reign to 
extend with "non-JSON forms or extensions".  That would seem to 
prevent any extension.


A json generator is required to generate conforming text.  So there 
are three choices:


- reject 's
- unofficially accept 's, nonconforming generators break if json 
changes, nor our problem

- officially accept 's, look stupid when json changes



Keep in mind, JSON is a proper subset of ECMAScript which means the 
likelihood of extension going outside of ECMAScript would be 
extremely unlikely.  I don't expect JSON is ever going to change.


Who knows?  Let's not take unnecessary risks.


Keep in mind, I've already agreed to not allow '' strings for external 
JSON.  The only reason the thread's still alive is because we like to 
argue apparently :-)  Single quoted strings are not sufficiently useful 
to warrant taking any risks here.


Regards,

Anthony Liguori

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Alexander Graf

On 26.01.2010, at 15:37, Avi Kivity wrote:

> On 01/26/2010 04:32 PM, Anthony Liguori wrote:
> It would need to know which cpuid bits qemu supports.  Only qemu knows 
> that.

 I'm not sure I understand why.  Can you elaborate?

>>> 
>>> If qemu doesn't recognize -cpu qemu64,+nx, then no amount of hardware and 
>>> kvm.ko support will allow the user to enable nx in a guest.
>> 
>> 
>> Does -cpu host filter out flags that we don't know about?  I'm pretty sure 
>> it doesn't.  Since we're planning on moving to -cpu host by default for KVM, 
>> does it really matter?
> 
> People who use discovery tools are probably setting up a migration cluster.  
> They aren't going to use -cpu host.
> 
>> 
>> Oh, I was under the impression that the tool was meant to be software 
>> agnostic.  IOW, here are all the virt features your hardware supports.
> 
> That's /proc/cpuinfo, we should just extend it, maybe that's what Alex meant, 
> but I'd like to see something more capable.

I think we're all looking at different use-cases.

First and foremost, the one type of user I'm concerned with in this case is a 
mortal end-user who doesn't know that much about virtualization details and 
doesn't care what NPT is. He just wants to have a VM running and wants to know 
how well it'll work.

For such a user an addition to /proc/cpuinfo would be enough, if it'd include 
IOMMU information. Or maybe /proc/iommu?

I think users should be able to run some simple command to evaluate if what 
they're trying to do works out. And if not, the command should give assistance 
on how to make things work (buy a new mainboard, set this kernel option, ...)

Of course one could fit in stuff for management tools too, but that's not my 
main goal for this feature.

Alex

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Anthony Liguori


On 01/26/2010 08:37 AM, Avi Kivity wrote:
People who use discovery tools are probably setting up a migration 
cluster.  They aren't going to use -cpu host.


BTW, it might be neat to introduce a qemu command line that runs a 
monitor command and exits without creating a VM.  We could then 
introduce a info cpucap command that dumped all of the supported CPU 
features.


Someone setting up a migration cluster would then run qemu -monitor 
command="info cpucap", collect the results, compute an intersection, and 
then use that to generate a -cpu flag.  In fact, providing a tool that 
parsed a bunch of those outputs and generated a -cpu flag would be a 
pretty nice addition.




Oh, I was under the impression that the tool was meant to be software 
agnostic.  IOW, here are all the virt features your hardware supports.


That's /proc/cpuinfo, we should just extend it, maybe that's what Alex 
meant, but I'd like to see something more capable.


I definitely think extending /proc/cpuinfo or introducing a 
/proc/virtinfo would be a good idea regardless of any tool we introduce.


Regards,

Anthony Liguori

[Qemu-devel] CONFIG_FB_CIRRUS for Linux guests

2010-01-26 Thread Jan Kiszka

Hi,

just received some Linux guest kernel that refuses to properly use its
framebuffer console under QEMU (probably any version, tried down to some
0.10.x). It turned out that it has CONFIG_FB_CIRRUS enabled and fails to
initialize the display when this is actually used. Anyone came across
this before or has some idea what goes wrong?

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 04:42 PM, Anthony Liguori wrote:

On 01/26/2010 08:37 AM, Avi Kivity wrote:
People who use discovery tools are probably setting up a migration 
cluster.  They aren't going to use -cpu host.


BTW, it might be neat to introduce a qemu command line that runs a 
monitor command and exits without creating a VM.  We could then 
introduce a info cpucap command that dumped all of the supported CPU 
features.


Someone setting up a migration cluster would then run qemu -monitor 
command="info cpucap", collect the results, compute an intersection, 
and then use that to generate a -cpu flag.  In fact, providing a tool 
that parsed a bunch of those outputs and generated a -cpu flag would 
be a pretty nice addition.


Definitely.  And query about supported machine models virtio NIC 
features, etc.




--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Avi Kivity


On 01/26/2010 04:42 PM, Alexander Graf wrote:



That's /proc/cpuinfo, we should just extend it, maybe that's what Alex meant, 
but I'd like to see something more capable.
 

I think we're all looking at different use-cases.

First and frontmost the one type of user I'm concerned with in this case is a 
mortal end-user who doesn't know that much about virtualization details and 
doesn't care what NPT is. He just wants to have a VM running and wants to know 
how well it'll work.
   


It really depends on what he does with it.  3D gaming? might have a 
different experience from the always exciting kernel builds.


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Alexander Graf

On 26.01.2010, at 15:47, Avi Kivity wrote:

> On 01/26/2010 04:42 PM, Alexander Graf wrote:
>> 
>>> That's /proc/cpuinfo, we should just extend it, maybe that's what Alex 
>>> meant, but I'd like to see something more capable.
>>> 
>> I think we're all looking at different use-cases.
>> 
>> First and frontmost the one type of user I'm concerned with in this case is 
>> a mortal end-user who doesn't know that much about virtualization details 
>> and doesn't care what NPT is. He just wants to have a VM running and wants 
>> to know how well it'll work.
>>   
> 
> It really depends on what he does with it.  3D gaming? might have a different 
> experience from the always exciting kernel builds.

Well, we can give an estimation (based on previous measurements) for certain 
subsystems. Like I proposed in the original mail, we can actually give users 
information about virtual CPU speed.

With SPICE hopefully merged one day we also could give some estimates on 3D 
performance.

Alex

[Qemu-devel] Re: KVM call agenda for Jan 26

2010-01-26 Thread Anthony Liguori


On 01/26/2010 08:50 AM, Alexander Graf wrote:

On 26.01.2010, at 15:47, Avi Kivity wrote:

   

On 01/26/2010 04:42 PM, Alexander Graf wrote:
 
   

That's /proc/cpuinfo, we should just extend it, maybe that's what Alex meant, 
but I'd like to see something more capable.

 

I think we're all looking at different use-cases.

First and frontmost the one type of user I'm concerned with in this case is a 
mortal end-user who doesn't know that much about virtualization details and 
doesn't care what NPT is. He just wants to have a VM running and wants to know 
how well it'll work.

   

It really depends on what he does with it.  3D gaming? might have a different 
experience from the always exciting kernel builds.
 

Well, we can give an estimation (based on previous measurements) for certain 
subsystems. Like I proposed in the original mail, we can actually give users 
information about virtual CPU speed.
   


The problem with making an unqualified statement about something like 
"virtual CPU speed" is that if a user runs a random benchmark, and gets 
less than XX%, they'll consider it a bug and be unhappy.


I'm very reluctant to take anything in QEMU that makes promises about 
virtualization performance.  It's a bad idea IMHO.



With SPICE hopefully merged one day we also could give some estimates on 3D 
performance.
   


Spice doesn't support 3D today.

Regards,

Anthony Liguori


Alex

Re: [Qemu-devel] [RFC 00/11]: QMP feature negotiation support

2010-01-26 Thread Jamie Lokier

Daniel P. Berrange wrote:
> On Tue, Jan 26, 2010 at 12:57:54PM +, Jamie Lokier wrote:
> > Luiz Capitulino wrote:
> > > capability_enable [ "foo", "bar" ]
> > > 
> > >  Now, only one command is not terrible difficult, but we would
> > > have to accept an array of objects, like:
> > > 
> > > [ { "name": "foo", "enabled": true }, { "name": "bar", "enabled": true } ]
> > 
> > That looks like XML-itis.
> > 
> > Why not { "foo": true, "bar": true }?
> 
> It depends on whether we think we're going to need to add more metadata
> beyond just the enabled/disabled status. If we did want to add a further
> item against foo & bar, then having the array of hashes makes that 
> extension easier becaue you add easily add more key/value pairs to
> each.

Sure, extensibility is good, and I personally don't care which
format/function are used.  Just wanted to question the padded
structure, because sometimes that style is done unintentionally.

Look at the argument leading up here - Luiz says let's use separate,
non-extensible enable/disable commands taking a list, because if it
were a single command it'd be important to make it extensible.  Does
that make sense?  I don't understand that reasoning.

On that topic: In the regular monitor, commands are often extensible
because they take command-line-style options, and you can always add
more options.  What about QMP - are QMP commands all future-extensible
with options in a similar way?

-- Jamie

(ps. XML-itis: a tendency to write
<tag><name>tagname</name><value>value</value></tag>,
when <tagname>value</tagname> would do).

[Qemu-devel] [PATCH 4/5] linux-user: Add access to TLS registers

2010-01-26 Thread Riku Voipio

From: Riku Voipio 

If you compile applications with gcc -mtp=cp15, __thread
accesses will generate an abort. Implement access to the allowed
cp15.c13 registers on ARMv6K+ in linux-user.

Signed-off-by: Riku Voipio 
---
 target-arm/helper.c |   27 ++-
 1 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index b3aec99..68578ce 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -505,13 +505,38 @@ uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
 
 void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
 {
+int op2;
+
+op2 = (insn >> 5) & 7;
+/* Allow write access to CP15 User RW Thread ID Register */
+if (arm_feature (env, ARM_FEATURE_V6K) && ((insn >> 16) & 0xf) == 13) {
+switch (op2) {
+case 2:
+env->cp15.c13_tls1 = val;
+return;
+}
+}
 cpu_abort(env, "cp15 insn %08x\n", insn);
 }
 
 uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
 {
+int op2;
+/* Allow read access to CP15 User RW and RO Thread ID Registers */
+
+op2 = (insn >> 5) & 7;
+if (arm_feature (env, ARM_FEATURE_V6K) && ((insn >> 16) & 0xf) == 13) {
+switch (op2) {
+case 2:
+return env->cp15.c13_tls1;
+case 3:
+return env->cp15.c13_tls2;
+default:
+goto bad_reg;
+}
+}
+bad_reg:
 cpu_abort(env, "cp15 insn %08x\n", insn);
-return 0;
 }
 
 /* These should probably raise undefined insn exceptions.  */
-- 
1.6.5

[Qemu-devel] [PATCH 0/5] linux-user for-upstream patches

2010-01-26 Thread Riku Voipio

From: Riku Voipio 

Some fixes to avoid hanging, make arm uname match selected
cpu, and fixes to cp15.c13 registers for linux-user tls register
access.

Loïc Minier (1):
  linux-user: adapt uname machine to emulated CPU

Riku Voipio (4):
  fix locking error with current_tb
  linux-user: remove signal handler before calling abort()
  linux-user: Add access to TLS registers
  target-arm: Thread ID Registers are ARMv6K +

 Makefile.target|2 +-
 exec.c |   13 +++-
 linux-user/cpu-uname.c |   72 
 linux-user/cpu-uname.h |1 +
 linux-user/syscall.c   |3 +-
 target-arm/helper.c|   39 +-
 6 files changed, 125 insertions(+), 5 deletions(-)
 create mode 100644 linux-user/cpu-uname.c
 create mode 100644 linux-user/cpu-uname.h

[Qemu-devel] [PATCH 2/5] fix locking error with current_tb

2010-01-26 Thread Riku Voipio

From: Riku Voipio 

Signed-off-by: Riku Voipio 
---
 exec.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index 1190591..71f655f 100644
--- a/exec.c
+++ b/exec.c
@@ -1537,15 +1537,15 @@ static void cpu_unlink_tb(CPUState *env)
 TranslationBlock *tb;
 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
 
+spin_lock(&interrupt_lock);
 tb = env->current_tb;
 /* if the cpu is currently executing code, we must unlink it and
all the potentially executing TB */
 if (tb) {
-spin_lock(&interrupt_lock);
 env->current_tb = NULL;
 tb_reset_jump_recursive(tb);
-spin_unlock(&interrupt_lock);
 }
+spin_unlock(&interrupt_lock);
 }
 
 /* mask must never be zero, except for A20 change call */
-- 
1.6.5

[Qemu-devel] [PATCH 1/5] linux-user: adapt uname machine to emulated CPU

2010-01-26 Thread Riku Voipio

From: Loïc Minier 

This patch for linux-user adapts the output of the emulated uname()
syscall to match the configured CPU.  Tested with x86, x86-64 and arm
emulation.

Signed-off-by: Loïc Minier 
Signed-off-by: Riku Voipio 
---
 Makefile.target|2 +-
 linux-user/cpu-uname.c |   72 
 linux-user/cpu-uname.h |1 +
 linux-user/syscall.c   |3 +-
 4 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 linux-user/cpu-uname.c
 create mode 100644 linux-user/cpu-uname.h

diff --git a/Makefile.target b/Makefile.target
index 5c0ef1f..9dfc4c2 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -95,7 +95,7 @@ $(call set-vpath, 
$(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
 
 QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user 
-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR)
 obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
-  elfload.o linuxload.o uaccess.o gdbstub.o
+  elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o
 
 obj-$(TARGET_HAS_BFLT) += flatload.o
 obj-$(TARGET_HAS_ELFLOAD32) += elfload32.o
diff --git a/linux-user/cpu-uname.c b/linux-user/cpu-uname.c
new file mode 100644
index 000..23afede
--- /dev/null
+++ b/linux-user/cpu-uname.c
@@ -0,0 +1,72 @@
+/*
+ *  cpu to uname machine name map
+ *
+ *  Copyright (c) 2009 Loïc Minier
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+
+#include "qemu.h"
+//#include "qemu-common.h"
+#include "cpu-uname.h"
+
+/* return highest utsname machine name for emulated instruction set
+ *
+ * NB: the default emulated CPU ("any") might not match any existing CPU, e.g.
+ * on ARM it has all features turned on, so there is no perfect arch string to
+ * return here */
+const char *cpu_to_uname_machine(void *cpu_env)
+{
+#ifdef TARGET_ARM
+/* utsname machine name on linux arm is CPU arch name + endianness, e.g.
+ * armv7l; to get a list of CPU arch names from the linux source, use:
+ * grep arch_name: -A1 linux/arch/arm/mm/proc-*.S
+ * see arch/arm/kernel/setup.c: setup_processor()
+ *
+ * to test by CPU id, compare cpu_env->cp15.c0_cpuid to ARM_CPUID_*
+ * defines and to test by CPU feature, use arm_feature(cpu_env,
+ * ARM_FEATURE_*) */
+
+/* in theory, endianness is configurable on some ARM CPUs, but this isn't
+ * used in user mode emulation */
+#ifdef TARGET_WORDS_BIGENDIAN
+#define utsname_suffix "b"
+#else
+#define utsname_suffix "l"
+#endif
+if (arm_feature(cpu_env, ARM_FEATURE_V7))
+return "armv7" utsname_suffix;
+if (arm_feature(cpu_env, ARM_FEATURE_V6))
+return "armv6" utsname_suffix;
+/* earliest emulated CPU is ARMv5TE; qemu can emulate the 1026, but not its
+ * Jazelle support */
+return "armv5te" utsname_suffix;
+#elif defined(TARGET_X86_64)
+return "x86-64";
+#elif defined(TARGET_I386)
+/* see arch/x86/kernel/cpu/bugs.c: check_bugs(), 386, 486, 586, 686 */
+uint32_t cpuid_version = ((CPUX86State *)cpu_env)->cpuid_version;
+int family = ((cpuid_version >> 8) & 0x0f) + ((cpuid_version >> 20) & 
0xff);
+if (family == 4)
+return "i486";
+if (family == 5)
+return "i586";
+return "i686";
+#else
+/* default is #define-d in each arch/ subdir */
+return UNAME_MACHINE;
+#endif
+}
diff --git a/linux-user/cpu-uname.h b/linux-user/cpu-uname.h
new file mode 100644
index 000..32492de
--- /dev/null
+++ b/linux-user/cpu-uname.h
@@ -0,0 +1 @@
+const char *cpu_to_uname_machine(void *cpu_env);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index f2dd39e..9fb493f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -82,6 +82,7 @@
 #include 
 #include 
 #include "linux_loop.h"
+#include "cpu-uname.h"
 
 #include "qemu.h"
 #include "qemu-common.h"
@@ -5739,7 +5740,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 if (!is_error(ret)) {
 /* Overrite the native machine name with whatever is being
emulated. */
-strcpy (buf->machine, UNAME_MACHINE);
+strcpy (buf->machine, cpu_to_uname_machine(cpu_env));
 /* Allow the user to override the reported release.  */
 if (qemu_uname_release && *qemu_uname_release)
   strcpy (buf->release, qemu_uname_release);
-- 
1.6.5

[Qemu-devel] [PATCH 5/5] target-arm: Thread ID Registers are ARMv6K +

2010-01-26 Thread Riku Voipio

From: Riku Voipio 

Change the system-emulation code also to only support accessing
the cp15.c13 Thread ID registers on ARMv6K and newer.

Signed-off-by: Riku Voipio 
---
 target-arm/helper.c |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 68578ce..1245d9d 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1517,12 +1517,18 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, 
uint32_t val)
 env->cp15.c13_context = val;
 break;
 case 2:
+if (!arm_feature(env, ARM_FEATURE_V6K))
+goto bad_reg;
 env->cp15.c13_tls1 = val;
 break;
 case 3:
+if (!arm_feature(env, ARM_FEATURE_V6K))
+goto bad_reg;
 env->cp15.c13_tls2 = val;
 break;
 case 4:
+if (!arm_feature(env, ARM_FEATURE_V6K))
+goto bad_reg;
 env->cp15.c13_tls3 = val;
 break;
 default:
@@ -1805,10 +1811,16 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
 case 1:
 return env->cp15.c13_context;
 case 2:
+if (!arm_feature(env, ARM_FEATURE_V6K))
+goto bad_reg;
 return env->cp15.c13_tls1;
 case 3:
+if (!arm_feature(env, ARM_FEATURE_V6K))
+goto bad_reg;
 return env->cp15.c13_tls2;
 case 4:
+if (!arm_feature(env, ARM_FEATURE_V6K))
+goto bad_reg;
 return env->cp15.c13_tls3;
 default:
 goto bad_reg;
-- 
1.6.5

[Qemu-devel] [PATCH 3/5] linux-user: remove signal handler before calling abort()

2010-01-26 Thread Riku Voipio

From: Riku Voipio 

Qemu may hang in host_signal_handler after qemu has done a
seppuku with cpu_abort(). But at this stage we are not really
interested in target process coredump anymore, so unregister
host_signal_handler to die gracefully.

Signed-off-by: Riku Voipio 
---
 exec.c |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/exec.c b/exec.c
index 71f655f..3145b83 100644
--- a/exec.c
+++ b/exec.c
@@ -40,6 +40,7 @@
 #include "kvm.h"
 #if defined(CONFIG_USER_ONLY)
 #include 
+#include 
 #endif
 
 //#define DEBUG_TB_INVALIDATE
@@ -1692,6 +1693,14 @@ void cpu_abort(CPUState *env, const char *fmt, ...)
 }
 va_end(ap2);
 va_end(ap);
+#if defined(CONFIG_USER_ONLY)
+{
+struct sigaction act;
+sigfillset(&act.sa_mask);
+act.sa_handler = SIG_DFL;
+sigaction(SIGABRT, &act, NULL);
+}
+#endif
 abort();
 }
 
-- 
1.6.5

Re: [Qemu-devel] CONFIG_FB_CIRRUS for Linux guests

2010-01-26 Thread Alexander Graf

On 26.01.2010, at 15:44, Jan Kiszka wrote:

> Hi,
> 
> just received some Linux guest kernel that refuses to properly use its
> framebuffer console under QEMU (probably any version, tried down to some
> 0.10.x). It turned out that it has CONFIG_FB_CIRRUS enabled and fails to
> initialize the display when this is actually used. Anyone came across
> this before or has some idea what goes wrong?

The only thing I've seen that might be a similar issue is that the Cirrus 
driver on Windows NT 4 just shows a black screen with KVM.

Alex

[Qemu-devel] [PATCHv2] configure: verify stdio.h

2010-01-26 Thread Michael S. Tsirkin

Verify that stdio.h supports %lld %zd.
Some mingw variants don't unless requested explicitly (see
http://www.mail-archive.com/mingw-w64-pub...@lists.sourceforge.net/msg00417.html)
), detect them early.

Signed-off-by: Michael S. Tsirkin 
Acked-by: Juan Quintela 

---

changes from v1:
removed leftover chunk

 configure |   20 
 1 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/configure b/configure
index 5631bbb..6ba06d6 100755
--- a/configure
+++ b/configure
@@ -123,6 +123,26 @@ else
 exit 1
 fi
 
+# Check that stdio.h compiler is sane: some
+# mingw variants do not support %z %l that we rely on
+cat > $TMPC <
+#include 
+size_t z = 1;
+long long ll = 2;
+int main(void) {
+  printf("z=%zd;ll=%lld;\n", z, ll);
+  return 0;
+}
+EOF
+
+if compile_prog && ($TMPE | grep "z=1;ll=2;" > /dev/null); then
+  : C compiler works ok
+else
+echo "ERROR: \"$cc\" does not have a working stdio.h"
+exit 1
+fi
+
 check_define() {
 cat > $TMPC <

[Qemu-devel] Re: regression between 0.12.1.2 and 0.12.2

2010-01-26 Thread Jan Kiszka

Jan Kiszka wrote:
> Toralf Förster wrote:
>> Hi,
>>
>> under a mostly stable Gentoo I observed this new msg :
>>
>> tfoer...@n22 ~/virtual/kvm $ qemu -hda gentoo_kdevm.img -hdb 
>> portage_kdeprefix.img -hdd swap.img -smp 2 -m 768 -vga std -soundhw es1370   
>>  
>>
>> BUG: kvm_dirty_pages_log_enable_slot: invalid parameters 
>> 
>> BUG: kvm_dirty_pages_log_disable_slot: invalid parameters
>> 
>> ..
>>
>> The kvm image can be derived from 
>> http://dev.gentooexperimental.org/~wired/kvm/ .
>>
>> My system is a :
>> tfoer...@n22 ~/virtual/kvm $ uname -a
>> Linux n22 2.6.32.4 #1 SMP Mon Jan 18 20:20:38 CET 2010 i686 Intel(R) 
>> Core(TM)2 Duo CPU P8600 @ 2.40GHz GenuineIntel GNU/Linux
>>
>>
> 
> That's a pre-0.12.1.2 qemu-kvm issue, upstream is not affected - or is
> at least not reporting it. It's already in my todo queue, just waiting
> to be dequeued.

I've looked into this a bit, and the bug message that pops up is in fact
new for your scenario (0.12.1.2->0.12.2, -vga std), it just happens to
trigger for me as well in a slightly different setup (CONFIG_FB_CIRRUS).

This is "mostly harmless" (the bug is gracefully handled), indicating
that qemu-kvm tries to enable/disable dirty logging for a VGA memory
area that was just unregistered. And that is because qemu-kvm tries to
keep support for old host kernels that had bugs and required workaround
approaches, but that code is bit-rotting a bit.

Avi, we should get rid of these messages, either by suppressing them in
qemu-kvm for now (stick your head into the sand...) or by finally
dropping all those dirty-logging diffs to upstream (in theory, there is
a third option: fixing the workaround code, but I don't think it's worth
the effort). What could be a road map for dropping? What distro kernels
are you aware of that may become unusable then?

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] [RFC 00/11]: QMP feature negotiation support

2010-01-26 Thread Luiz Capitulino

On Tue, 26 Jan 2010 15:57:46 +
Jamie Lokier  wrote:

> Daniel P. Berrange wrote:
> > On Tue, Jan 26, 2010 at 12:57:54PM +, Jamie Lokier wrote:
> > > Luiz Capitulino wrote:
> > > > capability_enable [ "foo", "bar" ]
> > > > 
> > > >  Now, only one command is not terribly difficult, but we would
> > > > have to accept an array of objects, like:
> > > > 
> > > > [ { "name": "foo", "enabled": true }, { "name": "bar", "enabled": true 
> > > > } ]
> > > 
> > > That looks like XML-itis.
> > > 
> > > Why not { "foo": true, "bar": true }?
> > 
> > It depends on whether we think we're going to need to add more metadata
> > beyond just the enabled/disabled status. If we did want to add a further
> > item against foo & bar, then having the array of hashes makes that 
> > extension easier because you can easily add more key/value pairs to
> > each.
> 
> Sure, extensibility is good, and I personally don't care which
> format/function are used.  Just wanted to question the padded
> structure, because sometimes that style is done unintentionally.
> 
> Look at the argument leading up here - Luiz says let's use separate,
> non-extensible enable/disable commands taking a list, because if it
> were a single command it'd be important to make it extensible.  Does
> that make sense?  I don't understand that reasoning.

 I didn't consider extensibility in my first format, but we could also
have:

capability_enable [ { "name": "foo" }, { "name": "bar" } ]

> On that topic: In the regular monitor, commands are often extensible
> because they take command-line-style options, and you can always add
> more options.  What about QMP - are QMP commands all future-extensible
> with options in a similar way?

 Yes, command input is done through a json-object as does output.

Re: [Qemu-devel] [PATCH 4/5] linux-user: Add access to TLS registers

2010-01-26 Thread Laurent Desnogues

On Tue, Jan 26, 2010 at 5:00 PM, Riku Voipio  wrote:
> From: Riku Voipio 
>
> If you compile applications with gcc -mtp=cp15, __thread
> accesses will generate an abort. Implement accessing allowed
> cp15.c13 registers on ARMv6K+ in linux-user.
>
> Signed-off-by: Riku Voipio 
> ---
>  target-arm/helper.c |   27 ++-
>  1 files changed, 26 insertions(+), 1 deletions(-)
>
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index b3aec99..68578ce 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -505,13 +505,38 @@ uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
>
>  void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
>  {
> +    int op2;
> +
> +    op2 = (insn >> 5) & 7;
> +    /* Allow write access to CP15 User RW Thread ID Register */
> +    if (arm_feature (env, ARM_FEATURE_V6K) && ((insn >> 16) & 0xf) == 13) {
> +        switch (op2) {
> +        case 2:
> +            env->cp15.c13_tls1 = val;
> +            return;
> +        }
> +    }
>     cpu_abort(env, "cp15 insn %08x\n", insn);
>  }
>
>  uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
>  {
> +    int op2;
> +    /* Allow read access to CP15 User RW and RO Thread ID Registers */
> +
> +    op2 = (insn >> 5) & 7;
> +    if (arm_feature (env, ARM_FEATURE_V6K) && ((insn >> 16) & 0xf) == 13) {
> +        switch (op2) {
> +        case 2:
> +            return env->cp15.c13_tls1;
> +        case 3:
> +            return env->cp15.c13_tls2;
> +        default:
> +            goto bad_reg;
> +        }
> +    }
> +bad_reg:
>     cpu_abort(env, "cp15 insn %08x\n", insn);
> -    return 0;
>  }
>
>  /* These should probably raise undefined insn exceptions.  */

Most of the checks you do here could be done in translate.c.
Wouldn't it be better to do them there?


Laurent

Re: [Qemu-devel] [PATCH 5/5] target-arm: Thread ID Registers are ARMv6K +

2010-01-26 Thread Laurent Desnogues

On Tue, Jan 26, 2010 at 5:00 PM, Riku Voipio  wrote:
> From: Riku Voipio 
>
> Change the system-emulation code also to only support accessing
> the cp15.c13 Thread ID registers on ARMv6K and newer.
>
> Signed-off-by: Riku Voipio 
> ---
>  target-arm/helper.c |   12 
>  1 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 68578ce..1245d9d 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -1517,12 +1517,18 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, 
> uint32_t val)
>             env->cp15.c13_context = val;
>             break;
>         case 2:
> +            if (!arm_feature(env, ARM_FEATURE_V6K))
> +                goto bad_reg;
>             env->cp15.c13_tls1 = val;
>             break;
>         case 3:
> +            if (!arm_feature(env, ARM_FEATURE_V6K))
> +                goto bad_reg;
>             env->cp15.c13_tls2 = val;
>             break;
>         case 4:
> +            if (!arm_feature(env, ARM_FEATURE_V6K))
> +                goto bad_reg;
>             env->cp15.c13_tls3 = val;
>             break;
>         default:
> @@ -1805,10 +1811,16 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t 
> insn)
>         case 1:
>             return env->cp15.c13_context;
>         case 2:
> +            if (!arm_feature(env, ARM_FEATURE_V6K))
> +                goto bad_reg;
>             return env->cp15.c13_tls1;
>         case 3:
> +            if (!arm_feature(env, ARM_FEATURE_V6K))
> +                goto bad_reg;
>             return env->cp15.c13_tls2;
>         case 4:
> +            if (!arm_feature(env, ARM_FEATURE_V6K))
> +                goto bad_reg;
>             return env->cp15.c13_tls3;
>         default:
>             goto bad_reg;

Same question as for user mode :-)


Laurent

[Qemu-devel] Re: [PATCH] QEMU - provide e820 reserve through qemu_cfg

2010-01-26 Thread Jes Sorensen


On 01/26/10 07:46, Gleb Natapov wrote:

On Mon, Jan 25, 2010 at 06:13:35PM +0100, Jes Sorensen wrote:

I am fine with having QEMU build the e820 tables completely if there is
a consensus to take that path.


QEMU can't build the e820 map completely. There are things it doesn't
know. Like how much memory ACPI tables take and where they are located.


Good point!

I think the conclusion is to do a load-extra-tables kinda interface 
allowing QEMU to pass in a bunch of them, but leaving things like the

ACPI space for the BIOS to reserve.

Cheers,
Jes

Re: [Qemu-devel] [PATCH 0/6] tcg conditional set, round 6

2010-01-26 Thread Richard Henderson


Ping?

r~

On 01/20/2010 09:18 AM, Richard Henderson wrote:

Changes v5-v6:
   * Update x86-64 post P_REXB changes.

Changes v4-v5:
   * Document double-word support opcodes in a new section of the README,
 as requested by Aurelien.
   * Shorten the branch-to-next in the EQ case of i386 brcond2,  as
 pointed out by Laurent.


r~


Richard Henderson (6):
   tcg: Document double-word support opcodes.
   tcg: Generic support for conditional set
   tcg: Add tcg_invert_cond.
   tcg-i386: Implement small forward branches.
   tcg-i386: Implement setcond.
   tcg-x86_64: Implement setcond.

  elf.h   |2 +
  tcg/README  |   36 +
  tcg/i386/tcg-target.c   |  187 +--
  tcg/tcg-op.h|   47 
  tcg/tcg-opc.h   |3 +
  tcg/tcg.c   |   21 --
  tcg/tcg.h   |5 +
  tcg/x86_64/tcg-target.c |   38 -
  8 files changed, 288 insertions(+), 51 deletions(-)

Re: [Qemu-devel] [PATCH 0/6] target-alpha: fpu qualifiers, round 2

2010-01-26 Thread Richard Henderson


Ping?

r~

On 01/04/2010 02:46 PM, Richard Henderson wrote:

I've split up the FPCR as requested by Aurelien.  We no longer
set anything in FP_STATUS after the execution of the operation,
only copy data from FP_STATUS to some env->fpcr field.

I have totally rewritten the patch to be more along the line
that Laurent was suggesting, in that the rounding mode and other
qualifiers are totally parsed within the translator.  I no longer
pass the FN11 field to the helper functions.

Unlike Laurent's prototype, I do not set the rounding mode at
every FP instruction; I remember the previous setting of the
rounding mode within a TB.  Similarly for the flush-to-zero field.

I do not handle VAX instructions at all.  The existing VAX support
is mostly broken, and I didn't feel like compounding the problem.


r~


--
Richard Henderson (6):
   target-alpha: Fix gdb access to fpcr and unique.
   target-alpha: Split up FPCR value into separate fields.
   target-alpha: Reduce internal processor registers for user-mode.
   target-alpha: Clean up arithmetic traps.
   target-alpha: Mark helper_excp as NORETURN.
   target-alpha: Implement IEEE FP qualifiers.

Re: [Qemu-devel] Re: Stop using "which" in ./configure

2010-01-26 Thread Loïc Minier

On Thu, Jan 21, 2010, Måns Rullgård wrote:
> I think that entire test is wrong, in fact.  It is perfectly possible
> for someone on Solaris to install a working "install" command in
> /usr/bin.  It is better, if possible, to test whatever "install"
> command is in the path, and complain only if it actually fails.

 As I said, I prefer not changing this without access to a Solaris
 platform; would you mind committing the patch as is, and requesting
 whoever is interested in the Solaris specific code in qemu to fix the
 remainder?  The patch is a net improvement over what's in ./configure.

   Thanks
-- 
Loïc Minier

Re: [Qemu-devel] CONFIG_FB_CIRRUS for Linux guests

2010-01-26 Thread Anthony Liguori


On 01/26/2010 08:44 AM, Jan Kiszka wrote:

Hi,

just received some Linux guest kernel that refuses to properly use its
framebuffer console under QEMU (probably any version, tried down to some
0.10.x). It turned out that it has CONFIG_FB_CIRRUS enabled and fails to
initialize the display when this is actually used. Anyone came across
this before or has some idea what goes wrong?
   


Looks like it worked at some point:

commit ca896ef389fc44c7a673d1bd2b94823e28da37a1
Author: aurel32 
Date:   Thu May 8 12:21:27 2008 +

cirrusfb: proper "Attribute Controller Toggle Readback" register 
behaviour


(Marcelo Tosatti)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/tr...@4386 
c046a42c-6fe2-441c


The cirrusfb Linux driver uses the toggle readback register to determine
whether to flip 0xC30 to data/address mode.

Without proper implementation the VGA mode will be erroneously set to
blank resulting in crashes when starting BITBLT.

3d4h index 24h (R):  Attribute Controller Toggle Readback Register
bit7  If set the Attribute Controller will next access a data register, if
  clear it'll access the index register


Signed-off-by: Marcelo Tosatti

Regards,

Anthony Liguori


Jan

[Qemu-devel] Re: sparc solaris guest, hsfs_putpage: dirty HSFS page

2010-01-26 Thread Artyom Tarasenko

2010/1/24 Blue Swirl :
> On Sun, Jan 24, 2010 at 2:02 AM, Artyom Tarasenko
>  wrote:
>> All solaris versions which currently boot (from cd) regularly produce 
>> buckets of
>> "hsfs_putpage: dirty HSFS page" messages.
>>
>> High Sierra is a pretty old and stable stuff, so it is possible that
>> the code is similar to OpenSolaris.
>> I looked in debugger, and the function calls hierarchy looks pretty similar.
>>
>> Now in the OpenSolaris source code there is a nice comment:
>> http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/fs/hsfs/hsfs_vnops.c#1758
>> /*
>> * Normally pvn_getdirty() should return 0, which
>> * impies that it has done the job for us.
>> * The shouldn't-happen scenario is when it returns 1.
>> * This means that the page has been modified and
>> * needs to be put back.
>> * Since we can't write on a CD, we fake a failed
>> * I/O and force pvn_write_done() to destroy the page.
>> */
>> if (pvn_getdirty(pp, flags) == 1) {
>>                cmn_err(CE_NOTE,
>>                            "hsfs_putpage: dirty HSFS page");
>>
>> Now the question: does the problem have to do with qemu caches 
>> (non-)emulation?
>> Can it be that we mark non-dirty pages dirty? Or does qemu always mark
>> pages dirty exactly to avoid cache emulation?
>>
>> Otherwise it means something else goes astray and Solaris guest really
>> modifies the pages it shouldn't.
>>
>> Just wonder what to dig first, MMU or IRQ emulation (the two most
>> obvious suspects).
>
> Maybe the stores via MMU bypass ASIs

why bypass stores? What about the non-bypass ones?

> should use
> st[bwlq]_phys_notdirty.

Seems that st[bw]_phys_notdirty are not implemented yet?

I've changed [lq] for asi 0x20 and 21-2f and see no difference. Also I
put some debug printfs and see that none of these ASIs is called after
the Solaris kernel is loaded.

> It can break display handling, though.


-- 
Regards,
Artyom Tarasenko

solaris/sparc under qemu blog: http://tyom.blogspot.com/

Re: [Qemu-devel] CONFIG_FB_CIRRUS for Linux guests

2010-01-26 Thread René Rebe

Hi,

I saw that some pretty long time ago, but didn't care enough to investigate, 
just disabled the cirrus fb  in T2 (http://t2-project.org):

r...@8way:t2-trunk-clean$ svn log -r29093

r29093 | rene | 2008-05-27 09:37:57 +0200 (Tue, 27 May 2008) | 4 lines

* updated linux26 module blacklist, do not list ide-generic (that's
  handled these days), but add cirrusfb, which causes a pure black
  screen in Qemu


r...@8way:t2-trunk-clean$ svn di -c r29093
Index: package/base/module-init-tools/modprobe.conf.data
===
--- package/base/module-init-tools/modprobe.conf.data   (revision 29092)
+++ package/base/module-init-tools/modprobe.conf.data   (revision 29093)
@@ -203,5 +203,4 @@
 alias char-major-89-* i2c-dev
 alias net-pf-24 pppoe
 
-# do not auto-load (e.g. on boot)
-blacklist ide-generic
+blacklist cirrusfb # plain black screen in Qemu

On 26.01.2010, at 17:02, Alexander Graf wrote:

> 
> On 26.01.2010, at 15:44, Jan Kiszka wrote:
> 
>> Hi,
>> 
>> just received some Linux guest kernel that refuses to properly use its
>> framebuffer console under QEMU (probably any version, tried down to some
>> 0.10.x). It turned out that it has CONFIG_FB_CIRRUS enabled and fails to
>> initialize the display when this is actually used. Anyone came across
>> this before or has some idea what goes wrong?
> 
> The only thing I've seen that might be a similar issue is that the Cirrus 
> driver on Windows NT 4 just shows a black screen with KVM.
> 
> Alex
> 

-- 
  René Rebe, ExactCODE GmbH, Jaegerstr. 67, DE-10117 Berlin
  http://exactcode.com | http://t2-project.org | http://rene.rebe.name

Re: [Qemu-devel] CONFIG_FB_CIRRUS for Linux guests

2010-01-26 Thread Jan Kiszka

Anthony Liguori wrote:
> On 01/26/2010 08:44 AM, Jan Kiszka wrote:
>> Hi,
>>
>> just received some Linux guest kernel that refuses to properly use its
>> framebuffer console under QEMU (probably any version, tried down to some
>> 0.10.x). It turned out that it has CONFIG_FB_CIRRUS enabled and fails to
>> initialize the display when this is actually used. Anyone came across
>> this before or has some idea what goes wrong?
>>
> 
> Looks like it worked at some point:
> 
> commit ca896ef389fc44c7a673d1bd2b94823e28da37a1
> Author: aurel32 
> Date:   Thu May 8 12:21:27 2008 +
> 
>  cirrusfb: proper "Attribute Controller Toggle Readback" register 
> behaviour
> 
>  (Marcelo Tosatti)
> 
> 
>  git-svn-id: svn://svn.savannah.nongnu.org/qemu/tr...@4386 
> c046a42c-6fe2-441c
> 
> The cirrusfb Linux driver uses the toggle readback register to determine
> whether to flip 0xC30 to data/address mode.
> 
> Without proper implementation the VGA mode will be erroneously set to
> blank resulting in crashes when starting BITBLT.
> 
> 3d4h index 24h (R):  Attribute Controller Toggle Readback Register
> bit7  If set the Attribute Controller will next access a data register, if
>clear it'll access the index register
> 
> 
> Signed-off-by: Marcelo Tosatti
> 

Yep, and 2bec46dc97571a3c34b18fe4ca198e7bfbdca41f broke it again.

Unless Glauber immediately confesses the crime and removes the bug he
left behind, I will try to understand the reason later.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

[Qemu-devel] Re: [PATCHv2] configure: verify stdio.h

2010-01-26 Thread Måns Rullgård

"Michael S. Tsirkin"  writes:

> Verify that stdio.h supports %lld %zd.
> Some mingw variants don't unless requested explicitly (see
> http://www.mail-archive.com/mingw-w64-pub...@lists.sourceforge.net/msg00417.html)
> ), detect them early.
>
> Signed-off-by: Michael S. Tsirkin 
> Acked-by: Juan Quintela 
>
> ---
>
> changes from v1:
>   removed leftover chunk
>
>  configure |   20 
>  1 files changed, 20 insertions(+), 0 deletions(-)
>
> diff --git a/configure b/configure
> index 5631bbb..6ba06d6 100755
> --- a/configure
> +++ b/configure
> @@ -123,6 +123,26 @@ else
>  exit 1
>  fi
>
> +# Check that stdio.h compiler is sane: some
> +# mingw variants do not support %z %l that we rely on
> +cat > $TMPC < +#include 
> +#include 
> +size_t z = 1;
> +long long ll = 2;
> +int main(void) {
> +  printf("z=%zd;ll=%lld;\n", z, ll);
> +  return 0;
> +}
> +EOF
> +
> +if compile_prog && ($TMPE | grep "z=1;ll=2;" > /dev/null); then
> +  : C compiler works ok
> +else
> +echo "ERROR: \"$cc\" does not have a working stdio.h"
> +exit 1
> +fi
> +
>  check_define() {
>  cat > $TMPC <  #if !defined($1)

This makes cross-compiling impossible.  Is that really desirable?

-- 
Måns Rullgård
m...@mansr.com

Re: [Qemu-devel] Re: Stop using "which" in ./configure

2010-01-26 Thread Blue Swirl

On Tue, Jan 26, 2010 at 6:47 PM, Loïc Minier  wrote:
> On Thu, Jan 21, 2010, Måns Rullgård wrote:
>> I think that entire test is wrong, in fact.  It is perfectly possible
>> for someone on Solaris to install a working "install" command in
>> /usr/bin.  It is better, if possible, to test whatever "install"
>> command is in the path, and complain only if it actually fails.
>
>  As I said, I prefer not changing this without access to a Solaris
>  platform; would you mind committing the patch as is, and requesting
>  whoever is interested in the Solaris specific code in qemu to fix the
>  remainder?  The patch is a net improvement over what's in ./configure.

The patches didn't apply. Also please send only one patch per message,
git am can't handle multiple patches.

[Qemu-devel] Re: sparc solaris guest, hsfs_putpage: dirty HSFS page

2010-01-26 Thread Blue Swirl

On Tue, Jan 26, 2010 at 7:03 PM, Artyom Tarasenko
 wrote:
> 2010/1/24 Blue Swirl :
>> On Sun, Jan 24, 2010 at 2:02 AM, Artyom Tarasenko
>>  wrote:
>>> All solaris versions which currently boot (from cd) regularly produce 
>>> buckets of
>>> "hsfs_putpage: dirty HSFS page" messages.
>>>
>>> High Sierra is a pretty old and stable stuff, so it is possible that
>>> the code is similar to OpenSolaris.
>>> I looked in debugger, and the function calls hierarchy looks pretty similar.
>>>
>>> Now in the OpenSolaris source code there is a nice comment:
>>> http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/fs/hsfs/hsfs_vnops.c#1758
>>> /*
>>> * Normally pvn_getdirty() should return 0, which
>>> * impies that it has done the job for us.
>>> * The shouldn't-happen scenario is when it returns 1.
>>> * This means that the page has been modified and
>>> * needs to be put back.
>>> * Since we can't write on a CD, we fake a failed
>>> * I/O and force pvn_write_done() to destroy the page.
>>> */
>>> if (pvn_getdirty(pp, flags) == 1) {
>>>                cmn_err(CE_NOTE,
>>>                            "hsfs_putpage: dirty HSFS page");
>>>
>>> Now the question: does the problem have to do with qemu caches 
>>> (non-)emulation?
>>> Can it be that we mark non-dirty pages dirty? Or does qemu always mark
>>> pages dirty exactly to avoid cache emulation?
>>>
>>> Otherwise it means something else goes astray and Solaris guest really
>>> modifies the pages it shouldn't.
>>>
>>> Just wonder what to dig first, MMU or IRQ emulation (the two most
>>> obvious suspects).
>>
>> Maybe the stores via MMU bypass ASIs
>
> why bypass stores? What about the non-bypass ones?

Because their use should update the PTE dirty bits.

>> should use
>> st[bwlq]_phys_notdirty.
>
> Seems that st[bw]_phys_notdirty are not implemented yet?
>
> I've changed [lq] for asi 0x20 and 21-2f and see no difference. Also I
> put some debug printfs and see that none of these ASIs is called after
> the Solaris kernel is loaded.
>
>> It can break display handling, though.
>
>
> --
> Regards,
> Artyom Tarasenko
>
> solaris/sparc under qemu blog: http://tyom.blogspot.com/
>

Re: [Qemu-devel] Re: [PATCHv2] configure: verify stdio.h

2010-01-26 Thread Blue Swirl

2010/1/26 Måns Rullgård :
> "Michael S. Tsirkin"  writes:
>
>> Verify that stdio.h supports %lld %zd.
>> Some mingw variants don't unless requested explicitly (see
>> http://www.mail-archive.com/mingw-w64-pub...@lists.sourceforge.net/msg00417.html)
>> ), detect them early.
>>
>> Signed-off-by: Michael S. Tsirkin 
>> Acked-by: Juan Quintela 
>>
>> ---
>>
>> changes from v1:
>>       removed leftover chunk
>>
>>  configure |   20 
>>  1 files changed, 20 insertions(+), 0 deletions(-)
>>
>> diff --git a/configure b/configure
>> index 5631bbb..6ba06d6 100755
>> --- a/configure
>> +++ b/configure
>> @@ -123,6 +123,26 @@ else
>>      exit 1
>>  fi
>>
>> +# Check that stdio.h compiler is sane: some
>> +# mingw variants do not support %z %l that we rely on
>> +cat > $TMPC <> +#include 
>> +#include 
>> +size_t z = 1;
>> +long long ll = 2;
>> +int main(void) {
>> +  printf("z=%zd;ll=%lld;\n", z, ll);
>> +  return 0;
>> +}
>> +EOF
>> +
>> +if compile_prog && ($TMPE | grep "z=1;ll=2;" > /dev/null); then
>> +  : C compiler works ok
>> +else
>> +    echo "ERROR: \"$cc\" does not have a working stdio.h"
>> +    exit 1
>> +fi
>> +
>>  check_define() {
>>  cat > $TMPC <>  #if !defined($1)
>
> This makes cross-compiling impossible.  Is that really desirable?

I'd rather drop %ll and %z.

Re: [Qemu-devel] [PATCH 4/5] linux-user: Add access to TLS registers

2010-01-26 Thread Riku Voipio

On Tue, Jan 26, 2010 at 05:27:27PM +0100, Laurent Desnogues wrote:
> On Tue, Jan 26, 2010 at 5:00 PM, Riku Voipio  wrote:
> > From: Riku Voipio 
> >
> > If you compile applications with gcc -mtp=cp15, __thread
> > access's will generate an abort. Implement accessing allowed
> > cp15.c13 registers on ARMv6K+ in linux-user.
> >
> > Signed-off-by: Riku Voipio 
> > ---
> >  target-arm/helper.c |   27 ++-
> >  1 files changed, 26 insertions(+), 1 deletions(-)
> >
> > diff --git a/target-arm/helper.c b/target-arm/helper.c
> > index b3aec99..68578ce 100644
> > --- a/target-arm/helper.c
> > +++ b/target-arm/helper.c
> > @@ -505,13 +505,38 @@ uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
> >
> >  void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
> >  {
> > +    int op2;
> > +
> > +    op2 = (insn >> 5) & 7;
> > +    /* Allow write access to CP15 User RW Thread ID Register */
> > +    if (arm_feature (env, ARM_FEATURE_V6K) && ((insn >> 16) & 0xf) == 13) {
> > +        switch (op2) {
> > +        case 2:
> > +            env->cp15.c13_tls1 = val;
> > +            return;
> > +        }
> > +    }
> >     cpu_abort(env, "cp15 insn %08x\n", insn);
> >  }
> >
> >  uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
> >  {
> > +    int op2;
> > +    /* Allow read access to CP15 User RW and RO Thread ID Registers */
> > +
> > +    op2 = (insn >> 5) & 7;
> > +    if (arm_feature (env, ARM_FEATURE_V6K) && ((insn >> 16) & 0xf) == 13) {
> > +        switch (op2) {
> > +        case 2:
> > +            return env->cp15.c13_tls1;
> > +        case 3:
> > +            return env->cp15.c13_tls2;
> > +        default:
> > +            goto bad_reg;
> > +        }
> > +    }
> > +bad_reg:
> >     cpu_abort(env, "cp15 insn %08x\n", insn);
> > -    return 0;
> >  }
> >
> >  /* These should probably raise undefined insn exceptions.  */

> Most of the checks you do here could be done in translate.c.
> Wouldn't it be better to do them there?

Indeed, thus we could even avoid the helper completely. I just followed the
cp15 implementation of system-qemu here.

[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V8)

2010-01-26 Thread Adam Litke

The changes in V8 of this patch are related to the monitor infrastructure.  No
changes to the virtio interface core have been made since V4.  This is intended
to apply on top of my API for asynchronous monitor commands patch.

Changes since V7:
 - Ported to the asynchronous monitor API

Changes since V6:
 - Integrated with virtio qdev feature bit changes
   (specifically: Use VirtIODevice 'guest_features' to check if memory stats
   is a negotiated feature)
 - Track which monitor initiated the most recent stats request.  Now it does the
   Right Thing(tm) with multiple monitors making parallel requests.

Changes since V5:
 - Asynchronous query-balloon mode for QMP
 - Add timeout to prevent hanging the user monitor in synchronous mode

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

Signed-off-by: Adam Litke 
To: Anthony Liguori 
Cc: Avi Kivity 
Cc: Luiz Capitulino 
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..c3a1ad3 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,13 @@
 
 #include "cpu-defs.h"
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target,
+MonitorCompletion cb, void *cb_data);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
-void qemu_balloon(ram_addr_t target);
+int qemu_balloon(ram_addr_t target, MonitorCompletion cb, void *opaque);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(MonitorCompletion cb, void *opaque);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index e17880f..086d9d1 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -16,9 +16,13 @@
 #include "pc.h"
 #include "sysemu.h"
 #include "cpu.h"
+#include "monitor.h"
 #include "balloon.h"
 #include "virtio-balloon.h"
 #include "kvm.h"
+#include "qlist.h"
+#include "qint.h"
+#include "qstring.h"
 
 #if defined(__linux__)
 #include 
@@ -27,9 +31,14 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+MonitorCompletion *stats_callback;
+void *stats_opaque_callback_data;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +55,42 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+/*
+ * reset_stats - Mark all items in the stats array as unset
+ *
+ * This function needs to be called at device initialization and
+ * before updating to a set of newly-generated stats.  This will ensure that no
+ * stale values stick around in case the guest reports a subset of the supported
+ * statistics.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
+}
+
+static void stat_put(QDict *dict, const char *label, uint64_t val)
+{
+if (val != -1)
+qdict_put(dict, label, qint_from_int(val));
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QDict *dict = qdict_new();
+uint32_t actual = ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
+
+stat_put(dict, "actual", actual);
+stat_put(dict, "mem_swapped_in", dev->stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(dict, "mem_swapped_out", dev->stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(dict, "major_page_faults", dev->stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(dict, "minor_page_faults", dev->stats[VIRTIO_BALLOON_S_MINFLT]);
+stat_put(dict, "free_mem

Re: [Qemu-devel] [PATCH v3 2/8] QDict: New qdict_get_double()

2010-01-26 Thread Anthony Liguori


On 01/25/2010 07:23 AM, Markus Armbruster wrote:

Helper function just like qdict_get_int(), just for QFloat/double.

Signed-off-by: Markus Armbruster
   


Breaks check_qdict's build.  Need to update the Makefile.  You can just 
send a v4 of this patch.


Regards,

Anthony Liguori

Re: [Qemu-devel] [PATCH v2 6/6] monitor: convert do_cpu_set() to QObject, QError

2010-01-26 Thread Anthony Liguori


On 01/20/2010 06:07 AM, Markus Armbruster wrote:

Signed-off-by: Markus Armbruster
---
  monitor.c   |4 ++--
  qemu-monitor.hx |3 ++-
  2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/monitor.c b/monitor.c
index 816f6fd..b9166c3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -808,11 +808,11 @@ static void do_info_cpus(Monitor *mon, QObject **ret_data)
  *ret_data = QOBJECT(cpu_list);
  }

-static void do_cpu_set(Monitor *mon, const QDict *qdict)
+static void do_cpu_set(Monitor *mon, const QDict *qdict, QObject **ret_data)
  {
  int index = qdict_get_int(qdict, "index");
  if (mon_set_cpu(index)<  0)
-monitor_printf(mon, "Invalid CPU index\n");
+qemu_error_new(QERR_INVALID_CPU_INDEX);
   


Just out of curiosity, why introduce a new error versus using
(QERR_INVALID_PARAMETER, "index")?


Regards,

Anthony Liguori

Re: [Qemu-devel] [PATCH 01/17] Introduce qemu_write_full()

2010-01-26 Thread Anthony Liguori


On 01/19/2010 05:56 PM, Juan Quintela wrote:

From: Kirill A. Shutemov

A variant of write(2) which handles partial write.

Signed-off-by: Kirill A. Shutemov
Signed-off-by: Juan Quintela
   


Applied all.  Thanks.

Regards,

Anthony Liguori


---
  osdep.c   |   27 +++
  qemu-common.h |1 +
  2 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/osdep.c b/osdep.c
index 1310684..09fbc99 100644
--- a/osdep.c
+++ b/osdep.c
@@ -243,6 +243,33 @@ int qemu_open(const char *name, int flags, ...)
  return ret;
  }

+/*
+ * A variant of write(2) which handles partial write.
+ *
+ * Return the number of bytes transferred.
+ * Set errno if fewer than `count' bytes are written.
+ */
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+{
+ssize_t ret = 0;
+ssize_t total = 0;
+
+while (count) {
+ret = write(fd, buf, count);
+if (ret<  0) {
+if (errno == EINTR)
+continue;
+break;
+}
+
+count -= ret;
+buf += ret;
+total += ret;
+}
+
+return total;
+}
+
  #ifndef _WIN32
  /*
   * Creates a pipe with FD_CLOEXEC set on both file descriptors
diff --git a/qemu-common.h b/qemu-common.h
index 8630f8c..a8144cb 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -160,6 +160,7 @@ void qemu_mutex_lock_iothread(void);
  void qemu_mutex_unlock_iothread(void);

  int qemu_open(const char *name, int flags, ...);
+ssize_t qemu_write_full(int fd, const void *buf, size_t count);
  void qemu_set_cloexec(int fd);

  #ifndef _WIN32

Re: [Qemu-devel] [PATCH v2 1/6] monitor: Don't check for mon_get_cpu() failure

2010-01-26 Thread Anthony Liguori


On 01/20/2010 06:07 AM, Markus Armbruster wrote:

mon_get_cpu() can't return null pointer, because it passes its return
value to cpu_synchronize_state() first, which crashes if its argument
is null.

Remove the (pretty cheesy) handling of this non-existing error.

Signed-off-by: Markus Armbruster
   


Applied all.  Thanks.

Regards,

Anthony Liguori


---
  monitor.c |   39 +++
  1 files changed, 3 insertions(+), 36 deletions(-)

diff --git a/monitor.c b/monitor.c
index 938eb3b..c22901f 100644
--- a/monitor.c
+++ b/monitor.c
@@ -693,8 +693,6 @@ static void do_info_registers(Monitor *mon)
  {
  CPUState *env;
  env = mon_get_cpu();
-if (!env)
-return;
  #ifdef TARGET_I386
  cpu_dump_state(env, (FILE *)mon, monitor_fprintf,
 X86_DUMP_FPU);
@@ -1128,7 +1126,7 @@ static void memory_dump(Monitor *mon, int count, int 
format, int wsize,
  int flags;
  flags = 0;
  env = mon_get_cpu();
-if (!env&&  !is_physical)
+if (!is_physical)
  return;
  #ifdef TARGET_I386
  if (wsize == 2) {
@@ -1190,8 +1188,6 @@ static void memory_dump(Monitor *mon, int count, int 
format, int wsize,
  cpu_physical_memory_rw(addr, buf, l, 0);
  } else {
  env = mon_get_cpu();
-if (!env)
-break;
  if (cpu_memory_rw_debug(env, addr, buf, l, 0)<  0) {
  monitor_printf(mon, " Cannot access memory\n");
  break;
@@ -1318,8 +1314,6 @@ static void do_memory_save(Monitor *mon, const QDict 
*qdict, QObject **ret_data)
  uint8_t buf[1024];

  env = mon_get_cpu();
-if (!env)
-return;

  f = fopen(filename, "wb");
  if (!f) {
@@ -1754,8 +1748,6 @@ static void tlb_info(Monitor *mon)
  uint32_t pgd, pde, pte;

  env = mon_get_cpu();
-if (!env)
-return;

  if (!(env->cr[0]&  CR0_PG_MASK)) {
  monitor_printf(mon, "PG disabled\n");
@@ -1812,8 +1804,6 @@ static void mem_info(Monitor *mon)
  uint32_t pgd, pde, pte, start, end;

  env = mon_get_cpu();
-if (!env)
-return;

  if (!(env->cr[0]&  CR0_PG_MASK)) {
  monitor_printf(mon, "PG disabled\n");
@@ -2659,8 +2649,6 @@ typedef struct MonitorDef {
  static target_long monitor_get_pc (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return env->eip + env->segs[R_CS].base;
  }
  #endif
@@ -2672,9 +2660,6 @@ static target_long monitor_get_ccr (const struct 
MonitorDef *md, int val)
  unsigned int u;
  int i;

-if (!env)
-return 0;
-
  u = 0;
  for (i = 0; i<  8; i++)
  u |= env->crf[i]<<  (32 - (4 * i));
@@ -2685,40 +2670,30 @@ static target_long monitor_get_ccr (const struct 
MonitorDef *md, int val)
  static target_long monitor_get_msr (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return env->msr;
  }

  static target_long monitor_get_xer (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return env->xer;
  }

  static target_long monitor_get_decr (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return cpu_ppc_load_decr(env);
  }

  static target_long monitor_get_tbu (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return cpu_ppc_load_tbu(env);
  }

  static target_long monitor_get_tbl (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return cpu_ppc_load_tbl(env);
  }
  #endif
@@ -2728,8 +2703,6 @@ static target_long monitor_get_tbl (const struct 
MonitorDef *md, int val)
  static target_long monitor_get_psr (const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return GET_PSR(env);
  }
  #endif
@@ -2737,8 +2710,6 @@ static target_long monitor_get_psr (const struct 
MonitorDef *md, int val)
  static target_long monitor_get_reg(const struct MonitorDef *md, int val)
  {
  CPUState *env = mon_get_cpu();
-if (!env)
-return 0;
  return env->regwptr[val];
  }
  #endif
@@ -2990,7 +2961,7 @@ static void expr_error(Monitor *mon, const char *msg)
  longjmp(expr_env, 1);
  }

-/* return 0 if OK, -1 if not found, -2 if no CPU defined */
+/* return 0 if OK, -1 if not found */
  static int get_monitor_def(target_long *pval, const char *name)
  {
  const MonitorDef *md;
@@ -3002,8 +2973,6 @@ static int get_monitor_def(target_long *pval, const char 
*name)
  *pval = md->get_value(md, md->offset);
  } else {
  CPUState *env = mon_get_cpu();
-if (!env)
-return -2;

Re: [Qemu-devel] [PATCH v2 01/10] qcow2: Fix error handling in qcow2_grow_l1_table

2010-01-26 Thread Anthony Liguori


On 01/20/2010 08:02 AM, Kevin Wolf wrote:

Return the appropriate error value instead of always using EIO. Don't free the
L1 table on errors, we still need it.

Signed-off-by: Kevin Wolf
   


Applied all.  Thanks.

Do you think this is stable-0.12 material?  Is there any reasonable 
scenario where this could fix a user visible bug?


Regards,

Anthony Liguori

Re: [Qemu-devel] [PATCH 1/7] Rename --enable-uname-release

2010-01-26 Thread Anthony Liguori


On 01/20/2010 11:41 AM, Juan Quintela wrote:

It really sets the uname string.  It doesn't make sense to have a --disable option

Signed-off-by: Juan Quintela
   


Patch 3/7 doesn't apply.  It's an annoying one to rebase too.  Please 
send out a new series and ping me and I'll quickly apply it since this 
one is likely to break again.


Regards,

Anthony Liguori


---
  configure |4 ++--
  1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 192338f..08ff6a2 100755
--- a/configure
+++ b/configure
@@ -578,7 +578,7 @@ for opt do
;;
--disable-user-pie) user_pie="no"
;;
-  --enable-uname-release=*) uname_release="$optarg"
+  --set-uname-release=*) uname_release="$optarg"
;;
--sparc_cpu=*)
;;
@@ -782,7 +782,7 @@ echo "  --disable-user-pie   do not build usermode emulation 
targets as PIE"
  echo "  --fmod-lib   path to FMOD library"
  echo "  --fmod-inc   path to FMOD includes"
  echo "  --oss-libpath to OSS library"
-echo "  --enable-uname-release=R Return R for uname -r in usermode emulation"
+echo "  --set-uname-release=R Return R for uname -r in usermode emulation"
  echo "  --sparc_cpu=VBuild qemu for Sparc architecture v7, v8, v8plus, 
v8plusa, v9"
  echo "  --disable-uuid   disable uuid support"
  echo "  --enable-uuidenable uuid support"

[Qemu-devel] Re: [PATCHv2] configure: verify stdio.h

2010-01-26 Thread Stefan Weil

Michael S. Tsirkin schrieb:
> Verify that stdio.h supports %lld %zd.
> Some migw variants don't unless requested explicitly (see

migw -> mingw

I don't know any mingw32 variant which supports %lld, %zd.
There is a new mingw-w64 where people are addressing the
problem, but that variant is unsupported by qemu.


> http://www.mail-archive.com/mingw-w64-pub...@lists.sourceforge.net/msg00417.html)
> ), detect them early.
>
> Signed-off-by: Michael S. Tsirkin 
> Acked-by: Juan Quintela 
>
> ---
>
> changes from v1:
> removed leftover chunk
>
> configure | 20 
> 1 files changed, 20 insertions(+), 0 deletions(-)
>
> diff --git a/configure b/configure
> index 5631bbb..6ba06d6 100755
> --- a/configure
> +++ b/configure
> @@ -123,6 +123,26 @@ else
> exit 1
> fi
>
> +# Check that stdio.h compiler is sane: some
> +# mingw variants do not support %z %l that we rely on
> +cat > $TMPC < +#include 
> +#include 
> +size_t z = 1;
> +long long ll = 2;
> +int main(void) {
> + printf("z=%zd;ll=%lld;\n", z, ll);
> + return 0;
> +}
> +EOF
> +
> +if compile_prog && ($TMPE | grep "z=1;ll=2;" > /dev/null); then
> + : C compiler works ok
> +else
> + echo "ERROR: \"$cc\" does not have a working stdio.h"
> + exit 1
> +fi
> +
> check_define() {
> cat > $TMPC < #if !defined($1)


Tests of %lld must use a 64 bit pattern (ll = 0x0123456789abcdefLL).
Otherwise, %lld, %ld and %d with ll = 1 will all print 1 on little endian
machines, and the %lld test won't detect missing support.

The problem is now several years old, it can be avoided for really
important output, so it seems acceptable to wait another period
of time until a working mingw is supported by qemu.

A test which prevents qemu builds on windows with current mingw32
or cross builds with current debian cross mingw32 would not help.
Both variants work for me even without full format support.

Regards,
Stefan Weil

[Qemu-devel] [PATCH] Seabios - read e820 table from qemu_cfg

2010-01-26 Thread Jes Sorensen


Hi,

Based on the feedback I received over the e820 reserve patch, I have
changed it to have QEMU pass in a list of entries that can cover more
than just the TSS/EPT range. This should provide the flexibility that
people were asking for.

The Seabios portion should allow for unlimited sized tables in theory,
whereas for QEMU I have set a fixed limit for now, but it can easily
be extended.

Please let me know what you think of this version!

Cheers,
Jes

Read optional table of e820 entries from qemu_cfg

Read optional table of e820 entries through qemu_cfg, allowing QEMU to
provide the location of KVM's switch area etc. rather than rely on
hard coded values.

For now, fall back to the old hard coded values for the TSS and EPT
switch page for compatibility reasons. Compatibility code could
possibly be removed in the future.

Signed-off-by: Jes Sorensen 

---
 src/paravirt.c |   17 +
 src/paravirt.h |9 +
 src/post.c |   23 +++
 3 files changed, 45 insertions(+), 4 deletions(-)

Index: seabios/src/paravirt.c
===
--- seabios.orig/src/paravirt.c
+++ seabios/src/paravirt.c
@@ -132,6 +132,23 @@ u16 qemu_cfg_smbios_entries(void)
 return cnt;
 }
 
+u32 qemu_cfg_e820_entries(void)
+{
+u32 cnt;
+
+if (!qemu_cfg_present)
+return 0;
+
+qemu_cfg_read_entry(&cnt, QEMU_CFG_E820_TABLE, sizeof(cnt));
+return cnt;
+}
+
+void* qemu_cfg_e820_load_next(void *addr)
+{
+qemu_cfg_read(addr, sizeof(struct e820_entry));
+return addr;
+}
+
 struct smbios_header {
 u16 length;
 u8 type;
Index: seabios/src/paravirt.h
===
--- seabios.orig/src/paravirt.h
+++ seabios/src/paravirt.h
@@ -36,6 +36,7 @@ static inline int kvm_para_available(voi
 #define QEMU_CFG_ACPI_TABLES   (QEMU_CFG_ARCH_LOCAL + 0)
 #define QEMU_CFG_SMBIOS_ENTRIES(QEMU_CFG_ARCH_LOCAL + 1)
 #define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2)
+#define QEMU_CFG_E820_TABLE(QEMU_CFG_ARCH_LOCAL + 3)
 
 extern int qemu_cfg_present;
 
@@ -61,8 +62,16 @@ typedef struct QemuCfgFile {
 char name[56];
 } QemuCfgFile;
 
+struct e820_entry {
+u64 address;
+u64 length;
+u32 type;
+};
+
 u16 qemu_cfg_first_file(QemuCfgFile *entry);
 u16 qemu_cfg_next_file(QemuCfgFile *entry);
 u32 qemu_cfg_read_file(QemuCfgFile *entry, void *dst, u32 maxlen);
+u32 qemu_cfg_e820_entries(void);
+void* qemu_cfg_e820_load_next(void *addr);
 
 #endif
Index: seabios/src/post.c
===
--- seabios.orig/src/post.c
+++ seabios/src/post.c
@@ -135,10 +135,25 @@ ram_probe(void)
  , E820_RESERVED);
 add_e820(BUILD_BIOS_ADDR, BUILD_BIOS_SIZE, E820_RESERVED);
 
-if (kvm_para_available())
-// 4 pages before the bios, 3 pages for vmx tss pages, the
-// other page for EPT real mode pagetable
-add_e820(0xfffbc000, 4*4096, E820_RESERVED);
+if (kvm_para_available()) {
+u32 count;
+
+count = qemu_cfg_e820_entries();
+if (count) {
+struct e820_entry entry;
+int i;
+
+for (i = 0; i < count; i++) {
+qemu_cfg_e820_load_next(&entry);
+add_e820(entry.address, entry.length, entry.type);
+}
+} else {
+// Backwards compatibility - provide hard coded range.
+// 4 pages before the bios, 3 pages for vmx tss pages, the
+// other page for EPT real mode pagetable
+add_e820(0xfffbc000, 4*4096, E820_RESERVED);
+}
+}
 
 dprintf(1, "Ram Size=0x%08x (0x%08x%08x high)\n"
 , RamSize, (u32)(RamSizeOver4G >> 32), (u32)RamSizeOver4G);

[Qemu-devel] [PATCH] QEMU-KVM - provide e820 table via fw_cfg

2010-01-26 Thread Jes Sorensen


Hi,

This is the QEMU-KVM part of the patch. If we can agree on this
approach, I will do a version for upstream QEMU as well.

Cheers,
Jes

Use qemu-cfg to provide the BIOS with an optional table of e820 entries.

Notify the BIOS of the location of the TSS+EPT range by reserving
it via the e820 table.

Signed-off-by: Jes Sorensen 

---
 hw/pc.c   |   35 +++
 hw/pc.h   |9 +
 qemu-kvm-x86.c|7 +++
 target-i386/kvm.c |7 +++
 4 files changed, 58 insertions(+)

Index: qemu-kvm/hw/pc.c
===
--- qemu-kvm.orig/hw/pc.c
+++ qemu-kvm/hw/pc.c
@@ -66,6 +66,7 @@
 #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
 #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
 #define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
+#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
 
 #define MAX_IDE_BUS 2
 
@@ -74,6 +75,21 @@ static RTCState *rtc_state;
 static PITState *pit;
 static PCII440FXState *i440fx_state;
 
+#define E820_NR_ENTRIES16
+
+struct e820_entry {
+uint64_t address;
+uint64_t length;
+uint32_t type;
+};
+
+struct e820_table {
+uint32_t count;
+struct e820_entry entry[E820_NR_ENTRIES];
+};
+
+static struct e820_table e820_table;
+
 qemu_irq *ioapic_irq_hack;
 
 typedef struct isa_irq_state {
@@ -444,6 +460,23 @@ static void bochs_bios_write(void *opaqu
 }
 }
 
+int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
+{
+int index = e820_table.count;
+struct e820_entry *entry;
+
+if (index >= E820_NR_ENTRIES)
+return -EBUSY;
+entry = &e820_table.entry[index];
+
+entry->address = address;
+entry->length = length;
+entry->type = type;
+
+e820_table.count++;
+return e820_table.count;
+}
+
 static void *bochs_bios_init(void)
 {
 void *fw_cfg;
@@ -475,6 +508,8 @@ static void *bochs_bios_init(void)
 if (smbios_table)
 fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
  smbios_table, smbios_len);
+fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, (uint8_t *)&e820_table,
+ sizeof(struct e820_table));
 
 /* allocate memory for the NUMA channel: one (64bit) word for the number
  * of nodes, one word for each VCPU->node and one word for each node to
Index: qemu-kvm/hw/pc.h
===
--- qemu-kvm.orig/hw/pc.h
+++ qemu-kvm/hw/pc.h
@@ -169,4 +169,13 @@ void extboot_init(BlockDriverState *bs, 
 
 int cpu_is_bsp(CPUState *env);
 
+/* e820 types */
+#define E820_RAM1
+#define E820_RESERVED   2
+#define E820_ACPI   3
+#define E820_NVS4
+#define E820_UNUSABLE   5
+
+int e820_add_entry(uint64_t, uint64_t, uint32_t);
+
 #endif
Index: qemu-kvm/qemu-kvm-x86.c
===
--- qemu-kvm.orig/qemu-kvm-x86.c
+++ qemu-kvm/qemu-kvm-x86.c
@@ -37,6 +37,13 @@ int kvm_set_tss_addr(kvm_context_t kvm, 
 {
 #ifdef KVM_CAP_SET_TSS_ADDR
int r;
+/*
+ * Tell fw_cfg to notify the BIOS to reserve the range.
+ */
+if (e820_add_entry(addr, 0x4000, E820_RESERVED) < 0) {
+perror("e820_add_entry() table is full");
+exit(1);
+}
 
r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
if (r > 0) {
Index: qemu-kvm/target-i386/kvm.c
===
--- qemu-kvm.orig/target-i386/kvm.c
+++ qemu-kvm/target-i386/kvm.c
@@ -298,6 +298,13 @@ int kvm_arch_init(KVMState *s, int smp_c
  * as unavaible memory.  FIXME, need to ensure the e820 map deals with
  * this?
  */
+/*
+ * Tell fw_cfg to notify the BIOS to reserve the range.
+ */
+if (e820_add_entry(0xfffbc000, 0x4000, E820_RESERVED) < 0) {
+perror("e820_add_entry() table is full");
+exit(1);
+}
 return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
 }

1 2 >

1 - 100 of 144 matches

Mail list logo