[Qemu-devel] [PATCH V16 5/6] add-cow file format core code.

2012-11-23 Thread Dong Xu Wang
add-cow file format core code. It use block-cache.c as cache code.
It lacks of snapshot_blkdev support.

v15->v16):
1) Judge if opts is null in add_cow_create function.

Signed-off-by: Dong Xu Wang 
---
 block/Makefile.objs |1 +
 block/add-cow.c |  690 +++
 block/add-cow.h |   83 ++
 block/block-cache.c |4 +
 block/block-cache.h |1 +
 block_int.h |2 +
 6 files changed, 781 insertions(+), 0 deletions(-)
 create mode 100644 block/add-cow.c
 create mode 100644 block/add-cow.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index d23c250..f0ff067 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,6 @@
 block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o 
vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
+block-obj-y += add-cow.o
 block-obj-y += block-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
diff --git a/block/add-cow.c b/block/add-cow.c
new file mode 100644
index 000..9d3b067
--- /dev/null
+++ b/block/add-cow.c
@@ -0,0 +1,690 @@
+/*
+ * QEMU ADD-COW Disk Format
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Dong Xu Wang 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include "add-cow.h"
+
+static void add_cow_header_le_to_cpu(const AddCowHeader *le, AddCowHeader *cpu)
+{
+cpu->magic  = le32_to_cpu(le->magic);
+cpu->version= le32_to_cpu(le->version);
+
+cpu->backing_offset = le32_to_cpu(le->backing_offset);
+cpu->backing_size   = le32_to_cpu(le->backing_size);
+cpu->image_offset   = le32_to_cpu(le->image_offset);
+cpu->image_size = le32_to_cpu(le->image_size);
+
+cpu->cluster_bits   = le32_to_cpu(le->cluster_bits);
+cpu->features   = le64_to_cpu(le->features);
+cpu->compat_features= le64_to_cpu(le->compat_features);
+cpu->header_size= le32_to_cpu(le->header_size);
+
+memcpy(cpu->backing_fmt, le->backing_fmt, sizeof(cpu->backing_fmt));
+memcpy(cpu->image_fmt, le->image_fmt, sizeof(cpu->image_fmt));
+}
+
+static void add_cow_header_cpu_to_le(const AddCowHeader *cpu, AddCowHeader *le)
+{
+le->magic   = cpu_to_le32(cpu->magic);
+le->version = cpu_to_le32(cpu->version);
+
+le->backing_offset  = cpu_to_le32(cpu->backing_offset);
+le->backing_size= cpu_to_le32(cpu->backing_size);
+le->image_offset= cpu_to_le32(cpu->image_offset);
+le->image_size  = cpu_to_le32(cpu->image_size);
+
+le->cluster_bits= cpu_to_le32(cpu->cluster_bits);
+le->features= cpu_to_le64(cpu->features);
+le->compat_features = cpu_to_le64(cpu->compat_features);
+le->header_size = cpu_to_le32(cpu->header_size);
+memcpy(le->backing_fmt, cpu->backing_fmt, sizeof(le->backing_fmt));
+memcpy(le->image_fmt, cpu->image_fmt, sizeof(le->image_fmt));
+}
+
+static int add_cow_probe(const uint8_t *buf, int buf_size, const char 
*filename)
+{
+const AddCowHeader *header = (const AddCowHeader *)buf;
+
+if (buf_size < sizeof(*header)) {
+return 0;
+}
+if (le32_to_cpu(header->magic) == ADD_COW_MAGIC &&
+le32_to_cpu(header->version) == ADD_COW_VERSION) {
+return 100;
+}
+return 0;
+}
+
+static int add_cow_create(const char *filename, QemuOpts *opts)
+{
+AddCowHeader header = {
+.magic  = ADD_COW_MAGIC,
+.version= ADD_COW_VERSION,
+.features   = 0,
+.compat_features = 0,
+.header_size = ADD_COW_HEADER_SIZE,
+};
+AddCowHeader le_header;
+int64_t image_len = 0;
+const char *backing_filename = NULL, *backing_fmt = NULL;
+const char *image_filename = NULL, *image_format = NULL;
+size_t cluster_size = ADD_COW_CLUSTER_SIZE;
+BlockDriverState *bs, *image_bs, *backing_bs;
+BlockDriver *drv = bdrv_find_format("add-cow");
+int ret;
+
+if (opts) {
+image_len = qemu_opt_get_number(opts, BLOCK_OPT_SIZE, 0);
+backing_filename = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
+backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
+image_filename = qemu_opt_get(opts, BLOCK_OPT_IMAGE_FILE);
+image_format = qemu_opt_get(opts, BLOCK_OPT_IMAGE_FMT);
+cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
+ ADD_COW_CLUSTER_SIZE);
+}
+
+header.cluster_bits = ffs(cluster_size) - 1;
+if (header.cluster_bits < MIN_CLUSTER_BITS ||
+header.cluster_bits > MAX_CLUSTER_BITS ||
+(1 << header.cluster_bits) != cluster_size) {
+error_report(
+"Cluster size must be

[Qemu-devel] [PATCH V16 0/6] add-cow file format

2012-11-23 Thread Dong Xu Wang
It will introduce a new file format: add-cow.

The add-cow file format makes it possible to perform copy-on-write on top of
a raw disk image.  When we know that no backing file clusters remain visible
(e.g. we have streamed the entire image and copied all data from the backing
file), then it is possible to discard the add-cow file and use the raw image
file directly.

This feature adds the copy-on-write feature to raw files (which cannot support
it natively) while allowing us to get full performance again later when we no
longer need copy-on-write.

add-cow can benefit from other available functions, such as path_has_protocol
and qed_read_string, so we will make them public.

snapshot_blkdev are not supported now for add-cow. Will add it in futher 
patches.

These patches are using QemuOpts parser, former patches could be found here:
http://patchwork.ozlabs.org/patch/196096/

v15->v16):
1) Rebased on QEMU upstream source tree.
2) Judge if opts is null in add_cow_create function.

v14->v15:
1) Fix typo and make some sentences more readable in docs.
2) Introduce STRINGIZER macro.

v13->v14:
1) Make some sentences more clear in docs.
2) Make MAGIC from 8 bytes to 4 bytes.
3) Fix some bugs.

v12->v13:
1) Use QemuOpts, not QEMUOptionParameter
2) cluster_size configuable
3) Refactor block-cache.c
4) Correct qemu-iotests script.
5) Other bug fix.

v11->v12:
1) Removed un-used feature bit.
2) Share cache code with qcow2.c.
3) Remove snapshot_blkdev support, will add it in another patch.
5) COW Bitmap field in add-cow file will be multiple of 65536.
6) fix grammer and typo.

Dong Xu Wang (6):
  docs: document for add-cow file format
  make path_has_protocol non static
  qed_read_string to bdrv_read_string
  rename qcow2-cache.c to block-cache.c
  add-cow file format core code.
  qemu-iotests: add add-cow iotests support.

 block.c  |   29 ++-
 block.h  |3 +
 block/Makefile.objs  |4 +-
 block/add-cow.c  |  690 ++
 block/add-cow.h  |   83 +
 block/block-cache.c  |  321 
 block/block-cache.h  |   77 +
 block/qcow2-cache.c  |  323 
 block/qcow2-cluster.c|   53 ++--
 block/qcow2-refcount.c   |   66 +++--
 block/qcow2.c|   21 +-
 block/qcow2.h|   24 +--
 block/qed.c  |   34 +--
 block_int.h  |2 +
 docs/specs/add-cow.txt   |  154 ++
 tests/qemu-iotests/017   |2 +-
 tests/qemu-iotests/020   |2 +-
 tests/qemu-iotests/common|6 +
 tests/qemu-iotests/common.rc |   15 +-
 trace-events |   13 +-
 20 files changed, 1473 insertions(+), 449 deletions(-)
 create mode 100644 block/add-cow.c
 create mode 100644 block/add-cow.h
 create mode 100644 block/block-cache.c
 create mode 100644 block/block-cache.h
 delete mode 100644 block/qcow2-cache.c
 create mode 100644 docs/specs/add-cow.txt




[Qemu-devel] [PATCH V16 4/6] rename qcow2-cache.c to block-cache.c

2012-11-23 Thread Dong Xu Wang
We will re-use qcow2-cache as block layer common cache code,
so change its name and made some changes, define a struct named
BlockTableType, pass BlockTableType and table size parameters to
block cache initialization function.

Signed-off-by: Dong Xu Wang 
---
 block/Makefile.objs|3 +-
 block/block-cache.c|  317 +++
 block/block-cache.h|   76 +++
 block/qcow2-cache.c|  323 
 block/qcow2-cluster.c  |   53 
 block/qcow2-refcount.c |   66 ++-
 block/qcow2.c  |   21 ++--
 block/qcow2.h  |   24 +---
 trace-events   |   13 +-
 9 files changed, 481 insertions(+), 415 deletions(-)
 create mode 100644 block/block-cache.c
 create mode 100644 block/block-cache.h
 delete mode 100644 block/qcow2-cache.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 7f01510..d23c250 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,6 @@
 block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o 
vvfat.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
+block-obj-y += block-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
diff --git a/block/block-cache.c b/block/block-cache.c
new file mode 100644
index 000..bf5c57c
--- /dev/null
+++ b/block/block-cache.c
@@ -0,0 +1,317 @@
+/*
+ * QEMU Block Layer Cache
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Dong Xu Wang 
+ *
+ * This file is based on qcow2-cache.c, see its copyrights below:
+ *
+ * L2/refcount table cache for the QCOW2 format
+ *
+ * Copyright (c) 2010 Kevin Wolf 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block_int.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block-cache.h"
+
+BlockCache *block_cache_create(BlockDriverState *bs, int num_tables,
+   size_t cluster_size, BlockTableType type)
+{
+BlockCache *c;
+int i;
+
+c = g_malloc0(sizeof(*c));
+c->size = num_tables;
+c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
+c->table_type = type;
+c->cluster_size = cluster_size;
+
+for (i = 0; i < c->size; i++) {
+c->entries[i].table = qemu_blockalign(bs, cluster_size);
+}
+
+return c;
+}
+
+int block_cache_destroy(BlockDriverState *bs, BlockCache *c)
+{
+int i;
+
+for (i = 0; i < c->size; i++) {
+assert(c->entries[i].ref == 0);
+qemu_vfree(c->entries[i].table);
+}
+
+g_free(c->entries);
+g_free(c);
+
+return 0;
+}
+
+static int block_cache_flush_dependency(BlockDriverState *bs, BlockCache *c)
+{
+int ret;
+
+ret = block_cache_flush(bs, c->depends);
+if (ret < 0) {
+return ret;
+}
+
+c->depends = NULL;
+c->depends_on_flush = false;
+
+return 0;
+}
+
+static int block_cache_entry_flush(BlockDriverState *bs, BlockCache *c, int i)
+{
+int ret = 0;
+
+if (!c->entries[i].dirty || !c->entries[i].offset) {
+return 0;
+}
+
+trace_block_cache_entry_flush(qemu_coroutine_self(), c->table_type, i);
+
+if (c->depends) {
+ret = block_cache_flush_dependency(bs, c);
+} else if (c->depends_on_flush) {
+ret = bdrv_flush(bs->file);
+if (ret >= 0) {
+c->depends_on_flush = false;
+}
+}
+
+if (ret < 0) {
+return ret;
+}
+
+if (c->table_type == BLOCK_TABLE_REF) {
+BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+} else if (c->table_type == BLOCK_TABLE_L2) {
+BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+}
+
+ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+  c->entries[i].table, c->cluster_siz

[Qemu-devel] [PATCH V6 10/10] remove QEMUOptionParameter related functions and struct

2012-11-23 Thread Dong Xu Wang
Signed-off-by: Dong Xu Wang 
---
 qemu-option.c |  285 -
 qemu-option.h |   32 ---
 2 files changed, 0 insertions(+), 317 deletions(-)

diff --git a/qemu-option.c b/qemu-option.c
index 63f3474..cda8794 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -153,22 +153,6 @@ int check_params(char *buf, int buf_size,
 return 0;
 }
 
-/*
- * Searches an option list for an option with the given name
- */
-QEMUOptionParameter *get_option_parameter(QEMUOptionParameter *list,
-const char *name)
-{
-while (list && list->name) {
-if (!strcmp(list->name, name)) {
-return list;
-}
-list++;
-}
-
-return NULL;
-}
-
 static void parse_option_bool(const char *name, const char *value, bool *ret,
   Error **errp)
 {
@@ -240,275 +224,6 @@ static void parse_option_size(const char *name, const 
char *value,
 }
 }
 
-/*
- * Sets the value of a parameter in a given option list. The parsing of the
- * value depends on the type of option:
- *
- * OPT_FLAG (uses value.n):
- *  If no value is given, the flag is set to 1.
- *  Otherwise the value must be "on" (set to 1) or "off" (set to 0)
- *
- * OPT_STRING (uses value.s):
- *  value is strdup()ed and assigned as option value
- *
- * OPT_SIZE (uses value.n):
- *  The value is converted to an integer. Suffixes for kilobytes etc. are
- *  allowed (powers of 1024).
- *
- * Returns 0 on succes, -1 in error cases
- */
-int set_option_parameter(QEMUOptionParameter *list, const char *name,
-const char *value)
-{
-bool flag;
-Error *local_err = NULL;
-
-// Find a matching parameter
-list = get_option_parameter(list, name);
-if (list == NULL) {
-fprintf(stderr, "Unknown option '%s'\n", name);
-return -1;
-}
-
-// Process parameter
-switch (list->type) {
-case OPT_FLAG:
-parse_option_bool(name, value, &flag, &local_err);
-if (!error_is_set(&local_err)) {
-list->value.n = flag;
-}
-break;
-
-case OPT_STRING:
-if (value != NULL) {
-list->value.s = g_strdup(value);
-} else {
-fprintf(stderr, "Option '%s' needs a parameter\n", name);
-return -1;
-}
-break;
-
-case OPT_SIZE:
-parse_option_size(name, value, &list->value.n, &local_err);
-break;
-
-default:
-fprintf(stderr, "Bug: Option '%s' has an unknown type\n", name);
-return -1;
-}
-
-if (error_is_set(&local_err)) {
-qerror_report_err(local_err);
-error_free(local_err);
-return -1;
-}
-
-return 0;
-}
-
-/*
- * Sets the given parameter to an integer instead of a string.
- * This function cannot be used to set string options.
- *
- * Returns 0 on success, -1 in error cases
- */
-int set_option_parameter_int(QEMUOptionParameter *list, const char *name,
-uint64_t value)
-{
-// Find a matching parameter
-list = get_option_parameter(list, name);
-if (list == NULL) {
-fprintf(stderr, "Unknown option '%s'\n", name);
-return -1;
-}
-
-// Process parameter
-switch (list->type) {
-case OPT_FLAG:
-case OPT_NUMBER:
-case OPT_SIZE:
-list->value.n = value;
-break;
-
-default:
-return -1;
-}
-
-return 0;
-}
-
-/*
- * Frees a option list. If it contains strings, the strings are freed as well.
- */
-void free_option_parameters(QEMUOptionParameter *list)
-{
-QEMUOptionParameter *cur = list;
-
-while (cur && cur->name) {
-if (cur->type == OPT_STRING) {
-g_free(cur->value.s);
-}
-cur++;
-}
-
-g_free(list);
-}
-
-/*
- * Count valid options in list
- */
-static size_t count_option_parameters(QEMUOptionParameter *list)
-{
-size_t num_options = 0;
-
-while (list && list->name) {
-num_options++;
-list++;
-}
-
-return num_options;
-}
-
-/*
- * Append an option list (list) to an option list (dest).
- *
- * If dest is NULL, a new copy of list is created.
- *
- * Returns a pointer to the first element of dest (or the newly allocated copy)
- */
-QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest,
-QEMUOptionParameter *list)
-{
-size_t num_options, num_dest_options;
-
-num_options = count_option_parameters(dest);
-num_dest_options = num_options;
-
-num_options += count_option_parameters(list);
-
-dest = g_realloc(dest, (num_options + 1) * sizeof(QEMUOptionParameter));
-dest[num_dest_options].name = NULL;
-
-while (list && list->name) {
-if (get_option_parameter(dest, list->name) == NULL) {
-dest[num_dest_options++] = *list;
-dest[num_dest_options].name = NULL;
-}
-list++;
-}
-
-return dest;
-}
-
-/*
- * Parses a parameter string (param) into an option list (dest).
- *
- * list is the templ

[Qemu-devel] [PATCH V6 07/10] add def_print_str and use it in qemu_opts_print.

2012-11-23 Thread Dong Xu Wang
qemu_opts_print has no user now, so can re-write the function safely.

qemu_opts_print will be used while using "qemu-img create", it will
produce the same output as previous code.

The behavior of this function has changed:

1. Print every possible option, whether a value has been set or not.
2. Option descriptors may provide a default value.
3. Print to stdout instead of stderr.

Previously the behavior was to print every option that has been set.
Options that have not been set would be skipped.

Signed-off-by: Dong Xu Wang 
---
 qemu-option.c |   27 ---
 qemu-option.h |1 +
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/qemu-option.c b/qemu-option.c
index 94557cf..e0628da 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -862,15 +862,28 @@ void qemu_opts_del(QemuOpts *opts)
 
 int qemu_opts_print(QemuOpts *opts, void *dummy)
 {
-QemuOpt *opt;
+QemuOptDesc *desc = opts->list->desc;
 
-fprintf(stderr, "%s: %s:", opts->list->name,
-opts->id ? opts->id : "");
-QTAILQ_FOREACH(opt, &opts->head, next) {
-fprintf(stderr, " %s=\"%s\"", opt->name, opt->str);
+for (desc = opts->list->desc; desc && desc->name; desc++) {
+const char *value = desc->def_print_str;
+QemuOpt *opt;
+
+opt = qemu_opt_find(opts, desc->name);
+if (opt) {
+value = opt->str;
+}
+
+if (!value) {
+continue;
+}
+
+if (desc->type == QEMU_OPT_STRING) {
+printf("%s='%s' ", desc->name, value);
+} else {
+printf("%s=%s ", desc->name, value);
+}
 }
-fprintf(stderr, "\n");
-return 0;
+ return 0;
 }
 
 static int opts_do_parse(QemuOpts *opts, const char *params,
diff --git a/qemu-option.h b/qemu-option.h
index 002dd07..ab02023 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -96,6 +96,7 @@ typedef struct QemuOptDesc {
 const char *name;
 enum QemuOptType type;
 const char *help;
+const char *def_print_str;
 } QemuOptDesc;
 
 struct QemuOptsList {
-- 
1.7.1




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> > Yup, it's already not too bad. I haven't looked into it in much
> > detail, but I'd like to reduce it even a bit more. In particular, the
> > backup_info field in the BlockDriverState feels wrong to me. In the
> > long term the generic block layer shouldn't know at all what a backup
> > is, and baking it into BDS couples it very tightly.
> 
> My plan was to have something like bs->job->job_type->{before,after}_write.
> 
>int coroutine_fn (*before_write)(BlockDriverState *bs,
> int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> void **cookie);
>int coroutine_fn (*after_write)(BlockDriverState *bs,
> int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> void *cookie);

I don't think that job is the right place. Instead I would put a list 
of filters into BDS:

typedef struct BlockFilter {
void *opaque;
int cluster_size;
int coroutine_fn (before_read)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags, void **cookie);
int coroutine_fn (after_read)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags, void *cookie);
int coroutine_fn (*before_write)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
void **cookie);
int coroutine_fn (*after_write)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
void *cookie);
} BlockFilter;

struct BlockDriverState {
   ...
QLIST_HEAD(, BlockFilters) filters;
};

Would that work for you?




[Qemu-devel] [PATCH V6 04/10] introduce qemu_opts_create_nofail function

2012-11-23 Thread Dong Xu Wang
While id is NULL, qemu_opts_create can not fail, so ignore
errors is fine.

Signed-off-by: Dong Xu Wang 
---
 qemu-option.c |9 +
 qemu-option.h |1 +
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/qemu-option.c b/qemu-option.c
index e0131ce..1303188 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -780,6 +780,15 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char 
*id,
 return opts;
 }
 
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list)
+{
+QemuOpts *opts;
+Error *errp = NULL;
+opts = qemu_opts_create(list, NULL, 0, &errp);
+assert_no_error(errp);
+return opts;
+}
+
 void qemu_opts_reset(QemuOptsList *list)
 {
 QemuOpts *opts, *next_opts;
diff --git a/qemu-option.h b/qemu-option.h
index ca72986..b0f8d1e 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -133,6 +133,7 @@ int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc 
func, void *opaque,
 QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id);
 QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
int fail_if_exists, Error **errp);
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list);
 void qemu_opts_reset(QemuOptsList *list);
 void qemu_opts_loc_restore(QemuOpts *opts);
 int qemu_opts_set(QemuOptsList *list, const char *id,
-- 
1.7.1




[Qemu-devel] [PATCH V6 06/10] create new function: qemu_opt_set_number

2012-11-23 Thread Dong Xu Wang
Signed-off-by: Dong Xu Wang 
---
 qemu-option.c |   22 ++
 qemu-option.h |1 +
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/qemu-option.c b/qemu-option.c
index 1303188..94557cf 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -695,6 +695,28 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, 
bool val)
 return 0;
 }
 
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val)
+{
+QemuOpt *opt;
+const QemuOptDesc *desc = opts->list->desc;
+
+opt = g_malloc0(sizeof(*opt));
+opt->desc = find_desc_by_name(desc, name);
+if (!opt->desc && !opts_accepts_any(opts)) {
+qerror_report(QERR_INVALID_PARAMETER, name);
+g_free(opt);
+return -1;
+}
+
+opt->name = g_strdup(name);
+opt->opts = opts;
+opt->value.uint = val;
+opt->str = g_strdup_printf("%" PRId64, val);
+QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+
+return 0;
+}
+
 int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
  int abort_on_failure)
 {
diff --git a/qemu-option.h b/qemu-option.h
index b0f8d1e..002dd07 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -126,6 +126,7 @@ int qemu_opt_set(QemuOpts *opts, const char *name, const 
char *value);
 void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
   Error **errp);
 int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val);
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val);
 typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void 
*opaque);
 int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
  int abort_on_failure);
-- 
1.7.1




[Qemu-devel] [PATCH 1.3 0/5] QOM/qdev lifetime fixes

2012-11-23 Thread Paolo Bonzini
These patches fix problems in the handling of freeing QOM/qdev
objects.  Together, they fix hot-unplug of USB mass storage devices,
which crashed with an assertion failure.

I'm not 100% sure, but I think we were always leaking the scsi-disk in
pre-QOM days.  Now we're freeing it properly, and the assertion proves it.

However, I don't like particularly the assertion in object_delete.  Once
we're sure we've fixed all bugs, we should remove it, because it prevents
a fully correct tracking of references.

In this case, for example, there is still one reference to the scsi-disk
in the MSDState's scsi_dev member.  We don't have neither an object_ref
nor an object_unref for it, so it happens to work.  If we had an
object_ref, the matching object_unref would be in dc->exit.  But then
we'd trip on the assertion failure again, because the SCSI bus is removed
(thus calling qdev_free on the scsi-dev) before dc->exit is called.

I have more patches to actually make the reference count of devices
and buses fully correct, but they are even more scary than these, so
they should wait for 1.4.

Paolo Bonzini (5):
  qom: fix refcount of non-heap-allocated objects
  qdev: move bus removal to object_unparent
  qom: make object_delete usable for statically-allocated objects
  qdev: simplify (de)allocation of buses
  qom: make object_finalize static

 hw/qdev-core.h|  5 -
 hw/qdev.c | 26 ++
 hw/pci.c  |  2 +-
 hw/sysbus.c   |  2 +-
 include/qemu/object.h | 29 -
 qom/object.c  | 12 +---
 6 files changed, 45 insertions(+), 31 deletions(-)

-- 
1.8.0




[Qemu-devel] [PATCH 1.3 4/5] qdev: simplify (de)allocation of buses

2012-11-23 Thread Paolo Bonzini
All conditional deallocation can now be done with object_delete.
Remove the @qom_allocated and @glib_allocated fields; replace the latter
with a direct assignment of the @free function pointer.

Signed-off-by: Paolo Bonzini 
---
 hw/pci.c   |  2 +-
 hw/qdev-core.h |  5 -
 hw/qdev.c  | 10 +-
 hw/sysbus.c|  2 +-
 4 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 9841e39..97a0cd7 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -301,9 +301,9 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
 PCIBus *bus;
 
 bus = g_malloc0(sizeof(*bus));
-bus->qbus.glib_allocated = true;
 pci_bus_new_inplace(bus, parent, name, address_space_mem,
 address_space_io, devfn_min);
+OBJECT(bus)->free = g_free;
 return bus;
 }
 
diff --git a/hw/qdev-core.h b/hw/qdev-core.h
index fce9e22..fff7f0f 100644
--- a/hw/qdev-core.h
+++ b/hw/qdev-core.h
@@ -106,17 +106,12 @@ typedef struct BusChild {
 
 /**
  * BusState:
- * @qom_allocated: Indicates whether the object was allocated by QOM.
- * @glib_allocated: Indicates whether the object was initialized in-place
- * yet is expected to be freed with g_free().
  */
 struct BusState {
 Object obj;
 DeviceState *parent;
 const char *name;
 int allow_hotplug;
-bool qom_allocated;
-bool glib_allocated;
 int max_index;
 QTAILQ_HEAD(ChildrenHead, BusChild) children;
 QLIST_ENTRY(BusState) sibling;
diff --git a/hw/qdev.c b/hw/qdev.c
index f43717b..788b4da 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -454,7 +454,6 @@ BusState *qbus_create(const char *typename, DeviceState 
*parent, const char *nam
 BusState *bus;
 
 bus = BUS(object_new(typename));
-bus->qom_allocated = true;
 
 bus->parent = parent;
 bus->name = name ? g_strdup(name) : NULL;
@@ -465,14 +464,7 @@ BusState *qbus_create(const char *typename, DeviceState 
*parent, const char *nam
 
 void qbus_free(BusState *bus)
 {
-if (bus->qom_allocated) {
-object_delete(OBJECT(bus));
-} else {
-object_finalize(OBJECT(bus));
-if (bus->glib_allocated) {
-g_free(bus);
-}
-}
+object_delete(OBJECT(bus));
 }
 
 static char *bus_get_fw_dev_path(BusState *bus, DeviceState *dev)
diff --git a/hw/sysbus.c b/hw/sysbus.c
index 4969f06..ef8ffb6 100644
--- a/hw/sysbus.c
+++ b/hw/sysbus.c
@@ -274,7 +274,7 @@ static void main_system_bus_create(void)
 main_system_bus = g_malloc0(system_bus_info.instance_size);
 qbus_create_inplace(main_system_bus, TYPE_SYSTEM_BUS, NULL,
 "main-system-bus");
-main_system_bus->glib_allocated = true;
+OBJECT(main_system_bus)->free = g_free;
 object_property_add_child(container_get(qdev_get_machine(),
 "/unattached"),
   "sysbus", OBJECT(main_system_bus), NULL);
-- 
1.8.0





[Qemu-devel] [PATCH 1.3 2/5] qdev: move bus removal to object_unparent

2012-11-23 Thread Paolo Bonzini
Add an ObjectClass method that is done at object_unparent time.  It
should remove any backlinks to the object in the composition tree,
so that object_delete will be able to drop the last reference and
free the object.

Use it for qdev buses.

Signed-off-by: Paolo Bonzini 
---
Ping Fan, with respect to your patch at
http://permalink.gmane.org/gmane.comp.emulators.qemu/179687,
this one removes the need for qdev_unset_parent.  You
can simply call object_unparent.

 hw/qdev.c | 16 +---
 include/qemu/object.h | 11 +++
 qom/object.c  |  3 +++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/qdev.c b/hw/qdev.c
index 7ddcd24..f43717b 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -705,9 +705,6 @@ static void device_finalize(Object *obj)
 qemu_opts_del(dev->opts);
 }
 }
-if (dev->parent_bus) {
-bus_remove_child(dev->parent_bus, dev);
-}
 }
 
 static void device_class_base_init(ObjectClass *class, void *data)
@@ -720,6 +717,18 @@ static void device_class_base_init(ObjectClass *class, 
void *data)
 klass->props = NULL;
 }
 
+static void qdev_remove_from_bus(Object *obj)
+{
+DeviceState *dev = DEVICE(obj);
+
+bus_remove_child(dev->parent_bus, dev);
+}
+
+static void device_class_init(ObjectClass *class, void *data)
+{
+class->unparent = qdev_remove_from_bus;
+}
+
 void device_reset(DeviceState *dev)
 {
 DeviceClass *klass = DEVICE_GET_CLASS(dev);
@@ -747,6 +756,7 @@ static TypeInfo device_type_info = {
 .instance_init = device_initfn,
 .instance_finalize = device_finalize,
 .class_base_init = device_class_base_init,
+.class_init = device_class_init,
 .abstract = true,
 .class_size = sizeof(DeviceClass),
 };
diff --git a/include/qemu/object.h b/include/qemu/object.h
index be707f1..232463b 100644
--- a/include/qemu/object.h
+++ b/include/qemu/object.h
@@ -230,6 +230,15 @@ typedef struct ObjectProperty
 } ObjectProperty;
 
 /**
+ * ObjectUnparent:
+ * @obj: the object that is being removed from the composition tree
+ *
+ * Called when an object is being removed from the QOM composition tree.
+ * The function should remove any backlinks from children objects to @obj.
+ */
+typedef void (ObjectUnparent)(Object *obj);
+
+/**
  * ObjectClass:
  *
  * The base for all classes.  The only thing that #ObjectClass contains is an
@@ -240,6 +249,8 @@ struct ObjectClass
 /*< private >*/
 Type type;
 GSList *interfaces;
+
+ObjectUnparent *unparent;
 };
 
 /**
diff --git a/qom/object.c b/qom/object.c
index 6a8c02a..f4747d0 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -363,6 +363,9 @@ void object_unparent(Object *obj)
 if (obj->parent) {
 object_property_del_child(obj->parent, obj, NULL);
 }
+if (obj->class->unparent) {
+(obj->class->unparent)(obj);
+}
 }
 
 static void object_deinit(Object *obj, TypeImpl *type)
-- 
1.8.0





[Qemu-devel] [PATCH 1.3 1/5] qom: fix refcount of non-heap-allocated objects

2012-11-23 Thread Paolo Bonzini
The reference count for embedded objects is always one too low, because
object_initialize_with_type returns with zero references to the object.
This causes premature finalization of the object (or an assertion failure)
after calling object_ref to add an extra reference and object_unref to
remove it.

The fix is to move the initial object_ref call from object_new_with_type
to object_initialize_with_type.

Signed-off-by: Paolo Bonzini 
---
 qom/object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qom/object.c b/qom/object.c
index d7092b0..6a8c02a 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -307,6 +307,7 @@ void object_initialize_with_type(void *data, TypeImpl *type)
 
 memset(obj, 0, type->instance_size);
 obj->class = type->class;
+object_ref(obj);
 QTAILQ_INIT(&obj->properties);
 object_init_with_type(obj, type);
 }
@@ -395,7 +396,6 @@ Object *object_new_with_type(Type type)
 
 obj = g_malloc(type->instance_size);
 object_initialize_with_type(obj, type);
-object_ref(obj);
 
 return obj;
 }
-- 
1.8.0





[Qemu-devel] [PATCH 1.3 5/5] qom: make object_finalize static

2012-11-23 Thread Paolo Bonzini
It is not used anymore, and there is no need to make it public.

Signed-off-by: Paolo Bonzini 
---
 include/qemu/object.h | 9 -
 qom/object.c  | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/include/qemu/object.h b/include/qemu/object.h
index 5ddcb4a..ed1f47f 100644
--- a/include/qemu/object.h
+++ b/include/qemu/object.h
@@ -505,15 +505,6 @@ void object_initialize_with_type(void *data, Type type);
 void object_initialize(void *obj, const char *typename);
 
 /**
- * object_finalize:
- * @obj: The object to finalize.
- *
- * This function destroys and object without freeing the memory associated with
- * it.
- */
-void object_finalize(void *obj);
-
-/**
  * object_dynamic_cast:
  * @obj: The object to cast.
  * @typename: The @typename to cast to.
diff --git a/qom/object.c b/qom/object.c
index f3e9517..70354fc 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -379,7 +379,7 @@ static void object_deinit(Object *obj, TypeImpl *type)
 }
 }
 
-void object_finalize(void *data)
+static void object_finalize(void *data)
 {
 Object *obj = data;
 TypeImpl *ti = obj->class->type;
-- 
1.8.0




[Qemu-devel] [PATCH] qemu-options: Fix space at EOL

2012-11-23 Thread Michal Privoznik
There's no need to add a space at the end of line.
Moreover, it can make problems in some projects that
store the help output into a file (and run couple of
tests based on that) and have space at EOL forbidden.

Signed-off-by: Michal Privoznik 
---
 qemu-options.hx |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 9bb29d3..8f6d0e3 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1331,7 +1331,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
 "connect the host TAP network interface to VLAN 'n'\n"
 #else
 "-net 
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off]\n"
-"connect the host TAP network interface to VLAN 'n' \n"
+"connect the host TAP network interface to VLAN 'n'\n"
 "use network scripts 'file' (default=" 
DEFAULT_NETWORK_SCRIPT ")\n"
 "to configure it and 'dfile' (default=" 
DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
 "to deconfigure it\n"
-- 
1.7.8.6




[Qemu-devel] [PATCH 1.3 3/5] qom: make object_delete usable for statically-allocated objects

2012-11-23 Thread Paolo Bonzini
Store in the object the freeing function that will be used at deletion
time.  This makes it possible to use object_delete on statically-allocated
(embedded) objects.  Dually, it makes it possible to use object_unparent
and object_unref without leaking memory, when the lifetime of object
might extend until after the call to object_delete.

Signed-off-by: Paolo Bonzini 
---
Ping Fan, this is the patch I mentioned at
http://permalink.gmane.org/gmane.comp.emulators.qemu/180054.
With this patch, your call of object_unref for qdev_unplug_complete
will not leak the device anymore (object_finalize will free it
when the last reference is dropped with object_unref).

 include/qemu/object.h | 9 +
 qom/object.c  | 5 -
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/qemu/object.h b/include/qemu/object.h
index 232463b..5ddcb4a 100644
--- a/include/qemu/object.h
+++ b/include/qemu/object.h
@@ -239,6 +239,14 @@ typedef struct ObjectProperty
 typedef void (ObjectUnparent)(Object *obj);
 
 /**
+ * ObjectFree:
+ * @obj: the object being freed
+ *
+ * Called when an object's last reference is removed.
+ */
+typedef void (ObjectFree)(void *obj);
+
+/**
  * ObjectClass:
  *
  * The base for all classes.  The only thing that #ObjectClass contains is an
@@ -272,6 +280,7 @@ struct Object
 {
 /*< private >*/
 ObjectClass *class;
+ObjectFree *free;
 QTAILQ_HEAD(, ObjectProperty) properties;
 uint32_t ref;
 Object *parent;
diff --git a/qom/object.c b/qom/object.c
index f4747d0..f3e9517 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -388,6 +388,9 @@ void object_finalize(void *data)
 object_property_del_all(obj);
 
 g_assert(obj->ref == 0);
+if (obj->free) {
+obj->free(obj);
+}
 }
 
 Object *object_new_with_type(Type type)
@@ -399,6 +402,7 @@ Object *object_new_with_type(Type type)
 
 obj = g_malloc(type->instance_size);
 object_initialize_with_type(obj, type);
+obj->free = g_free;
 
 return obj;
 }
@@ -415,7 +419,6 @@ void object_delete(Object *obj)
 object_unparent(obj);
 g_assert(obj->ref == 1);
 object_unref(obj);
-g_free(obj);
 }
 
 Object *object_dynamic_cast(Object *obj, const char *typename)
-- 
1.8.0





Re: [Qemu-devel] [PATCH 2/5] add basic backup support to block driver

2012-11-23 Thread Dietmar Maurer
> Is there a 1:1 relationship between BackupInfo and BackupBlockJob?  Then it
> would be nicer to move BlockupInfo fields into BackupBlockJob (which is
> empty right now).  Then you also don't need to add fields to BlockDriverState
> because you know that if your blockjob is running you can access it via bs-
> >job, if necessary.  You can then drop BackupInfo and any memory
> management code for it.

Oh, finally got your point.

There is an ongoing discussion about generic block filters, So I guess this 
totally 
depends on how we implement them.


- Dietmar




Re: [Qemu-devel] [PATCH] qemu-options: Fix space at EOL

2012-11-23 Thread Peter Maydell
On 23 November 2012 08:52, Michal Privoznik  wrote:
> There's no need to add a space at the end of line.
> Moreover, it can make problems in some projects that
> store the help output into a file (and run couple of
> tests based on that) and have space at EOL forbidden.

I have no problem with this patch but I would suggest that
these other projects have a broken workflow if they're
reliant on QEMU's help output to not have trailing spaces
like that :-)

> Signed-off-by: Michal Privoznik 

Reviewed-by: Peter Maydell 

(and cc'd qemu-trivial).

-- PMM

> ---
>  qemu-options.hx |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 9bb29d3..8f6d0e3 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1331,7 +1331,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
>  "connect the host TAP network interface to VLAN 'n'\n"
>  #else
>  "-net 
> tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off]\n"
> -"connect the host TAP network interface to VLAN 'n' \n"
> +"connect the host TAP network interface to VLAN 'n'\n"
>  "use network scripts 'file' (default=" 
> DEFAULT_NETWORK_SCRIPT ")\n"
>  "to configure it and 'dfile' (default=" 
> DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
>  "to deconfigure it\n"



Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> > My plan was to have something like bs->job->job_type-
> >{before,after}_write.
> >
> >int coroutine_fn (*before_write)(BlockDriverState *bs,
> > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> > void **cookie);
> >int coroutine_fn (*after_write)(BlockDriverState *bs,
> > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> > void *cookie);
> 
> I don't think that job is the right place. Instead I would put a list of 
> filters into
> BDS:

Well, I can also add it to job_type. Just tell me what you prefer, and I will 
write the patch.




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> > > My plan was to have something like bs->job->job_type-
> > >{before,after}_write.
> > >
> > >int coroutine_fn (*before_write)(BlockDriverState *bs,
> > > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> > > void **cookie);
> > >int coroutine_fn (*after_write)(BlockDriverState *bs,
> > > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> > > void *cookie);
> >
> > I don't think that job is the right place. Instead I would put a list
> > of filters into
> > BDS:
> 
> Well, I can also add it to job_type. Just tell me what you prefer, and I will
> write the patch.

BTW, will such filters work with the new virtio-blk-data-plane?




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Kevin Wolf
Am 23.11.2012 08:38, schrieb Dietmar Maurer:
>> In short, the idea is that you can stick filters on top of a 
>> BlockDriverState, so
>> that any read/writes (and possibly more requests, if necessary) are routed
>> through the filter before they are passed to the block driver of this BDS.
>> Filters would be implemented as BlockDrivers, i.e. you could implement
>> .bdrv_co_write() in a filter to intercept all writes to an image.
> 
> I am quite unsure if that make things easier.

At least it would make for a much cleaner design compared to putting
code for every feature you can think of into bdrv_co_do_readv/writev().

Kevin



[Qemu-devel] [PATCH V16 1/6] docs: document for add-cow file format

2012-11-23 Thread Dong Xu Wang
Document for add-cow format, the usage and spec of add-cow are introduced.

Signed-off-by: Dong Xu Wang 
---
 docs/specs/add-cow.txt |  154 
 1 files changed, 154 insertions(+), 0 deletions(-)
 create mode 100644 docs/specs/add-cow.txt

diff --git a/docs/specs/add-cow.txt b/docs/specs/add-cow.txt
new file mode 100644
index 000..24e9a11
--- /dev/null
+++ b/docs/specs/add-cow.txt
@@ -0,0 +1,154 @@
+== General ==
+
+The raw file format does not support backing files or copy on write feature.
+The add-cow image format makes it possible to use backing files with a raw
+image by keeping a separate .add-cow metadata file. Once all sectors
+have been written into the raw image it is safe to discard the .add-cow
+and backing files, then we can use the raw image directly.
+
+An example usage of add-cow would look like::
+(ubuntu.img is a disk image which has an installed OS.)
+1)  Create a raw image with the same size of ubuntu.img
+qemu-img create -f raw test.raw 8G
+2)  Create an add-cow image which will store dirty bitmap
+qemu-img create -f add-cow test.add-cow \
+-o backing_file=ubuntu.img,image_file=test.raw
+3)  Run qemu with add-cow image
+qemu -drive if=virtio,file=test.add-cow
+
+test.raw may be larger than ubuntu.img, in that case, the size of test.add-cow
+will be calculated from the size of test.raw.
+
+image_fmt can be omitted, in that case image_fmt should be set as "raw".
+backing_fmt can also be omitted, add-cow should do a probe operation and 
determine
+what the backing file's format is.
+
+=Specification=
+
+The file format looks like this:
+
+ +---+---+
+ | Header|   COW bitmap  |
+ +---+---+
+
+All numbers in add-cow are stored in Little Endian byte order.
+
+== Header ==
+
+The Header is included in the first bytes:
+(HEADER_SIZE is defined in 44-47 bytes.)
+Byte0  -  3:magic
+add-cow magic string ("ACOW").
+
+4  -  7:version
+Version number (only valid value is 1 now).
+
+8  - 11:backing file name offset
+Offset in the add-cow file at which the backing file
+name is stored (NB: The string is not NUL-terminated).
+If backing file name does NOT exist, this field will be
+0. Must be between 80 and [HEADER_SIZE - 2](a file name
+must be at least 1 byte).
+
+12 - 15:backing file name size
+Length of the backing file name in bytes. It will be 0
+if the backing file name offset is 0. If backing file
+name offset is non-zero, then it must be non-zero. Must
+be less than [HEADER_SIZE - 80] to fit in the reserved
+part of the header. Backing file name offset + size
+must be no more than HEADER_SIZE.
+
+16 - 19:image file name offset
+Offset in the add-cow file at which the image file name
+is stored (NB: The string is not NUL-terminated). It
+must be between 80 and [HEADER_SIZE - 2]. Image file
+name size + offset must be no more than HEADER_SIZE.
+
+20 - 23:image file name size
+Length of the image file name in bytes.
+Must be less than [HEADER_SIZE - 80] to fit in the 
reserved
+part of the header.
+
+24 - 27:cluster bits
+Number of bits that are used for addressing an offset
+within a cluster (1 << cluster_bits is the cluster 
size).
+Must not be less than 9 (i.e. 512 byte clusters).
+
+Note: qemu as of today has an implementation limit of 
2 MB
+as the maximum cluster size and won't be able to open 
images
+with larger cluster sizes.
+
+28 - 35:features
+Bitmask of features. If a feature bit is set but not 
recognized,
+the add-cow file should be dropped. They are not used 
in v1.
+
+Bits 0-63:  Reserved (set to 0)
+
+36 - 43:compatible features
+Bitmask of compatible features. An implementation can
+safely ignore any unknown bits that are set.
+Bit 0:  All allocated bit.  If this bit is set then
+backing file and COW bitmap will not be 
used,
+and can read from or write to image file 
directly.
+
+   

[Qemu-devel] [PATCH V6 08/10] Create four opts list related functions

2012-11-23 Thread Dong Xu Wang
This patch will create 4 functions, count_opts_list, append_opts_list,
free_opts_list and print_opts_list, they will used in following commits.

v5->v6):
1) allocate enough space in append_opts_list function.

Signed-off-by: Dong Xu Wang 
---
 qemu-option.c |   90 +
 qemu-option.h |4 ++
 2 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/qemu-option.c b/qemu-option.c
index e0628da..63f3474 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -1145,3 +1145,93 @@ int qemu_opts_foreach(QemuOptsList *list, 
qemu_opts_loopfunc func, void *opaque,
 loc_pop(&loc);
 return rc;
 }
+
+static size_t count_opts_list(QemuOptsList *list)
+{
+size_t i = 0;
+
+while (list && list->desc[i].name) {
+i++;
+}
+
+return i;
+}
+
+/* Create a new QemuOptsList and make its desc to the merge of first and 
second.
+ * It will allocate space for one new QemuOptsList plus enouth space for
+ * QemuOptDesc in first and second QemuOptsList. First argument's QemuOptDesc
+ * members take precedence over second's.
+ */
+QemuOptsList *append_opts_list(QemuOptsList *first,
+   QemuOptsList *second)
+{
+size_t num_first_options, num_second_options;
+QemuOptsList *dest = NULL;
+int i = 0;
+int index = 0;
+
+num_first_options = count_opts_list(first);
+num_second_options = count_opts_list(second);
+if (num_first_options + num_second_options == 0) {
+return NULL;
+}
+
+dest = g_malloc0(sizeof(QemuOptsList)
++ (num_first_options + num_second_options + 1) * sizeof(QemuOptDesc));
+
+dest->name = "append_opts_list";
+dest->implied_opt_name = NULL;
+dest->merge_lists = false;
+QTAILQ_INIT(&dest->head);
+while (first && (first->desc[i].name)) {
+if (!find_desc_by_name(dest->desc, first->desc[i].name)) {
+dest->desc[index].name = g_strdup(first->desc[i].name);
+dest->desc[index].help = g_strdup(first->desc[i].help);
+dest->desc[index].type = first->desc[i].type;
+dest->desc[index].def_print_str =
+g_strdup(first->desc[i].def_print_str);
+++index;
+   }
+i++;
+}
+i = 0;
+while (second && (second->desc[i].name)) {
+if (!find_desc_by_name(dest->desc, second->desc[i].name)) {
+dest->desc[index].name = g_strdup(first->desc[i].name);
+dest->desc[index].help = g_strdup(first->desc[i].help);
+dest->desc[index].type = second->desc[i].type;
+dest->desc[index].def_print_str =
+g_strdup(second->desc[i].def_print_str);
+++index;
+}
+i++;
+}
+dest->desc[index].name = NULL;
+return dest;
+}
+
+void free_opts_list(QemuOptsList *list)
+{
+int i = 0;
+
+while (list && list->desc[i].name) {
+g_free((char *)list->desc[i].name);
+g_free((char *)list->desc[i].help);
+g_free((char *)list->desc[i].def_print_str);
+i++;
+}
+
+g_free(list);
+}
+
+void print_opts_list(QemuOptsList *list)
+{
+int i = 0;
+printf("Supported options:\n");
+while (list && list->desc[i].name) {
+printf("%-16s %s\n", list->desc[i].name,
+list->desc[i].help ?
+list->desc[i].help : "No description available");
+i++;
+}
+}
diff --git a/qemu-option.h b/qemu-option.h
index ab02023..091a2f2 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -156,4 +156,8 @@ int qemu_opts_print(QemuOpts *opts, void *dummy);
 int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, void 
*opaque,
   int abort_on_failure);
 
+QemuOptsList *append_opts_list(QemuOptsList *dest,
+   QemuOptsList *list);
+void free_opts_list(QemuOptsList *list);
+void print_opts_list(QemuOptsList *list);
 #endif
-- 
1.7.1




[Qemu-devel] [PATCH V16 3/6] qed_read_string to bdrv_read_string

2012-11-23 Thread Dong Xu Wang
Make qed_read_string function to a common interface, so move it to block.c.

Signed-off-by: Dong Xu Wang 
---
 block.c |   27 +++
 block.h |2 ++
 block/qed.c |   34 --
 3 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/block.c b/block.c
index 6b57611..b3df889 100644
--- a/block.c
+++ b/block.c
@@ -216,6 +216,33 @@ int path_has_protocol(const char *path)
 return *p == ':';
 }
 
+/**
+ * Read a string of known length from the image file
+ *
+ * @bs: Image file
+ * @offset: File offset to start of string, in bytes
+ * @n:  String length in bytes
+ * @buf:Destination buffer
+ * @buflen: Destination buffer length in bytes
+ * @ret:0 on success, -errno on failure
+ *
+ * The string is NUL-terminated.
+ */
+int bdrv_read_string(BlockDriverState *bs, uint64_t offset, size_t n,
+ char *buf, size_t buflen)
+{
+int ret;
+if (n >= buflen) {
+return -EINVAL;
+}
+ret = bdrv_pread(bs, offset, buf, n);
+if (ret < 0) {
+return ret;
+}
+buf[n] = '\0';
+return 0;
+}
+
 int path_is_absolute(const char *path)
 {
 #ifdef _WIN32
diff --git a/block.h b/block.h
index fcf392b..301d6c5 100644
--- a/block.h
+++ b/block.h
@@ -170,6 +170,8 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
 const void *buf, int count);
 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
 int nb_sectors, QEMUIOVector *qiov);
+int bdrv_read_string(BlockDriverState *bs, uint64_t offset, size_t n,
+ char *buf, size_t buflen);
 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
diff --git a/block/qed.c b/block/qed.c
index 614f24c..7494707 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -217,33 +217,6 @@ static bool qed_is_image_size_valid(uint64_t image_size, 
uint32_t cluster_size,
 }
 
 /**
- * Read a string of known length from the image file
- *
- * @file:   Image file
- * @offset: File offset to start of string, in bytes
- * @n:  String length in bytes
- * @buf:Destination buffer
- * @buflen: Destination buffer length in bytes
- * @ret:0 on success, -errno on failure
- *
- * The string is NUL-terminated.
- */
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
-   char *buf, size_t buflen)
-{
-int ret;
-if (n >= buflen) {
-return -EINVAL;
-}
-ret = bdrv_pread(file, offset, buf, n);
-if (ret < 0) {
-return ret;
-}
-buf[n] = '\0';
-return 0;
-}
-
-/**
  * Allocate new clusters
  *
  * @s:  QED state
@@ -437,9 +410,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
 return -EINVAL;
 }
 
-ret = qed_read_string(bs->file, s->header.backing_filename_offset,
-  s->header.backing_filename_size, 
bs->backing_file,
-  sizeof(bs->backing_file));
+ret = bdrv_read_string(bs->file, s->header.backing_filename_offset,
+   s->header.backing_filename_size,
+   bs->backing_file,
+   sizeof(bs->backing_file));
 if (ret < 0) {
 return ret;
 }
-- 
1.7.1




[Qemu-devel] [PATCH V16 2/6] make path_has_protocol non static

2012-11-23 Thread Dong Xu Wang
We will use path_has_protocol outside block.c, so just make it public.

Signed-off-by: Dong Xu Wang 
Reviewed-by: Michael Roth 
---
 block.c |2 +-
 block.h |1 +
 2 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/block.c b/block.c
index 7d5d628..6b57611 100644
--- a/block.c
+++ b/block.c
@@ -199,7 +199,7 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
 }
 
 /* check if the path starts with ":" */
-static int path_has_protocol(const char *path)
+int path_has_protocol(const char *path)
 {
 const char *p;
 
diff --git a/block.h b/block.h
index 3292de7..fcf392b 100644
--- a/block.h
+++ b/block.h
@@ -333,6 +333,7 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, 
QEMUSnapshotInfo *sn);
 
 char *get_human_readable_size(char *buf, int buf_size, int64_t size);
 int path_is_absolute(const char *path);
+int path_has_protocol(const char *path);
 void path_combine(char *dest, int dest_size,
   const char *base_path,
   const char *filename);
-- 
1.7.1




[Qemu-devel] [PATCH V16 6/6] qemu-iotests: add add-cow iotests support.

2012-11-23 Thread Dong Xu Wang
This patch will use qemu-iotests to test add-cow file format.

v15->v16):
1) Rebased on QEMU upstream source tree.

Signed-off-by: Dong Xu Wang 
---
 tests/qemu-iotests/017   |2 +-
 tests/qemu-iotests/020   |2 +-
 tests/qemu-iotests/common|6 ++
 tests/qemu-iotests/common.rc |   15 ++-
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017
index 66951eb..d31432f 100755
--- a/tests/qemu-iotests/017
+++ b/tests/qemu-iotests/017
@@ -40,7 +40,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.pattern
 
 # Any format supporting backing files
-_supported_fmt qcow qcow2 vmdk qed
+_supported_fmt qcow qcow2 vmdk qed add-cow
 _supported_proto generic
 _supported_os Linux
 
diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020
index 2fb0ff8..3dbb495 100755
--- a/tests/qemu-iotests/020
+++ b/tests/qemu-iotests/020
@@ -42,7 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.pattern
 
 # Any format supporting backing files
-_supported_fmt qcow qcow2 vmdk qed
+_supported_fmt qcow qcow2 vmdk qed add-cow
 _supported_proto generic
 _supported_os Linux
 
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index b3aad89..1ea4a7f 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -128,6 +128,7 @@ common options
 check options
 -rawtest raw (default)
 -cowtest cow
+-add-cowtest add-cow
 -qcow   test qcow
 -qcow2  test qcow2
 -qedtest qed
@@ -164,6 +165,11 @@ testlist options
xpand=false
;;
 
+   -add-cow)
+IMGFMT=add-cow
+xpand=false
+;;
+
-qcow)
IMGFMT=qcow
xpand=false
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index aef5f52..4461f69 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -107,6 +107,16 @@ _make_test_img()
 fi
 if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" 
]; then
 optstr=$(_optstr_add "$optstr" "cluster_size=$CLUSTER_SIZE")
+elif [ "$IMGFMT" = "add-cow" ]; then
+local IMG="$TEST_IMG"".raw"
+if [ "$1" = "-b" ]; then
+IMG="$IMG"".b"
+$QEMU_IMG create -f raw $IMG $image_size>/dev/null
+extra_img_options="-o image_file=$IMG $extra_img_options"
+else
+$QEMU_IMG create -f raw $IMG $image_size>/dev/null
+extra_img_options="-o image_file=$IMG"
+fi
 fi
 
 if [ -n "$optstr" ]; then
@@ -124,7 +134,8 @@ _make_test_img()
 -e "s# compat='[^']*'##g" \
 -e "s# compat6=\\(on\\|off\\)##g" \
 -e "s# static=\\(on\\|off\\)##g" \
--e "s# lazy_refcounts=\\(on\\|off\\)##g"
+-e "s# lazy_refcounts=\\(on\\|off\\)##g" \
+-e "s# image_file='[^']*'##g"
 
 # Start an NBD server on the image file, which is what we'll be talking to
 if [ $IMGPROTO = "nbd" ]; then
@@ -146,6 +157,8 @@ _cleanup_test_img()
 rm -f $TEST_DIR/t.$IMGFMT
 rm -f $TEST_DIR/t.$IMGFMT.orig
 rm -f $TEST_DIR/t.$IMGFMT.base
+rm -f $TEST_DIR/t.$IMGFMT.raw
+rm -f $TEST_DIR/t.$IMGFMT.raw.b
 ;;
 
 rbd)
-- 
1.7.1




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Paolo Bonzini
Il 23/11/2012 10:05, Dietmar Maurer ha scritto:
 My plan was to have something like bs->job->job_type-
 {before,after}_write.

int coroutine_fn (*before_write)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 void **cookie);
int coroutine_fn (*after_write)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 void *cookie);
>>>
>>> I don't think that job is the right place. Instead I would put a list
>>> of filters into
>>> BDS:
>>
>> Well, I can also add it to job_type. Just tell me what you prefer, and I will
>> write the patch.
> 
> BTW, will such filters work with the new virtio-blk-data-plane?

No, virtio-blk-data-plane is a hack and will be slowly rewritten to
support all fancy features.

Paolo




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> > BTW, will such filters work with the new virtio-blk-data-plane?
> 
> No, virtio-blk-data-plane is a hack and will be slowly rewritten to support 
> all
> fancy features.

Ah, good to know ;-) thanks.




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Paolo Bonzini
Il 23/11/2012 08:42, Dietmar Maurer ha scritto:
>> > My plan was to have something like bs->job->job_type->{before,after}_write.
>> > 
>> >int coroutine_fn (*before_write)(BlockDriverState *bs,
>> > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
>> > void **cookie);
>> >int coroutine_fn (*after_write)(BlockDriverState *bs,
>> > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
>> > void *cookie);
>> > 
>> > 
>> > The before_write could optionally return a "cookie" that is passed back to
>> > the after_write callback.
> I don't really understand why a filter is related to the job? This is 
> sometimes useful,
> but not a generic filter infrastructure (maybe someone want to use filters 
> without a job).

See the part you snipped:

Actually this was plan B, as a poor-man implementation of the filter
infrastructure.  Plan A was that the block filters would materialize
suddenly in someone's git tree.

Paolo




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> >> Filters would be implemented as BlockDrivers, i.e. you could
> >> implement
> >> .bdrv_co_write() in a filter to intercept all writes to an image.
> >
> > I am quite unsure if that make things easier.
> 
> At least it would make for a much cleaner design compared to putting code
> for every feature you can think of into bdrv_co_do_readv/writev().

So if you want to add a filter, you simply modify bs->drv to point to the 
filter?




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> Actually this was plan B, as a poor-man implementation of the filter
> infrastructure.  Plan A was that the block filters would materialize suddenly 
> in
> someone's git tree.

OK, so let us summarize the options:

a.) wait untit it materialize suddenly in someone's git tree.
b.) add BlockFilter inside BDS
c.) add filter callbacks to block bojs (job_type)
d.) use BlockDriver as filter
e.) use the current BackupInfo unless filters materialize suddenly in someone's 
git tree.

more ideas?




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Dietmar Maurer
> > >> Filters would be implemented as BlockDrivers, i.e. you could
> > >> implement
> > >> .bdrv_co_write() in a filter to intercept all writes to an image.
> > >
> > > I am quite unsure if that make things easier.
> >
> > At least it would make for a much cleaner design compared to putting
> > code for every feature you can think of into bdrv_co_do_readv/writev().
> 
> So if you want to add a filter, you simply modify bs->drv to point to the 
> filter?

Seems the BlockDriver struct does not contain any 'state' (I guess that is by 
design),
so where do you store filter related dynamic data?




Re: [Qemu-devel] tap devices not receiving packets from a bridge

2012-11-23 Thread Peter Lieven

Am 23.11.2012 um 08:02 schrieb Stefan Hajnoczi:

> On Thu, Nov 22, 2012 at 03:29:52PM +0100, Peter Lieven wrote:
>> is anyone aware of a problem with the linux network bridge that in very rare 
>> circumstances stops
>> a bridge from sending pakets to a tap device?
>> 
>> My problem occurs in conjunction with vanilla qemu-kvm-1.2.0 and Ubuntu 
>> Kernel 3.2.0-34.53
>> which is based on Linux 3.2.33.
>> 
>> I was not yet able to reproduce the issue, it happens in really rare cases. 
>> The symptom is that
>> the tap does not have any TX packets. RX is working fine. I see the packets 
>> coming in at
>> the physical interface on the host, but they are not forwarded to the tap 
>> interface.
>> The bridge itself has learnt the mac address of the vServer that is 
>> connected to the tap interface.
>> It does not help to toggle the bridge link status,  the tap interface status 
>> or the interface in the vServer.
>> It seems that problem occurs if a tap interface that has previously been 
>> used, but set to nonpersistent
>> is set persistent again and then is by chance assigned to the same vServer 
>> (=same mac address on same
>> bridge) again. Unfortunately it seems not to be reproducible.
> 
> Not sure but this patch from Michael Tsirkin may help - it solves an
> issue with persistent tap devices:
> 
> http://patchwork.ozlabs.org/patch/198598/

Hi Stefan,

thanks for the pointer. I have seen this patch, but I have neglected it because 
it was dealing
with persistent taps. But maybe the taps in the kernel are not deleted 
directly. 
Can you remember what the syptomps of the above issue have been? Sorry for
being vague, but I currently have no clue whats going on.

Can someone who has more internal knowledge of the bridging/tap code say if 
qemu can
be responsible at all if the tap device is not receiving packets from the 
bridge.

If I have the following config. Lets say packets coming in via physical 
interface eth1.123,
and a bridge called br123.I further have a virtual machine with tap0. Both 
eth1.123
and tap0 are member of br123. 

If the issue occurs the vServer has no network connectivity inbound. If I sent 
a ping
from the vServer I see it on tap0 and leaving on eth1.123. I see further the 
arp reply coming
in via eth1.123, but the reply can't be seen on tap0.

Peter

> 
> Stefan




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Kevin Wolf
Am 23.11.2012 10:05, schrieb Dietmar Maurer:
 My plan was to have something like bs->job->job_type-
 {before,after}_write.

int coroutine_fn (*before_write)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 void **cookie);
int coroutine_fn (*after_write)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 void *cookie);
>>>
>>> I don't think that job is the right place. Instead I would put a list
>>> of filters into
>>> BDS:
>>
>> Well, I can also add it to job_type. Just tell me what you prefer, and I will
>> write the patch.

A block filter shouldn't be tied to a job, I think. We have things like
blkdebug that are really filters and aren't coupled with a job, and on
the other hand we want to generalise "block jobs" into just "jobs", so
adding block specific things to job_type would be a step in the wrong
direction.

I also think that before_write/after_write isn't a convenient interface,
it brings back much of the callback-based AIO cruft and passing void*
isn't nice anyway. It's much nice to have a single .bdrv_co_write
callback that somewhere in the middle calls the layer below with a
simple function call.

Also read/write aren't enough, for a full filter interface you
potentially also need flush, discard and probably most other operations.

This is why I suggested using a regular BlockDriver struct for filters,
it already has all functions that are needed.

> BTW, will such filters work with the new virtio-blk-data-plane?

Not initially, but I think as soon as data plane gets support for image
formats, filters would work as well.

Kevin



[Qemu-devel] [PATCH 1/3] S390: Basic CPU model support

2012-11-23 Thread Jens Freimann
From: Viktor Mihajlovski 

This enables qemu -cpu ? to return the list of supported CPU models
on s390. Since only the host model is supported at this point in time
this is pretty straight-forward. Further, a validity check for the
requested CPU model was added.
This change is needed to allow libvirt exploiters (like OpenStack)
to specify a CPU model.

Signed-off-by: Viktor Mihajlovski 
Signed-off-by: Jens Freimann 
Reviewed-by: Christian Borntraeger 
---
 hw/s390-virtio.c   | 5 +
 target-s390x/cpu.c | 6 ++
 target-s390x/cpu.h | 3 +++
 3 files changed, 14 insertions(+)

diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 685cb54..7144ac1 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -209,6 +209,11 @@ static void s390_init(QEMUMachineInitArgs *args)
 cpu_model = "host";
 }
 
+if (strcmp(cpu_model, "host")) {
+fprintf(stderr, "S390 only supports host CPU model\n");
+exit(1);
+}
+
 ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
 
 for (i = 0; i < smp_cpus; i++) {
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index 619b202..03fdc31 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -25,6 +25,12 @@
 #include "qemu-timer.h"
 
 
+/* generate CPU information for cpu -? */
+void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf)
+{
+(*cpu_fprintf)(f, "s390 %16s\n", "[host]");
+}
+
 /* CPUClass::reset() */
 static void s390_cpu_reset(CPUState *s)
 {
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 0f9a1f7..3513976 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -350,6 +350,9 @@ static inline void cpu_set_tls(CPUS390XState *env, 
target_ulong newtls)
 #define cpu_gen_code cpu_s390x_gen_code
 #define cpu_signal_handler cpu_s390x_signal_handler
 
+void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
+#define cpu_list s390_cpu_list
+
 #include "exec-all.h"
 
 #ifdef CONFIG_USER_ONLY
-- 
1.7.12.4




[Qemu-devel] [PATCH 3/3] sclp: Fix uninitialized var in handle_write_event_buf().

2012-11-23 Thread Jens Freimann
From: Cornelia Huck 

Some gcc versions rightly complain about a possibly unitialized rc,
so let's move setting it before the QTAILQ_FOREACH().

Signed-off-by: Cornelia Huck 
Signed-off-by: Christian Borntraeger 
Signed-off-by: Jens Freimann 
---
 hw/s390x/event-facility.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index 9367660..bc9cea9 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -112,12 +112,13 @@ static uint16_t handle_write_event_buf(SCLPEventFacility 
*ef,
 SCLPEvent *event;
 SCLPEventClass *ec;
 
+rc = SCLP_RC_INVALID_FUNCTION;
+
 QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
 DeviceState *qdev = kid->child;
 event = (SCLPEvent *) qdev;
 ec = SCLP_EVENT_GET_CLASS(event);
 
-rc = SCLP_RC_INVALID_FUNCTION;
 if (ec->write_event_data &&
 ec->event_type() == event_buf->type) {
 rc = ec->write_event_data(event, event_buf);
-- 
1.7.12.4




Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Kevin Wolf
Am 23.11.2012 10:31, schrieb Dietmar Maurer:
> Filters would be implemented as BlockDrivers, i.e. you could
> implement
> .bdrv_co_write() in a filter to intercept all writes to an image.

 I am quite unsure if that make things easier.
>>>
>>> At least it would make for a much cleaner design compared to putting
>>> code for every feature you can think of into bdrv_co_do_readv/writev().
>>
>> So if you want to add a filter, you simply modify bs->drv to point to the 
>> filter?
> 
> Seems the BlockDriver struct does not contain any 'state' (I guess that is by 
> design),
> so where do you store filter related dynamic data?

You wouldn't change bs->drv of the block device, you still need that one
after having processed the data in the filter.

Instead, you'd have some BlockDriverState *first_filter in bs to which
requests are forwarded. first_filter->file would point to either the
next filter or if there are no more filters to the real BlockDriverState.

Which raises the question of how to distinguish whether it's a new
request to bs that must go through the filters or whether it actually
comes from the last filter in the chain. As you can see, we don't have a
well thought out plan yet, just rough ideas (otherwise it would probably
be implemented already).

Kevin



[Qemu-devel] [PATCH 2/3] s390: clear registers, psw and prefix at vcpu reset

2012-11-23 Thread Jens Freimann
When resetting vcpus on s390/kvm we have to clear registers, psw
and prefix as described in the z/Architecture PoP, otherwise a
reboot won't work. IPL PSW and prefix are set later on by the
s390-ipl device reset code.

Signed-off-by: Jens Freimann 
---
 target-s390x/kvm.c | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 94de764..b1b791e 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -85,7 +85,31 @@ int kvm_arch_init_vcpu(CPUS390XState *env)
 
 void kvm_arch_reset_vcpu(CPUS390XState *env)
 {
-/* FIXME: add code to reset vcpu. */
+int i;
+
+/* The initial reset call is needed here to reset in-kernel
+ * vcpu data that we can't access directly from QEMU. Before
+ * this ioctl cpu_synchronize_state() is called in common kvm
+ * code (kvm-all). What remains is clearing registers and psw
+ * in QEMU cpu state */
+if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL)) {
+perror("Can't reset vcpu\n");
+}
+env->halted = 1;
+env->exception_index = EXCP_HLT;
+for (i = 0; i < 16; i++) {
+env->regs[i] = 0;
+env->aregs[i] = 0;
+env->cregs[i] = 0;
+env->fregs[i].ll = 0;
+}
+/* architectured initial values for CR 0 and 14 */
+env->cregs[0] = 0xE0UL;
+env->cregs[14] = 0xC200UL;
+env->fpc = 0;
+env->psw.mask = 0;
+env->psw.addr = 0;
+env->psa = 0;
 }
 
 int kvm_arch_put_registers(CPUS390XState *env, int level)
-- 
1.7.12.4




[Qemu-devel] [PATCH 0/3] s390: vcpu reset, -cpu ? and minor sclp fix

2012-11-23 Thread Jens Freimann
Hi Alex,

a few more s390 patches:
Patch 1 enables the -cpu ? option for s390 CPU models.
Patch 2 implements vcpu reset
Patch 3 takes care of an possibly uninitialized variable in SCLP code

regards
Jens

Cornelia Huck (1):
  sclp: Fix uninitialized var in handle_write_event_buf().

Jens Freimann (1):
  s390: clear registers, psw and prefix at vcpu reset

Viktor Mihajlovski (1):
  S390: Basic CPU model support

 hw/s390-virtio.c  |  5 +
 hw/s390x/event-facility.c |  3 ++-
 target-s390x/cpu.c|  6 ++
 target-s390x/cpu.h|  3 +++
 target-s390x/kvm.c| 26 +-
 5 files changed, 41 insertions(+), 2 deletions(-)

-- 
1.7.12.4




Re: [Qemu-devel] [PATCH/RFC] block: Ensure that block size constraints are considered

2012-11-23 Thread Heinz Graalfs
On Wed, 2012-11-21 at 10:15 +0100, Kevin Wolf wrote:
> Am 21.11.2012 09:58, schrieb Christian Borntraeger:
> > From: Heinz Graalfs 
> > 
> > While testing IPL code (booting) for s390x we faced some problems
> > with cache=none on dasds (4k block size) on bdrv_preads with length
> > values != block size.
> > 
> > This patch makes sure that bdrv_pread and friends work fine with
> > unaligned access even with cache=none
> >- propagate alignment value also into bs->file struct
> >- modify the size in case of no cache to avoid EINVAL on
> >  pread() etc. (file was opened with O_DIRECT).
> > 
> > This patch seems to cure the problems.
> > 
> > CC: Kevin Wolf 
> > CC: Stefan Hajnoczi 
> > Signed-off-by: Heinz Graalfs 
> > Signed-off-by: Christian Borntraeger 
> > ---
> >  block.c   |3 +++
> >  block/raw-posix.c |6 ++
> >  2 files changed, 9 insertions(+)
> > 
> > diff --git a/block.c b/block.c
> > index 854ebd6..f23c562 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -4242,6 +4242,9 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
> >  void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
> >  {
> >  bs->buffer_alignment = align;
> > +if ((bs->open_flags & BDRV_O_NOCACHE)) {
> > +bs->file->buffer_alignment = align;
> > +}
> 
> Any reason to restrict this to BDRV_O_NOCACHE?
> 
> There have been patches to change the BDRV_O_NOCACHE flag from the
> monitor, in which case bdrv_set_buffer_alignment() wouldn't be called
> anew and O_DIRECT requests start to fail again.
> 

OK

> >  }
> >  
> >  void *qemu_blockalign(BlockDriverState *bs, size_t size)
> > diff --git a/block/raw-posix.c b/block/raw-posix.c
> > index f2f0404..baebf1d 100644
> > --- a/block/raw-posix.c
> > +++ b/block/raw-posix.c
> > @@ -700,6 +700,12 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState 
> > *bs, int fd,
> >  acb->aio_nbytes = nb_sectors * 512;
> >  acb->aio_offset = sector_num * 512;
> >  
> > +/* O_DIRECT also requires an aligned length */
> > +if (bs->open_flags & BDRV_O_NOCACHE) {
> > +acb->aio_nbytes += acb->bs->buffer_alignment - 1;
> > +acb->aio_nbytes &= ~(acb->bs->buffer_alignment - 1);
> > +}
> 
> Modifying aio_nbytes, but not the iov looks wrong to me. This may work
> in the handle_aiocb_rw_linear() code path, but not with actual vectored I/O.
> 

Current coding ensures that read IO buffers always seem to be aligned
correctly. Whereas read length values are not always appropriate for an
O_DIRECT scenario.

For a 2048 formatted disk I verified that

1. non vectored IO - the length needs to be adapted several times,
   which is accomplished now by the patch.

2. vectored IO - the qiov's total length is always a multiple of the
   logical block size 
  (which is also verified in virtio_blk_handle_read())
   The particular iov length fields are already correctly setup as a
   multiple of the logical block size when processed in
   virtio_blk_handle_request().


> Kevin
> 





Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Michael S. Tsirkin
On Fri, Nov 23, 2012 at 03:13:07PM +1100, David Gibson wrote:
> On Thu, Nov 22, 2012 at 09:23:03AM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 22, 2012 at 01:27:18PM +1100, David Gibson wrote:
> > > On Wed, Nov 21, 2012 at 05:27:37PM +0200, Michael S. Tsirkin wrote:
> > > > On Wed, Nov 21, 2012 at 02:27:08PM +0100, Alexander Graf wrote:
> > > > > On 11/21/2012 02:21 PM, David Gibson wrote:
> > > > > >On Wed, Nov 21, 2012 at 03:13:39PM +0200, Michael S. Tsirkin wrote:
> > > > > >>On Wed, Nov 21, 2012 at 11:36:00PM +1100, David Gibson wrote:
> > > > > >>>On Wed, Nov 21, 2012 at 01:34:48PM +0200, Michael S. Tsirkin wrote:
> > > > > On Wed, Nov 21, 2012 at 11:57:05AM +1100, David Gibson wrote:
> > > > > >On Tue, Nov 20, 2012 at 02:26:09PM +0200, Michael S. Tsirkin 
> > > > > >wrote:
> > > > > >>On Tue, Nov 20, 2012 at 10:27:11AM +0100, Alexander Graf wrote:
> > > > > >>>On 19.11.2012, at 23:51, David Gibson wrote:
> > > > > >>>
> > > > > On Mon, Nov 19, 2012 at 05:34:12PM +0100, Alexander Graf 
> > > > > wrote:
> > > > > >On 13.11.2012, at 03:47, David Gibson wrote:
> > > > > >
> > > > > >>From: Alexey Kardashevskiy
> > > > > >>
> > > > > >>In future (with VFIO) we will have multiple PCI host 
> > > > > >>bridges on
> > > > > >>pseries.  Each one needs a unique LIOBN (IOMMU id).  At the 
> > > > > >>moment we
> > > > > >>derive these from the pci domain number, but the whole 
> > > > > >>notion of
> > > > > >>domain numbers on the qemu side is bogus and in any case 
> > > > > >>they're not
> > > > > >>actually uniquely allocated at this point.
> > > > > >>
> > > > > >>This patch, therefore uses a simple sequence counter to 
> > > > > >>generate
> > > > > >>unique LIOBNs for PCI host bridges.
> > > > > >>
> > > > > >>Signed-off-by: Alexey Kardashevskiy
> > > > > >>Signed-off-by: David Gibson
> > > > > >I don't really like the idea of having a global variable just
> > > > > >because our domain ID generation seems to not work as
> > > > > >expected. Michael, any comments here?
> > > > > Well, the patch I sent which changed domain id generation was
> > > > > ignored.  In any case, as I said, the whole concept of domain 
> > > > > numbers
> > > > > >>>Michael?
> > > > > >>This is user visible, right?
> > > > > >>So IMHO we should have the user specify LIOBN through a 
> > > > > >>property,
> > > > > >>rather than assign what's essentially a random value.
> > > > > >Well, I can implement an override through a property, which 
> > > > > >could be
> > > > > >useful in some circumstances.  But we still need to have qemu 
> > > > > >generate
> > > > > >unique defaults, rather than forcing it to be specified in every 
> > > > > >case.
> > > > > I don't see why.
> > > > > And if you want automatic defaults then they need to be generated 
> > > > > in a
> > > > > way that does not depend on implementation detail such as order of
> > > > > device initialization.
> > > > > >>>Because requiring explicit unique liobns to be supplied whenever 
> > > > > >>>there
> > > > > >>>is more than one PHB is horrible for usability.
> > > > > >>We should make simple things simple and complex things possible.
> > > > > >>More than one PHB seems like an advanced feature
> > > > > >Not for pseries.  On real hardware of this type, dozens of PHBs is
> > > > > >routine.  Plus, vfio passthrough is coming, we need at minimum one 
> > > > > >PHB
> > > > > >for emulated devices and one for passthrough devices.
> > > > > 
> > > > > Yeah, I second Davids opinion here. We need to make this easy for 
> > > > > users.
> > > > 
> > > > I think users don't normally create PHBs. They request a disk, a network
> > > > device, a pass-through device ... In this case if this requires
> > > > more PHBs create them internally but I imagine this doesn't
> > > > require any allocation scheme - simply set some address
> > > > for virtual devices, some other one for assigned devices ... no?
> > > 
> > > No.  One PHB for passthrough and one for emulated is the minimum.
> > > Since we don't emulated p2p bridges,
> > 
> > Actually qemu does emulate p2p bridges.
> > 
> > > each PHB can only support a small
> > > number of PCI devices, so if enough PCI devices are requested, we will
> > > still need to create - and assign numbers to - additional PHBs.
> > 
> > Each PHB can support up to 32 slots right? This seems ample for
> > a typical use. If you want many tens of devices you need to
> > supply addresses manually, this looks reasonable to me.
> > 
> > Allocating PHBs on the fly seems unencessarily tricky.
> 
> I still think you're thinking far too much in terms of x86-like
> guests.  But, I guess, emulated PCI devices will be pretty rare, since
> they're so 

Re: [Qemu-devel] [PATCH 1/5] RFC: Efficient VM backup for qemu (v1)

2012-11-23 Thread Markus Armbruster
Kevin Wolf  writes:

> Am 23.11.2012 10:05, schrieb Dietmar Maurer:
> My plan was to have something like bs->job->job_type-
> {before,after}_write.
>
>int coroutine_fn (*before_write)(BlockDriverState *bs,
> int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> void **cookie);
>int coroutine_fn (*after_write)(BlockDriverState *bs,
> int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> void *cookie);

 I don't think that job is the right place. Instead I would put a list
 of filters into
 BDS:
>>>
>>> Well, I can also add it to job_type. Just tell me what you prefer, and I 
>>> will
>>> write the patch.
>
> A block filter shouldn't be tied to a job, I think. We have things like
> blkdebug that are really filters and aren't coupled with a job, and on
> the other hand we want to generalise "block jobs" into just "jobs", so
> adding block specific things to job_type would be a step in the wrong
> direction.
>
> I also think that before_write/after_write isn't a convenient interface,
> it brings back much of the callback-based AIO cruft and passing void*
> isn't nice anyway. It's much nice to have a single .bdrv_co_write
> callback that somewhere in the middle calls the layer below with a
> simple function call.
>
> Also read/write aren't enough, for a full filter interface you
> potentially also need flush, discard and probably most other operations.
>
> This is why I suggested using a regular BlockDriver struct for filters,
> it already has all functions that are needed.

Let me elaborate a bit.

A block backend is a tree of block driver instances (BlockDriverState).
Common examples:

raw qcow2   qcow2
 |  /   \   /   \
file file   raw  file  qcow2
 | /   \
file file  raw
|
   file

A less common example:

raw
 |
  blkdebug
 |
file

Here, "blkdebug" acts as a filter, i.e. a block driver that can be put
between two adjacent tree nodes.  It injects errors by selectively
failing some bdrv_aio_readv() and bdrv_aio_writev() operations.

Actually, "raw" could also be viewed as a degenerate filter that does
nothing[*], but such a filter isn't particularly useful.

Except perhaps to serve as base for real filters, that do stuff.  To do
stuff in your filter, you'd replace raw's operations with your own.

Hmm, blkdebug implements much fewer operations than raw.  Makes me
wonder whether it works only in special places in the tree now.

[...]


[*] Except occasionally inject bugs when somebody adds new BlockDriver
operations without updating "raw" to forward them.



Re: [Qemu-devel] tap devices not receiving packets from a bridge

2012-11-23 Thread Michael S. Tsirkin
On Fri, Nov 23, 2012 at 10:41:21AM +0100, Peter Lieven wrote:
> 
> Am 23.11.2012 um 08:02 schrieb Stefan Hajnoczi:
> 
> > On Thu, Nov 22, 2012 at 03:29:52PM +0100, Peter Lieven wrote:
> >> is anyone aware of a problem with the linux network bridge that in very 
> >> rare circumstances stops
> >> a bridge from sending pakets to a tap device?
> >> 
> >> My problem occurs in conjunction with vanilla qemu-kvm-1.2.0 and Ubuntu 
> >> Kernel 3.2.0-34.53
> >> which is based on Linux 3.2.33.
> >> 
> >> I was not yet able to reproduce the issue, it happens in really rare 
> >> cases. The symptom is that
> >> the tap does not have any TX packets. RX is working fine. I see the 
> >> packets coming in at
> >> the physical interface on the host, but they are not forwarded to the tap 
> >> interface.
> >> The bridge itself has learnt the mac address of the vServer that is 
> >> connected to the tap interface.
> >> It does not help to toggle the bridge link status,  the tap interface 
> >> status or the interface in the vServer.
> >> It seems that problem occurs if a tap interface that has previously been 
> >> used, but set to nonpersistent
> >> is set persistent again and then is by chance assigned to the same vServer 
> >> (=same mac address on same
> >> bridge) again. Unfortunately it seems not to be reproducible.
> > 
> > Not sure but this patch from Michael Tsirkin may help - it solves an
> > issue with persistent tap devices:
> > 
> > http://patchwork.ozlabs.org/patch/198598/
> 
> Hi Stefan,
> 
> thanks for the pointer. I have seen this patch, but I have neglected it 
> because it was dealing
> with persistent taps. But maybe the taps in the kernel are not deleted 
> directly. 
> Can you remember what the syptomps of the above issue have been? Sorry for
> being vague, but I currently have no clue whats going on.
> 
> Can someone who has more internal knowledge of the bridging/tap code say if 
> qemu can
> be responsible at all if the tap device is not receiving packets from the 
> bridge.
> 
> If I have the following config. Lets say packets coming in via physical 
> interface eth1.123,
> and a bridge called br123.I further have a virtual machine with tap0. Both 
> eth1.123
> and tap0 are member of br123. 
> 
> If the issue occurs the vServer has no network connectivity inbound. If I 
> sent a ping
> from the vServer I see it on tap0 and leaving on eth1.123. I see further the 
> arp reply coming
> in via eth1.123, but the reply can't be seen on tap0.
> 
> Peter

If guest is not consuming packets, a TX queue in tap device
will with time overrun (there's space for 1000 packets there).
This is code from tun:

if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
  >= dev->tx_queue_len / tun->numqueues){
if (!(tun->flags & TUN_ONE_QUEUE)) {
/* Normal queueing mode. */
/* Packet scheduler handles dropping of further
 * packets. */
netif_stop_subqueue(dev, txq);

/* We won't see all dropped packets
 * individually, so overrun
 * error is more appropriate. */
dev->stats.tx_fifo_errors++;


So you can detect that this triggered by looking at fifo errors counter in 
device.

Once this happens TX queue is stopped, then you hit this path:

if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
}
}

so packets are not passed to device anymore.
It will stay this way until guest consumes some packets and
queue is restarted.

> > 
> > Stefan



Re: [Qemu-devel] tap devices not receiving packets from a bridge

2012-11-23 Thread Peter Lieven

Am 23.11.2012 um 12:01 schrieb Michael S. Tsirkin:

> On Fri, Nov 23, 2012 at 10:41:21AM +0100, Peter Lieven wrote:
>> 
>> Am 23.11.2012 um 08:02 schrieb Stefan Hajnoczi:
>> 
>>> On Thu, Nov 22, 2012 at 03:29:52PM +0100, Peter Lieven wrote:
 is anyone aware of a problem with the linux network bridge that in very 
 rare circumstances stops
 a bridge from sending pakets to a tap device?
 
 My problem occurs in conjunction with vanilla qemu-kvm-1.2.0 and Ubuntu 
 Kernel 3.2.0-34.53
 which is based on Linux 3.2.33.
 
 I was not yet able to reproduce the issue, it happens in really rare 
 cases. The symptom is that
 the tap does not have any TX packets. RX is working fine. I see the 
 packets coming in at
 the physical interface on the host, but they are not forwarded to the tap 
 interface.
 The bridge itself has learnt the mac address of the vServer that is 
 connected to the tap interface.
 It does not help to toggle the bridge link status,  the tap interface 
 status or the interface in the vServer.
 It seems that problem occurs if a tap interface that has previously been 
 used, but set to nonpersistent
 is set persistent again and then is by chance assigned to the same vServer 
 (=same mac address on same
 bridge) again. Unfortunately it seems not to be reproducible.
>>> 
>>> Not sure but this patch from Michael Tsirkin may help - it solves an
>>> issue with persistent tap devices:
>>> 
>>> http://patchwork.ozlabs.org/patch/198598/
>> 
>> Hi Stefan,
>> 
>> thanks for the pointer. I have seen this patch, but I have neglected it 
>> because it was dealing
>> with persistent taps. But maybe the taps in the kernel are not deleted 
>> directly. 
>> Can you remember what the syptomps of the above issue have been? Sorry for
>> being vague, but I currently have no clue whats going on.
>> 
>> Can someone who has more internal knowledge of the bridging/tap code say if 
>> qemu can
>> be responsible at all if the tap device is not receiving packets from the 
>> bridge.
>> 
>> If I have the following config. Lets say packets coming in via physical 
>> interface eth1.123,
>> and a bridge called br123.I further have a virtual machine with tap0. Both 
>> eth1.123
>> and tap0 are member of br123. 
>> 
>> If the issue occurs the vServer has no network connectivity inbound. If I 
>> sent a ping
>> from the vServer I see it on tap0 and leaving on eth1.123. I see further the 
>> arp reply coming
>> in via eth1.123, but the reply can't be seen on tap0.
>> 
>> Peter
> 
> If guest is not consuming packets, a TX queue in tap device
> will with time overrun (there's space for 1000 packets there).
> This is code from tun:

From what I remember there where zero TX packets and no TX errors
on the device.

Might it be that this queue is somehow not cleared correctly when
the device is reassigned (although it was nonpersistant in between).

Thank you,
Peter

> 
>if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
>> = dev->tx_queue_len / tun->numqueues){
>if (!(tun->flags & TUN_ONE_QUEUE)) {
>/* Normal queueing mode. */
>/* Packet scheduler handles dropping of further
> * packets. */
>netif_stop_subqueue(dev, txq);
> 
>/* We won't see all dropped packets
> * individually, so overrun
> * error is more appropriate. */
>dev->stats.tx_fifo_errors++;
> 
> 
> So you can detect that this triggered by looking at fifo errors counter in 
> device.
> 
> Once this happens TX queue is stopped, then you hit this path:
> 
>if (!netif_xmit_stopped(txq)) {
>__this_cpu_inc(xmit_recursion);
>rc = dev_hard_start_xmit(skb, dev, txq);
>__this_cpu_dec(xmit_recursion);
>if (dev_xmit_complete(rc)) {
>HARD_TX_UNLOCK(dev, txq);
>goto out;
>}
>}
> 
> so packets are not passed to device anymore.
> It will stay this way until guest consumes some packets and
> queue is restarted.
> 
>>> 
>>> Stefan




[Qemu-devel] [PATCH] win32: Switch thread abstraction to us TLS variable internally

2012-11-23 Thread Jan Kiszka
We already depend on working __thread support for coroutines, so this
complication here is no longer needed.

Signed-off-by: Jan Kiszka 
---

Post-1.3 material.

 qemu-thread-win32.c |   24 ++--
 1 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c
index 4b3db60..6499ce9 100644
--- a/qemu-thread-win32.c
+++ b/qemu-thread-win32.c
@@ -239,7 +239,7 @@ struct QemuThreadData {
 CRITICAL_SECTION  cs;
 };
 
-static int qemu_thread_tls_index = TLS_OUT_OF_INDEXES;
+static __thread QemuThreadData *qemu_thread_data;
 
 static unsigned __stdcall win32_start_routine(void *arg)
 {
@@ -251,14 +251,15 @@ static unsigned __stdcall win32_start_routine(void *arg)
 g_free(data);
 data = NULL;
 }
-TlsSetValue(qemu_thread_tls_index, data);
+qemu_thread_data = data;
 qemu_thread_exit(start_routine(thread_arg));
 abort();
 }
 
 void qemu_thread_exit(void *arg)
 {
-QemuThreadData *data = TlsGetValue(qemu_thread_tls_index);
+QemuThreadData *data = qemu_thread_data;
+
 if (data) {
 assert(data->mode != QEMU_THREAD_DETACHED);
 data->ret = arg;
@@ -298,25 +299,13 @@ void *qemu_thread_join(QemuThread *thread)
 return ret;
 }
 
-static inline void qemu_thread_init(void)
-{
-if (qemu_thread_tls_index == TLS_OUT_OF_INDEXES) {
-qemu_thread_tls_index = TlsAlloc();
-if (qemu_thread_tls_index == TLS_OUT_OF_INDEXES) {
-error_exit(ERROR_NO_SYSTEM_RESOURCES, __func__);
-}
-}
-}
-
-
 void qemu_thread_create(QemuThread *thread,
void *(*start_routine)(void *),
void *arg, int mode)
 {
 HANDLE hThread;
-
 struct QemuThreadData *data;
-qemu_thread_init();
+
 data = g_malloc(sizeof *data);
 data->start_routine = start_routine;
 data->arg = arg;
@@ -338,8 +327,7 @@ void qemu_thread_create(QemuThread *thread,
 
 void qemu_thread_get_self(QemuThread *thread)
 {
-qemu_thread_init();
-thread->data = TlsGetValue(qemu_thread_tls_index);
+thread->data = qemu_thread_data;
 thread->tid = GetCurrentThreadId();
 }
 
-- 
1.7.3.4



Re: [Qemu-devel] [PATCH] win32: Switch thread abstraction to us TLS variable internally

2012-11-23 Thread Paolo Bonzini

> We already depend on working __thread support for coroutines, so this
> complication here is no longer needed.
> 
> Signed-off-by: Jan Kiszka 

Reviewed-by: Paolo Bonzini 

> ---
> 
> Post-1.3 material.
> 
>  qemu-thread-win32.c |   24 ++--
>  1 files changed, 6 insertions(+), 18 deletions(-)
> 
> diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c
> index 4b3db60..6499ce9 100644
> --- a/qemu-thread-win32.c
> +++ b/qemu-thread-win32.c
> @@ -239,7 +239,7 @@ struct QemuThreadData {
>  CRITICAL_SECTION  cs;
>  };
>  
> -static int qemu_thread_tls_index = TLS_OUT_OF_INDEXES;
> +static __thread QemuThreadData *qemu_thread_data;
>  
>  static unsigned __stdcall win32_start_routine(void *arg)
>  {
> @@ -251,14 +251,15 @@ static unsigned __stdcall
> win32_start_routine(void *arg)
>  g_free(data);
>  data = NULL;
>  }
> -TlsSetValue(qemu_thread_tls_index, data);
> +qemu_thread_data = data;
>  qemu_thread_exit(start_routine(thread_arg));
>  abort();
>  }
>  
>  void qemu_thread_exit(void *arg)
>  {
> -QemuThreadData *data = TlsGetValue(qemu_thread_tls_index);
> +QemuThreadData *data = qemu_thread_data;
> +
>  if (data) {
>  assert(data->mode != QEMU_THREAD_DETACHED);
>  data->ret = arg;
> @@ -298,25 +299,13 @@ void *qemu_thread_join(QemuThread *thread)
>  return ret;
>  }
>  
> -static inline void qemu_thread_init(void)
> -{
> -if (qemu_thread_tls_index == TLS_OUT_OF_INDEXES) {
> -qemu_thread_tls_index = TlsAlloc();
> -if (qemu_thread_tls_index == TLS_OUT_OF_INDEXES) {
> -error_exit(ERROR_NO_SYSTEM_RESOURCES, __func__);
> -}
> -}
> -}
> -
> -
>  void qemu_thread_create(QemuThread *thread,
> void *(*start_routine)(void *),
> void *arg, int mode)
>  {
>  HANDLE hThread;
> -
>  struct QemuThreadData *data;
> -qemu_thread_init();
> +
>  data = g_malloc(sizeof *data);
>  data->start_routine = start_routine;
>  data->arg = arg;
> @@ -338,8 +327,7 @@ void qemu_thread_create(QemuThread *thread,
>  
>  void qemu_thread_get_self(QemuThread *thread)
>  {
> -qemu_thread_init();
> -thread->data = TlsGetValue(qemu_thread_tls_index);
> +thread->data = qemu_thread_data;
>  thread->tid = GetCurrentThreadId();
>  }
>  
> --
> 1.7.3.4
> 



Re: [Qemu-devel] [Qemu-trivial] [PATCH] Legacy qemu-kvm options have no argument

2012-11-23 Thread Luiz Capitulino
On Fri, 23 Nov 2012 07:50:49 +0100
Stefan Hajnoczi  wrote:

> On Thu, Nov 22, 2012 at 10:34:11AM -0200, Luiz Capitulino wrote:
> > On Wed, 21 Nov 2012 13:12:53 +0100
> > Jan Kiszka  wrote:
> > 
> > > On 2012-11-20 16:14, Markus Armbruster wrote:
> > > > Cc'ing Marcelo & Jan to speed up patch application.
> > > > 
> > > > Bruce Rogers  writes:
> > > > 
> > > >> The options no-kvm, no-kvm-pit, no-kvm-pit-reinjection, and 
> > > >> no-kvm-irqchip
> > > >> should be marked as having no argument.
> > > >>
> > > >> Signed-off-by: Bruce Rogers 
> > > >> ---
> > > >>  qemu-options.hx |8 
> > > >>  1 files changed, 4 insertions(+), 4 deletions(-)
> > > >>
> > > >> diff --git a/qemu-options.hx b/qemu-options.hx
> > > >> index 9bb29d3..fbcf079 100644
> > > >> --- a/qemu-options.hx
> > > >> +++ b/qemu-options.hx
> > > >> @@ -2918,17 +2918,17 @@ Enable FIPS 140-2 compliance mode.
> > > >>  ETEXI
> > > >>  
> > > >>  HXCOMM Deprecated by -machine accel=tcg property
> > > >> -DEF("no-kvm", HAS_ARG, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)
> > > >> +DEF("no-kvm", 0, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)
> > > >>  
> > > >>  HXCOMM Deprecated by kvm-pit driver properties
> > > >> -DEF("no-kvm-pit-reinjection", HAS_ARG, 
> > > >> QEMU_OPTION_no_kvm_pit_reinjection,
> > > >> +DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection,
> > > >>  "", QEMU_ARCH_I386)
> > > >>  
> > > >>  HXCOMM Deprecated (ignored)
> > > >> -DEF("no-kvm-pit", HAS_ARG, QEMU_OPTION_no_kvm_pit, "", QEMU_ARCH_I386)
> > > >> +DEF("no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit, "", QEMU_ARCH_I386)
> > > >>  
> > > >>  HXCOMM Deprecated by -machine kernel_irqchip=on|off property
> > > >> -DEF("no-kvm-irqchip", HAS_ARG, QEMU_OPTION_no_kvm_irqchip, "", 
> > > >> QEMU_ARCH_I386)
> > > >> +DEF("no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip, "", 
> > > >> QEMU_ARCH_I386)
> > > >>  
> > > >>  HXCOMM Deprecated (ignored)
> > > >>  DEF("tdf", 0, QEMU_OPTION_tdf,"", QEMU_ARCH_ALL)
> > > 
> > > Reviewed-by: Jan Kiszka 
> > 
> > Candidate for -trivial.
> 
> This is more urgent than trivial.  It is necessary for qemu-kvm
> compatibility and I think should go into QEMU 1.3.  Otherwise we have to
> wait until QEMU 1.4 to complete the qemu-kvm -> qemu transition.

That's true, I missed the fact that you don't do pull requests during
hard freeze...

Thanks!



[Qemu-devel] for-1.3: [PATCH] chardev: Use real-time clock for open timer

2012-11-23 Thread Luiz Capitulino
To avoid it getting lost in qemu forest.



Re: [Qemu-devel] [RFC PATCH v2 1/3] virtio-bus : Introduce VirtioBus.

2012-11-23 Thread Cornelia Huck
On Thu, 22 Nov 2012 15:50:50 +0100
fred.kon...@greensocs.com wrote:


> +/* Create a virtio bus.  */
> +VirtioBus *virtio_bus_new(DeviceState *host, const VirtioBusInfo *info)
> +{
> +/*
> + * This is needed, as we want to have different names for each 
> virtio-bus.
> + * If we don't do that, we can't add more than one VirtIODevice.
> + */
> +static int next_virtio_bus;
> +char *bus_name = g_strdup_printf("virtio-bus.%d", next_virtio_bus++);

This still has the overflow/id-reuse problem, hasn't it?

> +
> +BusState *qbus = qbus_create(TYPE_VIRTIO_BUS, host, bus_name);
> +VirtioBus *bus = VIRTIO_BUS(qbus);
> +bus->info = info;
> +qbus->allow_hotplug = 0;
> +bus->bus_in_use = false;
> +DPRINTF("%s bus created\n", bus_name);
> +return bus;
> +}

Don't you need a way to destroy the bus again when the proxy device is
hotunplugged?




Re: [Qemu-devel] [PATCH v3 1/1] atapi: make change media detection for guests easier

2012-11-23 Thread Kevin Wolf
Am 21.11.2012 18:17, schrieb Pavel Hrdina:
> If you have a guest with a media in the optical drive and you change the media
> the windows guest cannot properly recognize this media change.
> 
> Windows needs to detect the sense "NOT_READY with ASC_MEDIUM_NOT_PRESENT"
> before we send the sense "UNIT_ATTENTION with ASC_MEDIUM_MAY_HAVE_CHANGED".
> 
> v3: remove timeout as it isn't needed anymore
> 
> v2: disable debug messages
> 
> Signed-off-by: Pavel Hrdina 

Hope that we'll get libqos soon so that IDE can be properly tested with
qtest. CD-ROM media change is something that broke more than once.

The change makes sense to me, it's one of these "how could this ever
work?" cases. However I do have one or two comments:

> ---
>  hw/ide/atapi.c| 16 +++-
>  hw/ide/core.c | 12 
>  hw/ide/internal.h |  1 +
>  3 files changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 685cbaa..1534afe 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -1124,12 +1124,18 @@ void ide_atapi_cmd(IDEState *s)
>   * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
>   * states rely on this behavior.
>   */
> -if (!s->tray_open && bdrv_is_inserted(s->bs) && s->cdrom_changed) {
> -ide_atapi_cmd_error(s, NOT_READY, ASC_MEDIUM_NOT_PRESENT);
> +if (!(atapi_cmd_table[s->io_buffer[0]].flags & ALLOW_UA) &&
> +!s->tray_open && bdrv_is_inserted(s->bs) && s->cdrom_changed) {
> +
> +if (!s->fake_cdrom_eject) {
> +ide_atapi_cmd_error(s, NOT_READY, ASC_MEDIUM_NOT_PRESENT);
> +s->fake_cdrom_eject = 1;
> +} else {
> +ide_atapi_cmd_error(s, UNIT_ATTENTION, 
> ASC_MEDIUM_MAY_HAVE_CHANGED);
> +s->fake_cdrom_eject = 0;
> +s->cdrom_changed = 0;
> +}
>  
> -s->cdrom_changed = 0;
> -s->sense_key = UNIT_ATTENTION;
> -s->asc = ASC_MEDIUM_MAY_HAVE_CHANGED;
>  return;
>  }
>  
> diff --git a/hw/ide/core.c b/hw/ide/core.c
> index 7d6b0fa..013671a 100644
> --- a/hw/ide/core.c
> +++ b/hw/ide/core.c
> @@ -1851,6 +1851,7 @@ static void ide_reset(IDEState *s)
>  s->sense_key = 0;
>  s->asc = 0;
>  s->cdrom_changed = 0;
> +s->fake_cdrom_eject = 0;
>  s->packet_transfer_size = 0;
>  s->elementary_transfer_size = 0;
>  s->io_buffer_index = 0;
> @@ -2143,6 +2144,16 @@ static int transfer_end_table_idx(EndTransferFunc *fn)
>  return -1;
>  }
>  
> +static void ide_drive_pre_save(void *opaque)
> +{
> +IDEState *s = opaque;
> +
> +if (s->cdrom_changed) {
> +s->sense_key = UNIT_ATTENTION;
> +s->asc = ASC_MEDIUM_MAY_HAVE_CHANGED;
> +}
> +}

If we migrate immediately after the media change, before the OS has sent
another request, we skip the ASC_MEDIUM_NOT_PRESENT step, don't we? As
far as I can tell, adding this step is the real fix, so won't media
change break during migration this way?

The other thing is that if it's valid to set s->sense_key/asc in any
place instead of just during the start of a command (is it? I would
guess so, but I'm not sure), why don't we set ASC_MEDIUM_NOT_PRESENT
already in the change handler?

Another thing I would consider is using cdrom_changed = 0/1/2 instead of
adding fake_cdrom_eject to add another state. Migration would
automatically do the right thing for it, old versions would in both 1
and 2 state switch to ASC_MEDIUM_MAY_HAVE_CHANGED next, which is correct
in the latter case and is in the former case the same bug as the old
qemu we're migrating to has anyway.

Kevin



Re: [Qemu-devel] [RFC PATCH v2 2/3] virtio-pci : add a virtio-bus interface

2012-11-23 Thread Cornelia Huck
On Thu, 22 Nov 2012 15:50:51 +0100
fred.kon...@greensocs.com wrote:


> +static void virtiopci_class_init(ObjectClass *oc, void *data)
> +{
> +DeviceClass *dc = DEVICE_CLASS(oc);
> +PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
> +
> +pc->init = virtiopci_qdev_init;
> +pc->exit = virtio_exit_pci;

Doesn't the exit function need to be symmetric to the init function?

> +pc->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> +pc->revision = VIRTIO_PCI_ABI_VERSION;
> +pc->class_id = PCI_CLASS_OTHERS;
> +/* TODO : Add the correct device information below */
> +/* pc->exit =
> + * pc->device_id =
> + * pc->subsystem_vendor_id =
> + * pc->subsystem_id =
> + * dc->reset =
> + * dc->vmsd =
> + */
> +dc->props = virtiopci_properties;
> +dc->desc = "virtio-pci transport.";
> +}




Re: [Qemu-devel] [RFC PATCH v2 1/3] virtio-bus : Introduce VirtioBus.

2012-11-23 Thread Stefan Hajnoczi
On Thu, Nov 22, 2012 at 03:50:50PM +0100, fred.kon...@greensocs.com wrote:
> +/* Bind the VirtIODevice to the VirtioBus. */
> +void virtio_bus_bind_device(VirtioBus *bus)
> +{
> +BusState *qbus = BUS(bus);
> +assert(bus != NULL);
> +assert(bus->vdev != NULL);
> +virtio_bind_device(bus->vdev, &(bus->info->virtio_bindings), 
> qbus->parent);
> +}

Should plug and bind be together in a single function?  Binding the
device seems like an internal step that the bus takes when you plug the
device.

> +struct VirtioBusInfo {

This is defining an ad-hoc interface.  QOM has support for interfaces so
that a virtio-pci adapter brovides a VirtioBindingInterface which
VirtioBus can talk to intead of using VirtioBusInfo.

> +void (*init_cb)(DeviceState *dev);
> +void (*exit_cb)(DeviceState *dev);

Can _cb be dropped from the name?  Structs with function pointers always
provide "callbacks" so the _cb is unnecessary.  For example, QOM methods
like BusClass->reset() don't include _cb either.

> +VirtIOBindings virtio_bindings;
> +};
> +
> +struct VirtioBus {
> +BusState qbus;
> +bool bus_in_use;

Should bus_in_use basically be bus->vdev != NULL?



Re: [Qemu-devel] [RFC PATCH v2 2/3] virtio-pci : add a virtio-bus interface

2012-11-23 Thread Stefan Hajnoczi
On Thu, Nov 22, 2012 at 03:50:51PM +0100, fred.kon...@greensocs.com wrote:
> +static const struct VirtioBusInfo virtio_bus_info = {
> +.init_cb = virtio_pci_init_cb,
> +.exit_cb = virtio_pci_exit_cb,
> +
> +.virtio_bindings = {

Eventually VirtIOBindings can probably be inlined into VirtioBusInfo.  I
don't see a need for separate structs.

> +.notify = virtio_pci_notify,
> +.save_config = virtio_pci_save_config,
> +.load_config = virtio_pci_load_config,
> +.save_queue = virtio_pci_save_queue,
> +.load_queue = virtio_pci_load_queue,
> +.get_features = virtio_pci_get_features,
> +.query_guest_notifiers = virtio_pci_query_guest_notifiers,
> +.set_host_notifier = virtio_pci_set_host_notifier,
> +.set_guest_notifiers = virtio_pci_set_guest_notifiers,
> +.vmstate_change = virtio_pci_vmstate_change,
> +}
> +};



Re: [Qemu-devel] [RFC PATCH v2 3/3] virtio-blk : add the virtio-blk device.

2012-11-23 Thread Stefan Hajnoczi
On Thu, Nov 22, 2012 at 03:50:52PM +0100, fred.kon...@greensocs.com wrote:
> @@ -656,3 +658,83 @@ void virtio_blk_exit(VirtIODevice *vdev)
>  blockdev_mark_auto_del(s->bs);
>  virtio_cleanup(vdev);
>  }
> +
> +static int virtio_blk_qdev_init(DeviceState *qdev)
> +{
> +VirtIOBLKState *s = VIRTIO_BLK(qdev);

Please use "Block" or "Blk", not "BLK".



Re: [Qemu-devel] [RFC PATCH v2 2/3] virtio-pci : add a virtio-bus interface

2012-11-23 Thread Peter Maydell
On 23 November 2012 12:29, Stefan Hajnoczi  wrote:
> On Thu, Nov 22, 2012 at 03:50:51PM +0100, fred.kon...@greensocs.com wrote:
>> +static const struct VirtioBusInfo virtio_bus_info = {
>> +.init_cb = virtio_pci_init_cb,
>> +.exit_cb = virtio_pci_exit_cb,
>> +
>> +.virtio_bindings = {
>
> Eventually VirtIOBindings can probably be inlined into VirtioBusInfo.  I
> don't see a need for separate structs.

I agree. It might (or might not) be convenient to retain it
temporarily while converting all the transports, but
VirtIOBindings is part of the old code which we're trying
to refactor here, and I'd expect it to go away when we're done.

-- PMM



[Qemu-devel] [PATCH] S390: Basic CPU model support

2012-11-23 Thread Jens Freimann
From: Viktor Mihajlovski 

This enables qemu -cpu ? to return the list of supported CPU models
on s390. Since only the host model is supported at this point in time
this is pretty straight-forward. Further, a validity check for the
requested CPU model was added.
This change is needed to allow libvirt exploiters (like OpenStack)
to specify a CPU model.

Signed-off-by: Viktor Mihajlovski 
Signed-off-by: Jens Freimann 
Reviewed-by: Christian Borntraeger 
---

Added copyright statement changes for s390-virtio.c, equivalent to the 
changes we made to kvm.c recently.  

 hw/s390-virtio.c   | 11 ++-
 target-s390x/cpu.c |  6 ++
 target-s390x/cpu.h |  3 +++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 685cb54..bb85de7 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -2,6 +2,7 @@
  * QEMU S390 virtio target
  *
  * Copyright (c) 2009 Alexander Graf 
+ * Copyright IBM Corp 2012
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -13,7 +14,10 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU Lesser General Public
+ * Contributions after 2012-10-29 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU (Lesser) General Public
  * License along with this library; if not, see .
  */
 
@@ -209,6 +213,11 @@ static void s390_init(QEMUMachineInitArgs *args)
 cpu_model = "host";
 }
 
+if (strcmp(cpu_model, "host")) {
+fprintf(stderr, "S390 only supports host CPU model\n");
+exit(1);
+}
+
 ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
 
 for (i = 0; i < smp_cpus; i++) {
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index 619b202..03fdc31 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -25,6 +25,12 @@
 #include "qemu-timer.h"
 
 
+/* generate CPU information for cpu -? */
+void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf)
+{
+(*cpu_fprintf)(f, "s390 %16s\n", "[host]");
+}
+
 /* CPUClass::reset() */
 static void s390_cpu_reset(CPUState *s)
 {
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 0f9a1f7..3513976 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -350,6 +350,9 @@ static inline void cpu_set_tls(CPUS390XState *env, 
target_ulong newtls)
 #define cpu_gen_code cpu_s390x_gen_code
 #define cpu_signal_handler cpu_s390x_signal_handler
 
+void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
+#define cpu_list s390_cpu_list
+
 #include "exec-all.h"
 
 #ifdef CONFIG_USER_ONLY
-- 
1.7.12.4




Re: [Qemu-devel] [RFC PATCH v2 0/3] Virtio-refactoring.

2012-11-23 Thread Stefan Hajnoczi
On Thu, Nov 22, 2012 at 03:50:49PM +0100, fred.kon...@greensocs.com wrote:
> From: KONRAD Frederic 
> I made the changes you suggest in the last RFC. 
> 
> There are still two issues with the command line :
> 
> * When I use ./qemu* -device virtio-blk -device virtio-pci
>   It is said that no virtio-bus are present.
> * The virtio-blk is plugged in the last created virtio-bus if no "bus="
>   option is present. It's an issue as we can only plug one virtio-device.
> 
> The first problem is a more general issue as it is the case for the SCSI bus 
> and
> can be fixed later.

Thanks for sharing virtio refactoring progress.

I think the challenge will be truly converting existing code over to the
new approach.  This RFC series adds a new layer on top of the existing
code but doesn't actually replace it.

Would be interesting to see the complete picture, even if you need to
leave some TODOs in the middle when sending RFC patches.

Stefan



[Qemu-devel] [PATCH 0/1] [PULL] qemu-kvm.git uq/master queue

2012-11-23 Thread Marcelo Tosatti
The following changes since commit 1ccbc2851282564308f790753d7158487b6af8e2:

  qemu-sockets: Fix parsing of the inet option 'to'. (2012-11-21 12:07:59 +0400)

are available in the git repository at:
  git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git uq/master

Bruce Rogers (1):
  Legacy qemu-kvm options have no argument

 qemu-options.hx |8 
 1 files changed, 4 insertions(+), 4 deletions(-)



[Qemu-devel] [PATCH 1/1] Legacy qemu-kvm options have no argument

2012-11-23 Thread Marcelo Tosatti
From: Bruce Rogers 

The options no-kvm, no-kvm-pit, no-kvm-pit-reinjection, and no-kvm-irqchip
should be marked as having no argument.

Signed-off-by: Bruce Rogers 
Reviewed-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 
---
 qemu-options.hx |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 9bb29d3..fbcf079 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2918,17 +2918,17 @@ Enable FIPS 140-2 compliance mode.
 ETEXI
 
 HXCOMM Deprecated by -machine accel=tcg property
-DEF("no-kvm", HAS_ARG, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)
+DEF("no-kvm", 0, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)
 
 HXCOMM Deprecated by kvm-pit driver properties
-DEF("no-kvm-pit-reinjection", HAS_ARG, QEMU_OPTION_no_kvm_pit_reinjection,
+DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection,
 "", QEMU_ARCH_I386)
 
 HXCOMM Deprecated (ignored)
-DEF("no-kvm-pit", HAS_ARG, QEMU_OPTION_no_kvm_pit, "", QEMU_ARCH_I386)
+DEF("no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit, "", QEMU_ARCH_I386)
 
 HXCOMM Deprecated by -machine kernel_irqchip=on|off property
-DEF("no-kvm-irqchip", HAS_ARG, QEMU_OPTION_no_kvm_irqchip, "", QEMU_ARCH_I386)
+DEF("no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip, "", QEMU_ARCH_I386)
 
 HXCOMM Deprecated (ignored)
 DEF("tdf", 0, QEMU_OPTION_tdf,"", QEMU_ARCH_ALL)
-- 
1.7.6.4




Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread David Gibson
On Fri, Nov 23, 2012 at 12:53:23PM +0200, Michael S. Tsirkin wrote:
> On Fri, Nov 23, 2012 at 03:13:07PM +1100, David Gibson wrote:
> > On Thu, Nov 22, 2012 at 09:23:03AM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 22, 2012 at 01:27:18PM +1100, David Gibson wrote:
> > > > On Wed, Nov 21, 2012 at 05:27:37PM +0200, Michael S. Tsirkin wrote:
> > > > > On Wed, Nov 21, 2012 at 02:27:08PM +0100, Alexander Graf wrote:
> > > > > > On 11/21/2012 02:21 PM, David Gibson wrote:
> > > > > > >On Wed, Nov 21, 2012 at 03:13:39PM +0200, Michael S. Tsirkin wrote:
> > > > > > >>On Wed, Nov 21, 2012 at 11:36:00PM +1100, David Gibson wrote:
> > > > > > >>>On Wed, Nov 21, 2012 at 01:34:48PM +0200, Michael S. Tsirkin 
> > > > > > >>>wrote:
> > > > > > On Wed, Nov 21, 2012 at 11:57:05AM +1100, David Gibson wrote:
> > > > > > >On Tue, Nov 20, 2012 at 02:26:09PM +0200, Michael S. Tsirkin 
> > > > > > >wrote:
> > > > > > >>On Tue, Nov 20, 2012 at 10:27:11AM +0100, Alexander Graf 
> > > > > > >>wrote:
> > > > > > >>>On 19.11.2012, at 23:51, David Gibson wrote:
> > > > > > >>>
> > > > > > On Mon, Nov 19, 2012 at 05:34:12PM +0100, Alexander Graf 
> > > > > > wrote:
> > > > > > >On 13.11.2012, at 03:47, David Gibson wrote:
> > > > > > >
> > > > > > >>From: Alexey Kardashevskiy
> > > > > > >>
> > > > > > >>In future (with VFIO) we will have multiple PCI host 
> > > > > > >>bridges on
> > > > > > >>pseries.  Each one needs a unique LIOBN (IOMMU id).  At 
> > > > > > >>the moment we
> > > > > > >>derive these from the pci domain number, but the whole 
> > > > > > >>notion of
> > > > > > >>domain numbers on the qemu side is bogus and in any case 
> > > > > > >>they're not
> > > > > > >>actually uniquely allocated at this point.
> > > > > > >>
> > > > > > >>This patch, therefore uses a simple sequence counter to 
> > > > > > >>generate
> > > > > > >>unique LIOBNs for PCI host bridges.
> > > > > > >>
> > > > > > >>Signed-off-by: Alexey Kardashevskiy
> > > > > > >>Signed-off-by: David Gibson
> > > > > > >I don't really like the idea of having a global variable 
> > > > > > >just
> > > > > > >because our domain ID generation seems to not work as
> > > > > > >expected. Michael, any comments here?
> > > > > > Well, the patch I sent which changed domain id generation 
> > > > > > was
> > > > > > ignored.  In any case, as I said, the whole concept of 
> > > > > > domain numbers
> > > > > > >>>Michael?
> > > > > > >>This is user visible, right?
> > > > > > >>So IMHO we should have the user specify LIOBN through a 
> > > > > > >>property,
> > > > > > >>rather than assign what's essentially a random value.
> > > > > > >Well, I can implement an override through a property, which 
> > > > > > >could be
> > > > > > >useful in some circumstances.  But we still need to have qemu 
> > > > > > >generate
> > > > > > >unique defaults, rather than forcing it to be specified in 
> > > > > > >every case.
> > > > > > I don't see why.
> > > > > > And if you want automatic defaults then they need to be 
> > > > > > generated in a
> > > > > > way that does not depend on implementation detail such as order 
> > > > > > of
> > > > > > device initialization.
> > > > > > >>>Because requiring explicit unique liobns to be supplied whenever 
> > > > > > >>>there
> > > > > > >>>is more than one PHB is horrible for usability.
> > > > > > >>We should make simple things simple and complex things possible.
> > > > > > >>More than one PHB seems like an advanced feature
> > > > > > >Not for pseries.  On real hardware of this type, dozens of PHBs is
> > > > > > >routine.  Plus, vfio passthrough is coming, we need at minimum one 
> > > > > > >PHB
> > > > > > >for emulated devices and one for passthrough devices.
> > > > > > 
> > > > > > Yeah, I second Davids opinion here. We need to make this easy for 
> > > > > > users.
> > > > > 
> > > > > I think users don't normally create PHBs. They request a disk, a 
> > > > > network
> > > > > device, a pass-through device ... In this case if this requires
> > > > > more PHBs create them internally but I imagine this doesn't
> > > > > require any allocation scheme - simply set some address
> > > > > for virtual devices, some other one for assigned devices ... no?
> > > > 
> > > > No.  One PHB for passthrough and one for emulated is the minimum.
> > > > Since we don't emulated p2p bridges,
> > > 
> > > Actually qemu does emulate p2p bridges.
> > > 
> > > > each PHB can only support a small
> > > > number of PCI devices, so if enough PCI devices are requested, we will
> > > > still need to create - and assign numbers to - additional PHBs.
> > > 
> > > Each PHB can support up to 32 slots right? This seems amp

[Qemu-devel] qemu-xen update to QEMU 1.3.0-rc0

2012-11-23 Thread Stefano Stabellini
Hi all,
I have just updated the qemu-xen tree (qemu-upstream-unstable.git) that
we use with xen-unstable for QEMU development to QEMU 1.3.0-rc0 (plus a
couple of commits).

After the updated qemu-xen tree manages to pass the required automated
tests, I'll send a patch to flip the switch in libxl to set qemu-xen as
the *default device model*.

I'll be tracking the QEMU 1.3 RCs very closely from now on and I am aiming at
releasing QEMU 1.3 as the device emulator of choice for Xen 4.3.

Cheers,

Stefano



Re: [Qemu-devel] [PATCH] S390: Basic CPU model support

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 13:36, Jens Freimann wrote:

> From: Viktor Mihajlovski 
> 
> This enables qemu -cpu ? to return the list of supported CPU models
> on s390. Since only the host model is supported at this point in time
> this is pretty straight-forward. Further, a validity check for the
> requested CPU model was added.
> This change is needed to allow libvirt exploiters (like OpenStack)
> to specify a CPU model.
> 
> Signed-off-by: Viktor Mihajlovski 
> Signed-off-by: Jens Freimann 
> Reviewed-by: Christian Borntraeger 
> ---
> 
> Added copyright statement changes for s390-virtio.c, equivalent to the 
> changes we made to kvm.c recently.  
> 
> hw/s390-virtio.c   | 11 ++-
> target-s390x/cpu.c |  6 ++
> target-s390x/cpu.h |  3 +++
> 3 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
> index 685cb54..bb85de7 100644
> --- a/hw/s390-virtio.c
> +++ b/hw/s390-virtio.c
> @@ -2,6 +2,7 @@
>  * QEMU S390 virtio target
>  *
>  * Copyright (c) 2009 Alexander Graf 
> + * Copyright IBM Corp 2012
>  *
>  * This library is free software; you can redistribute it and/or
>  * modify it under the terms of the GNU Lesser General Public
> @@ -13,7 +14,10 @@
>  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>  * Lesser General Public License for more details.
>  *
> - * You should have received a copy of the GNU Lesser General Public
> + * Contributions after 2012-10-29 are licensed under the terms of the
> + * GNU GPL, version 2 or (at your option) any later version.
> + *
> + * You should have received a copy of the GNU (Lesser) General Public
>  * License along with this library; if not, see 
> .
>  */
> 
> @@ -209,6 +213,11 @@ static void s390_init(QEMUMachineInitArgs *args)
> cpu_model = "host";
> }
> 
> +if (strcmp(cpu_model, "host")) {
> +fprintf(stderr, "S390 only supports host CPU model\n");

This is not true. KVM only supports -cpu "host". TCG only supports -cpu z9 (or 
so - the very first 64-bit CPU, whatever that one was called). We should set 
the default depending on which mode we're in.


Alex

> +exit(1);
> +}
> +
> ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
> 
> for (i = 0; i < smp_cpus; i++) {
> diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
> index 619b202..03fdc31 100644
> --- a/target-s390x/cpu.c
> +++ b/target-s390x/cpu.c
> @@ -25,6 +25,12 @@
> #include "qemu-timer.h"
> 
> 
> +/* generate CPU information for cpu -? */
> +void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf)
> +{
> +(*cpu_fprintf)(f, "s390 %16s\n", "[host]");
> +}
> +
> /* CPUClass::reset() */
> static void s390_cpu_reset(CPUState *s)
> {
> diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
> index 0f9a1f7..3513976 100644
> --- a/target-s390x/cpu.h
> +++ b/target-s390x/cpu.h
> @@ -350,6 +350,9 @@ static inline void cpu_set_tls(CPUS390XState *env, 
> target_ulong newtls)
> #define cpu_gen_code cpu_s390x_gen_code
> #define cpu_signal_handler cpu_s390x_signal_handler
> 
> +void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
> +#define cpu_list s390_cpu_list
> +
> #include "exec-all.h"
> 
> #ifdef CONFIG_USER_ONLY
> -- 
> 1.7.12.4
> 




Re: [Qemu-devel] [PATCH 2/3] s390: clear registers, psw and prefix at vcpu reset

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 11:18, Jens Freimann wrote:

> When resetting vcpus on s390/kvm we have to clear registers, psw
> and prefix as described in the z/Architecture PoP, otherwise a
> reboot won't work. IPL PSW and prefix are set later on by the
> s390-ipl device reset code.
> 
> Signed-off-by: Jens Freimann 
> ---
> target-s390x/kvm.c | 26 +-
> 1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
> index 94de764..b1b791e 100644
> --- a/target-s390x/kvm.c
> +++ b/target-s390x/kvm.c

This needs to go into generic vcpu reset code.


Alex

> @@ -85,7 +85,31 @@ int kvm_arch_init_vcpu(CPUS390XState *env)
> 
> void kvm_arch_reset_vcpu(CPUS390XState *env)
> {
> -/* FIXME: add code to reset vcpu. */
> +int i;
> +
> +/* The initial reset call is needed here to reset in-kernel
> + * vcpu data that we can't access directly from QEMU. Before
> + * this ioctl cpu_synchronize_state() is called in common kvm
> + * code (kvm-all). What remains is clearing registers and psw
> + * in QEMU cpu state */
> +if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL)) {
> +perror("Can't reset vcpu\n");
> +}
> +env->halted = 1;
> +env->exception_index = EXCP_HLT;
> +for (i = 0; i < 16; i++) {
> +env->regs[i] = 0;
> +env->aregs[i] = 0;
> +env->cregs[i] = 0;
> +env->fregs[i].ll = 0;
> +}
> +/* architectured initial values for CR 0 and 14 */
> +env->cregs[0] = 0xE0UL;
> +env->cregs[14] = 0xC200UL;
> +env->fpc = 0;
> +env->psw.mask = 0;
> +env->psw.addr = 0;
> +env->psa = 0;
> }
> 
> int kvm_arch_put_registers(CPUS390XState *env, int level)
> -- 
> 1.7.12.4
> 




Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Michael S. Tsirkin
On Fri, Nov 23, 2012 at 11:59:51PM +1100, David Gibson wrote:
> > Look, even if solution using a required property is less elegant for CLI
> > use, it will work, won't it?
> > So how about we merge it so that things work, and then we can discuss a
> > patch on top that auto-generates this property?
> 
> Well, there you have a point.  And actually I've realised there are
> other things we need to assign uniquely for each PHB and don't yet (IO
> window addresses).  So I need to look at a wider rework of this, which
> I'll start on next week.

Fine. Basically my point is it's typically a mistake to
make some userspace visible parameter depend on order
of initialization of devices in qemu. I don't insist
on making users fully specify such parameters but it
is one way to do this.


> -- 
> David Gibson  | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au| minimalist, thank you.  NOT _the_ 
> _other_
>   | _way_ _around_!
> http://www.ozlabs.org/~dgibson



Re: [Qemu-devel] [PATCH 3/3] sclp: Fix uninitialized var in handle_write_event_buf().

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 11:18, Jens Freimann wrote:

> From: Cornelia Huck 
> 
> Some gcc versions rightly complain about a possibly unitialized rc,
> so let's move setting it before the QTAILQ_FOREACH().
> 
> Signed-off-by: Cornelia Huck 
> Signed-off-by: Christian Borntraeger 
> Signed-off-by: Jens Freimann 

Thanks, applied to s390-next.


Alex




Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 14:44, Michael S. Tsirkin wrote:

> On Fri, Nov 23, 2012 at 11:59:51PM +1100, David Gibson wrote:
>>> Look, even if solution using a required property is less elegant for CLI
>>> use, it will work, won't it?
>>> So how about we merge it so that things work, and then we can discuss a
>>> patch on top that auto-generates this property?
>> 
>> Well, there you have a point.  And actually I've realised there are
>> other things we need to assign uniquely for each PHB and don't yet (IO
>> window addresses).  So I need to look at a wider rework of this, which
>> I'll start on next week.
> 
> Fine. Basically my point is it's typically a mistake to
> make some userspace visible parameter depend on order
> of initialization of devices in qemu. I don't insist
> on making users fully specify such parameters but it
> is one way to do this.

I think it's reasonable to require to be able to specify it. If you don't, it's 
fine to base on device order IMHO. But you have to have the ability to specify 
it by hand if your management tool of choice actually wants reproducible 
results.


Alex




Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Michael S. Tsirkin
On Fri, Nov 23, 2012 at 02:44:15PM +0100, Alexander Graf wrote:
> 
> On 23.11.2012, at 14:44, Michael S. Tsirkin wrote:
> 
> > On Fri, Nov 23, 2012 at 11:59:51PM +1100, David Gibson wrote:
> >>> Look, even if solution using a required property is less elegant for CLI
> >>> use, it will work, won't it?
> >>> So how about we merge it so that things work, and then we can discuss a
> >>> patch on top that auto-generates this property?
> >> 
> >> Well, there you have a point.  And actually I've realised there are
> >> other things we need to assign uniquely for each PHB and don't yet (IO
> >> window addresses).  So I need to look at a wider rework of this, which
> >> I'll start on next week.
> > 
> > Fine. Basically my point is it's typically a mistake to
> > make some userspace visible parameter depend on order
> > of initialization of devices in qemu. I don't insist
> > on making users fully specify such parameters but it
> > is one way to do this.
> 
> I think it's reasonable to require to be able to specify it. If you
> don't, it's fine to base on device order IMHO.

Let me clarify why it's not fine.  My understanding is these addresses
do not change across reboots on real hardware.  On the other hand order
of initialization can change because of internal changes in qemu;
thus shut down and start guest under another qemu version
changes addresses.

So it's a bug. No?

> But you have to have the ability to specify it by hand if your
> management tool of choice actually wants reproducible results.
> 
> 
> Alex

How do you know whether your guest relies on reproducible results?
You typically don't. Imagine a guest which does rely on this. Then:

What I propose: user runs qemu with many PHBs but with no iobns, gets error
'initialization failed. please add iobn= where  is a unique
 number 1 to 64K.'

What you propose: user runs qemu with many PHBs but with no iobns,
guest fails to boot or behaves incorrectly.

I think as a user I prefer the first type of failure. No?

-- 
MST



Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 15:01, Michael S. Tsirkin wrote:

> On Fri, Nov 23, 2012 at 02:44:15PM +0100, Alexander Graf wrote:
>> 
>> On 23.11.2012, at 14:44, Michael S. Tsirkin wrote:
>> 
>>> On Fri, Nov 23, 2012 at 11:59:51PM +1100, David Gibson wrote:
> Look, even if solution using a required property is less elegant for CLI
> use, it will work, won't it?
> So how about we merge it so that things work, and then we can discuss a
> patch on top that auto-generates this property?
 
 Well, there you have a point.  And actually I've realised there are
 other things we need to assign uniquely for each PHB and don't yet (IO
 window addresses).  So I need to look at a wider rework of this, which
 I'll start on next week.
>>> 
>>> Fine. Basically my point is it's typically a mistake to
>>> make some userspace visible parameter depend on order
>>> of initialization of devices in qemu. I don't insist
>>> on making users fully specify such parameters but it
>>> is one way to do this.
>> 
>> I think it's reasonable to require to be able to specify it. If you
>> don't, it's fine to base on device order IMHO.
> 
> Let me clarify why it's not fine.  My understanding is these addresses
> do not change across reboots on real hardware.  On the other hand order
> of initialization can change because of internal changes in qemu;
> thus shut down and start guest under another qemu version
> changes addresses.
> 
> So it's a bug. No?
> 
>> But you have to have the ability to specify it by hand if your
>> management tool of choice actually wants reproducible results.
>> 
>> 
>> Alex
> 
> How do you know whether your guest relies on reproducible results?
> You typically don't. Imagine a guest which does rely on this. Then:
> 
> What I propose: user runs qemu with many PHBs but with no iobns, gets error
> 'initialization failed. please add iobn= where  is a unique
> number 1 to 64K.'
> 
> What you propose: user runs qemu with many PHBs but with no iobns,
> guest fails to boot or behaves incorrectly.
> 
> I think as a user I prefer the first type of failure. No?

I tend to disagree. The device creation logic should stay identical with the 
same versioned machine. So if I use -M pc-0.12, I am guaranteed that QEMU's 
internal semantics on which device goes where does not change.

For unstable machine types (which -M pseries is, same as -M pc), we don't 
guarantee that the guest view stays stable across version updates FWIW. So if 
we want to give the user a stable view of the world, we would need to create a 
-M pseries-1.3 which then would always keep the same device creation order.

It's the same for x86, no?


Alex




Re: [Qemu-devel] qcow2: slow internal snapshot creation

2012-11-23 Thread Alexandre DERUMIER
performance is also reduced when snapshot exist. (like if they are no 
preallocated metadatas)


see initial git commit

http://git.qemu.org/?p=qemu.git;a=commit;h=a35e1c177debb01240243bd656caca302410d38c
"qcow2: Metadata preallocation

This introduces a qemu-img create option for qcow2 which allows the metadata to
be preallocated, i.e. clusters are reserved in the refcount table and L1/L2
tables, but no data is written to them. Metadata is quite small, so this
happens in almost no time.

Especially with qcow2 on virtio this helps to gain a bit of performance during
the initial writes. However, as soon as create a snapshot, we're back to the
normal slow speed, obviously. So this isn't the real fix, but kind of a cheat
while we're still having trouble with qcow2 on virtio."



- Mail original -

De: "Dietmar Maurer" 
À: qemu-devel@nongnu.org
Cc: "Kevin Wolf (kw...@redhat.com)" 
Envoyé: Vendredi 23 Novembre 2012 08:26:13
Objet: [Qemu-devel] qcow2: slow internal snapshot creation



qcow2 snapshot on newly created files are fast:

# qemu-img create -f qcow2 test.img 200G
# time qemu-img snapshot -c snap1 test.img
real 0m0.014s

but if metadata is allocated it gets very slow:

# qemu-img create -f qcow2 -o "preallocation=metadata" test.img 200G
# time qemu-img snapshot -c snap1 test.img
real 1m20.399s

but reading the metadata is also fast:

# time qemu-img check test.img
real 0m0.371s

So why is creating a new snapshot that slow – any ideas?



Re: [Qemu-devel] qcow2: slow internal snapshot creation

2012-11-23 Thread Kevin Wolf
Am 23.11.2012 15:03, schrieb Alexandre DERUMIER:
> performance is also reduced when snapshot exist. (like if they are no 
> preallocated metadatas)

Preallocation doesn't matter much these days.

> see initial git commit
> 
> http://git.qemu.org/?p=qemu.git;a=commit;h=a35e1c177debb01240243bd656caca302410d38c
> "qcow2: Metadata preallocation
> 
> This introduces a qemu-img create option for qcow2 which allows the metadata 
> to
> be preallocated, i.e. clusters are reserved in the refcount table and L1/L2
> tables, but no data is written to them. Metadata is quite small, so this
> happens in almost no time.
> 
> Especially with qcow2 on virtio this helps to gain a bit of performance during
> the initial writes. However, as soon as create a snapshot, we're back to the
> normal slow speed, obviously. So this isn't the real fix, but kind of a cheat
> while we're still having trouble with qcow2 on virtio."

This is a commit message from 2009 that already says that this wasn't
the final solution.

Kevin



Re: [Qemu-devel] [PATCH] overflow of int ret: use ssize_t for ret

2012-11-23 Thread Stefan Hajnoczi
On Thu, Nov 22, 2012 at 10:07 AM, Stefan Priebe  wrote:
> diff --git a/block/rbd.c b/block/rbd.c
> index 5a0f79f..0384c6c 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -69,7 +69,7 @@ typedef enum {
>  typedef struct RBDAIOCB {
>  BlockDriverAIOCB common;
>  QEMUBH *bh;
> -int ret;
> +ssize_t ret;
>  QEMUIOVector *qiov;
>  char *bounce;
>  RBDAIOCmd cmd;
> @@ -86,7 +86,7 @@ typedef struct RADOSCB {
>  int done;
>  int64_t size;
>  char *buf;
> -int ret;
> +ssize_t ret;
>  } RADOSCB;
>
>  #define RBD_FD_READ 0

I preferred your previous patch:

ssize_t on 32-bit hosts has sizeof(ssize_t) == 4.  In
qemu_rbd_complete_aio() we may assign acb->ret = rcb->size.  Here the
size field is int64_t, so ssize_t ret would truncate the value.

But BlockDriverCompetionFunc only takes an int argument so we're back
to square one.

The types are busted and changing the type of ret won't fix that :(.

Stefan



Re: [Qemu-devel] [RFC PATCH v2 1/3] virtio-bus : Introduce VirtioBus.

2012-11-23 Thread Konrad Frederic

On 23/11/2012 13:08, Cornelia Huck wrote:

On Thu, 22 Nov 2012 15:50:50 +0100
fred.kon...@greensocs.com wrote:



+/* Create a virtio bus.  */
+VirtioBus *virtio_bus_new(DeviceState *host, const VirtioBusInfo *info)
+{
+/*
+ * This is needed, as we want to have different names for each virtio-bus.
+ * If we don't do that, we can't add more than one VirtIODevice.
+ */
+static int next_virtio_bus;
+char *bus_name = g_strdup_printf("virtio-bus.%d", next_virtio_bus++);

This still has the overflow/id-reuse problem, hasn't it?

What do you mean by overflow problem ?




+
+BusState *qbus = qbus_create(TYPE_VIRTIO_BUS, host, bus_name);
+VirtioBus *bus = VIRTIO_BUS(qbus);
+bus->info = info;
+qbus->allow_hotplug = 0;
+bus->bus_in_use = false;
+DPRINTF("%s bus created\n", bus_name);
+return bus;
+}

Don't you need a way to destroy the bus again when the proxy device is
hotunplugged?

Yes, you're right I must add a way to destroy the bus, and the devices 
when the proxy is hot unplugged!


Thanks,

Fred



Re: [Qemu-devel] [PATCH] overflow of int ret: use ssize_t for ret

2012-11-23 Thread Peter Maydell
On 23 November 2012 14:11, Stefan Hajnoczi  wrote:
> On Thu, Nov 22, 2012 at 10:07 AM, Stefan Priebe  wrote:
>> diff --git a/block/rbd.c b/block/rbd.c
>> index 5a0f79f..0384c6c 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -69,7 +69,7 @@ typedef enum {
>>  typedef struct RBDAIOCB {
>>  BlockDriverAIOCB common;
>>  QEMUBH *bh;
>> -int ret;
>> +ssize_t ret;
>>  QEMUIOVector *qiov;
>>  char *bounce;
>>  RBDAIOCmd cmd;
>> @@ -86,7 +86,7 @@ typedef struct RADOSCB {
>>  int done;
>>  int64_t size;
>>  char *buf;
>> -int ret;
>> +ssize_t ret;
>>  } RADOSCB;
>>
>>  #define RBD_FD_READ 0
>
> I preferred your previous patch:
>
> ssize_t on 32-bit hosts has sizeof(ssize_t) == 4.  In
> qemu_rbd_complete_aio() we may assign acb->ret = rcb->size.  Here the
> size field is int64_t, so ssize_t ret would truncate the value.

The rcb size field should be a size_t: it is used for calling
rbd_aio_write and rbd_aio_read so if we've overflowed 32 bits
then we've already got a problem there.

-- PMM



Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Michael S. Tsirkin
On Fri, Nov 23, 2012 at 03:03:24PM +0100, Alexander Graf wrote:
> 
> On 23.11.2012, at 15:01, Michael S. Tsirkin wrote:
> 
> > On Fri, Nov 23, 2012 at 02:44:15PM +0100, Alexander Graf wrote:
> >> 
> >> On 23.11.2012, at 14:44, Michael S. Tsirkin wrote:
> >> 
> >>> On Fri, Nov 23, 2012 at 11:59:51PM +1100, David Gibson wrote:
> > Look, even if solution using a required property is less elegant for CLI
> > use, it will work, won't it?
> > So how about we merge it so that things work, and then we can discuss a
> > patch on top that auto-generates this property?
>  
>  Well, there you have a point.  And actually I've realised there are
>  other things we need to assign uniquely for each PHB and don't yet (IO
>  window addresses).  So I need to look at a wider rework of this, which
>  I'll start on next week.
> >>> 
> >>> Fine. Basically my point is it's typically a mistake to
> >>> make some userspace visible parameter depend on order
> >>> of initialization of devices in qemu. I don't insist
> >>> on making users fully specify such parameters but it
> >>> is one way to do this.
> >> 
> >> I think it's reasonable to require to be able to specify it. If you
> >> don't, it's fine to base on device order IMHO.
> > 
> > Let me clarify why it's not fine.  My understanding is these addresses
> > do not change across reboots on real hardware.  On the other hand order
> > of initialization can change because of internal changes in qemu;
> > thus shut down and start guest under another qemu version
> > changes addresses.
> > 
> > So it's a bug. No?
> > 
> >> But you have to have the ability to specify it by hand if your
> >> management tool of choice actually wants reproducible results.
> >> 
> >> 
> >> Alex
> > 
> > How do you know whether your guest relies on reproducible results?
> > You typically don't. Imagine a guest which does rely on this. Then:
> > 
> > What I propose: user runs qemu with many PHBs but with no iobns, gets error
> > 'initialization failed. please add iobn= where  is a unique
> > number 1 to 64K.'
> > 
> > What you propose: user runs qemu with many PHBs but with no iobns,
> > guest fails to boot or behaves incorrectly.
> > 
> > I think as a user I prefer the first type of failure. No?
> 
> I tend to disagree. The device creation logic should stay identical
> with the same versioned machine. So if I use -M pc-0.12, I am
> guaranteed that QEMU's internal semantics on which device goes where
> does not change.

This is exactly why you should not rely on device initialization
order for your address allocator - it is not guaranteed to
be consistent.

> For unstable machine types (which -M pseries is, same as -M pc), we
> don't guarantee that the guest view stays stable across version
> updates FWIW. So if we want to give the user a stable view of the
> world, we would need to create a -M pseries-1.3 which then would
> always keep the same device creation order.
> 
> It's the same for x86, no?
> 
> 
> Alex

Same for x86 in that we broke guests in the past by changing order,
and the lesson is to always require full specification if possible.
Only reason we keep pci slot allocation around is for
backward compatibility.

-- 
MST



Re: [Qemu-devel] qcow2: slow internal snapshot creation

2012-11-23 Thread Alexandre DERUMIER
>>Preallocation doesn't matter much these days.

I have done benchmark through nfs on a netapp san, (with directio),

I got 300 iops without preallocation, and 6000 iops with preallocation 



- Mail original -

De: "Kevin Wolf" 
À: "Alexandre DERUMIER" 
Cc: "Dietmar Maurer" , qemu-devel@nongnu.org
Envoyé: Vendredi 23 Novembre 2012 15:09:15
Objet: Re: [Qemu-devel] qcow2: slow internal snapshot creation

Am 23.11.2012 15:03, schrieb Alexandre DERUMIER:
> performance is also reduced when snapshot exist. (like if they are no 
> preallocated metadatas)

Preallocation doesn't matter much these days.

> see initial git commit
>
> http://git.qemu.org/?p=qemu.git;a=commit;h=a35e1c177debb01240243bd656caca302410d38c
> "qcow2: Metadata preallocation
>
> This introduces a qemu-img create option for qcow2 which allows the metadata 
> to
> be preallocated, i.e. clusters are reserved in the refcount table and L1/L2
> tables, but no data is written to them. Metadata is quite small, so this 
> happens in almost no time.
>
> Especially with qcow2 on virtio this helps to gain a bit of performance during
> the initial writes. However, as soon as create a snapshot, we're back to the
> normal slow speed, obviously. So this isn't the real fix, but kind of a cheat
> while we're still having trouble with qcow2 on virtio."

This is a commit message from 2009 that already says that this wasn't
the final solution.

Kevin



Re: [Qemu-devel] [PATCH 0/2] Followup patches regarding block default interface

2012-11-23 Thread Stefan Hajnoczi
On Thu, Nov 22, 2012 at 9:02 PM, Christian Borntraeger
 wrote:
> here are two followup patches.
>
>
> Christian Borntraeger (2):
>   block: simply default_drive
>   block: clarify comment about IF_IDE = 0
>
>  blockdev.h |6 +-
>  vl.c   |   20 ++--
>  2 files changed, 11 insertions(+), 15 deletions(-)

Acked-by: Stefan Hajnoczi 

Looks good but I'd like to give Markus a chance to review this.

Stefan



Re: [Qemu-devel] [RFC PATCH v2 1/3] virtio-bus : Introduce VirtioBus.

2012-11-23 Thread Konrad Frederic

On 23/11/2012 13:23, Stefan Hajnoczi wrote:

On Thu, Nov 22, 2012 at 03:50:50PM +0100, fred.kon...@greensocs.com wrote:

+/* Bind the VirtIODevice to the VirtioBus. */
+void virtio_bus_bind_device(VirtioBus *bus)
+{
+BusState *qbus = BUS(bus);
+assert(bus != NULL);
+assert(bus->vdev != NULL);
+virtio_bind_device(bus->vdev,&(bus->info->virtio_bindings), qbus->parent);
+}

Should plug and bind be together in a single function?  Binding the
device seems like an internal step that the bus takes when you plug the
device.
Maybe we can, but we must init the "transport" device before bind the 
device.





+struct VirtioBusInfo {

This is defining an ad-hoc interface.  QOM has support for interfaces so
that a virtio-pci adapter brovides a VirtioBindingInterface which
VirtioBus can talk to intead of using VirtioBusInfo.
Can you point me to example in the tree to see how this QOM interfaces 
work ?



+void (*init_cb)(DeviceState *dev);
+void (*exit_cb)(DeviceState *dev);

Can _cb be dropped from the name?  Structs with function pointers always
provide "callbacks" so the _cb is unnecessary.  For example, QOM methods
like BusClass->reset() don't include _cb either.

Ok.

+VirtIOBindings virtio_bindings;
+};
+
+struct VirtioBus {
+BusState qbus;
+bool bus_in_use;

Should bus_in_use basically be bus->vdev != NULL?

Yes I can do that. :).

Thanks,

Fred



Re: [Qemu-devel] [RFC PATCH v2 2/3] virtio-pci : add a virtio-bus interface

2012-11-23 Thread Konrad Frederic

On 23/11/2012 13:34, Peter Maydell wrote:

On 23 November 2012 12:29, Stefan Hajnoczi  wrote:

On Thu, Nov 22, 2012 at 03:50:51PM +0100, fred.kon...@greensocs.com wrote:

+static const struct VirtioBusInfo virtio_bus_info = {
+.init_cb = virtio_pci_init_cb,
+.exit_cb = virtio_pci_exit_cb,
+
+.virtio_bindings = {

Eventually VirtIOBindings can probably be inlined into VirtioBusInfo.  I
don't see a need for separate structs.

I agree. It might (or might not) be convenient to retain it
temporarily while converting all the transports, but
VirtIOBindings is part of the old code which we're trying
to refactor here, and I'd expect it to go away when we're done.

-- PMM
Yes, for the moment, I didn't refactor this VirtIOBindings, so it is 
better to separate struct to keep the virtiodevice binding function.




Re: [Qemu-devel] [RFC PATCH v2 2/3] virtio-pci : add a virtio-bus interface

2012-11-23 Thread Peter Maydell
On 23 November 2012 14:23, Konrad Frederic  wrote:
> On 23/11/2012 13:34, Peter Maydell wrote:
>> On 23 November 2012 12:29, Stefan Hajnoczi  wrote:
>>> Eventually VirtIOBindings can probably be inlined into VirtioBusInfo.  I
>>> don't see a need for separate structs.
>>
>> I agree. It might (or might not) be convenient to retain it
>> temporarily while converting all the transports, but
>> VirtIOBindings is part of the old code which we're trying
>> to refactor here, and I'd expect it to go away when we're done.

> Yes, for the moment, I didn't refactor this VirtIOBindings, so it
> is better to separate struct to keep the virtiodevice binding function.

Where you're deliberately not changing something as a temporary
step you need to comment it to make that clear. Otherwise
people trying to review the code won't be able to tell...

-- PMM



Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/12] pseries: Generate unique LIOBNs for PCI host bridges

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 15:18, Michael S. Tsirkin wrote:

> On Fri, Nov 23, 2012 at 03:03:24PM +0100, Alexander Graf wrote:
>> 
>> On 23.11.2012, at 15:01, Michael S. Tsirkin wrote:
>> 
>>> On Fri, Nov 23, 2012 at 02:44:15PM +0100, Alexander Graf wrote:
 
 On 23.11.2012, at 14:44, Michael S. Tsirkin wrote:
 
> On Fri, Nov 23, 2012 at 11:59:51PM +1100, David Gibson wrote:
>>> Look, even if solution using a required property is less elegant for CLI
>>> use, it will work, won't it?
>>> So how about we merge it so that things work, and then we can discuss a
>>> patch on top that auto-generates this property?
>> 
>> Well, there you have a point.  And actually I've realised there are
>> other things we need to assign uniquely for each PHB and don't yet (IO
>> window addresses).  So I need to look at a wider rework of this, which
>> I'll start on next week.
> 
> Fine. Basically my point is it's typically a mistake to
> make some userspace visible parameter depend on order
> of initialization of devices in qemu. I don't insist
> on making users fully specify such parameters but it
> is one way to do this.
 
 I think it's reasonable to require to be able to specify it. If you
 don't, it's fine to base on device order IMHO.
>>> 
>>> Let me clarify why it's not fine.  My understanding is these addresses
>>> do not change across reboots on real hardware.  On the other hand order
>>> of initialization can change because of internal changes in qemu;
>>> thus shut down and start guest under another qemu version
>>> changes addresses.
>>> 
>>> So it's a bug. No?
>>> 
 But you have to have the ability to specify it by hand if your
 management tool of choice actually wants reproducible results.
 
 
 Alex
>>> 
>>> How do you know whether your guest relies on reproducible results?
>>> You typically don't. Imagine a guest which does rely on this. Then:
>>> 
>>> What I propose: user runs qemu with many PHBs but with no iobns, gets error
>>> 'initialization failed. please add iobn= where  is a unique
>>> number 1 to 64K.'
>>> 
>>> What you propose: user runs qemu with many PHBs but with no iobns,
>>> guest fails to boot or behaves incorrectly.
>>> 
>>> I think as a user I prefer the first type of failure. No?
>> 
>> I tend to disagree. The device creation logic should stay identical
>> with the same versioned machine. So if I use -M pc-0.12, I am
>> guaranteed that QEMU's internal semantics on which device goes where
>> does not change.
> 
> This is exactly why you should not rely on device initialization
> order for your address allocator - it is not guaranteed to
> be consistent.
> 
>> For unstable machine types (which -M pseries is, same as -M pc), we
>> don't guarantee that the guest view stays stable across version
>> updates FWIW. So if we want to give the user a stable view of the
>> world, we would need to create a -M pseries-1.3 which then would
>> always keep the same device creation order.
>> 
>> It's the same for x86, no?
>> 
>> 
>> Alex
> 
> Same for x86 in that we broke guests in the past by changing order,
> and the lesson is to always require full specification if possible.
> Only reason we keep pci slot allocation around is for
> backward compatibility.

Yeah, that's why I would expect libvirt for example to always pass in pci slot 
ids manually for example. But if you want a convenience QEMU command line, that 
is not guaranteed to be identical across different versions.

So IMHO it's fine to have a fuzzy non-consistent fallback as long as it's 
possible to specify the consistent variant. I guess that's a matter of taste 
really :).


Alex




Re: [Qemu-devel] [RFC PATCH v2 0/3] Virtio-refactoring.

2012-11-23 Thread Konrad Frederic

On 23/11/2012 13:38, Stefan Hajnoczi wrote:

On Thu, Nov 22, 2012 at 03:50:49PM +0100, fred.kon...@greensocs.com wrote:

From: KONRAD Frederic
I made the changes you suggest in the last RFC.

There are still two issues with the command line :

 * When I use ./qemu* -device virtio-blk -device virtio-pci
   It is said that no virtio-bus are present.
 * The virtio-blk is plugged in the last created virtio-bus if no "bus="
   option is present. It's an issue as we can only plug one virtio-device.

The first problem is a more general issue as it is the case for the SCSI bus and
can be fixed later.

Thanks for sharing virtio refactoring progress.

I think the challenge will be truly converting existing code over to the
new approach.  This RFC series adds a new layer on top of the existing
code but doesn't actually replace it.

Would be interesting to see the complete picture, even if you need to
leave some TODOs in the middle when sending RFC patches.

Stefan

Yes, sure.

So the next would be :
* use QOM interface in place of VirtioBusInfo ?
* refactor the VirtIODevice to remove VirtIOBinding ?

I though modifying the less I can the VirtIODevice as it could break all 
the s390 devices.


Fred



Re: [Qemu-devel] [PATCH 2/3] s390: clear registers, psw and prefix at vcpu reset

2012-11-23 Thread Christian Borntraeger
On 23/11/12 14:40, Alexander Graf wrote:
> 
> On 23.11.2012, at 11:18, Jens Freimann wrote:
> 
>> When resetting vcpus on s390/kvm we have to clear registers, psw
>> and prefix as described in the z/Architecture PoP, otherwise a
>> reboot won't work. IPL PSW and prefix are set later on by the
>> s390-ipl device reset code.
>>
>> Signed-off-by: Jens Freimann 
>> ---
>> target-s390x/kvm.c | 26 +-
>> 1 file changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
>> index 94de764..b1b791e 100644
>> --- a/target-s390x/kvm.c
>> +++ b/target-s390x/kvm.c
> 
> This needs to go into generic vcpu reset code.

The kvm ioctl certainly not, no? (definitely necessary for kernels
without sync regs).

I guess you are talking about moving the register initialisation 
into s390_cpu_reset (target-s390x/cpu.c). Right? Jens can you have
a look?

Christian




Re: [Qemu-devel] [RFC PATCH v2 2/3] virtio-pci : add a virtio-bus interface

2012-11-23 Thread Konrad Frederic

On 23/11/2012 15:26, Peter Maydell wrote:

On 23 November 2012 14:23, Konrad Frederic  wrote:

On 23/11/2012 13:34, Peter Maydell wrote:

On 23 November 2012 12:29, Stefan Hajnoczi   wrote:

Eventually VirtIOBindings can probably be inlined into VirtioBusInfo.  I
don't see a need for separate structs.

I agree. It might (or might not) be convenient to retain it
temporarily while converting all the transports, but
VirtIOBindings is part of the old code which we're trying
to refactor here, and I'd expect it to go away when we're done.

Yes, for the moment, I didn't refactor this VirtIOBindings, so it
is better to separate struct to keep the virtiodevice binding function.

Where you're deliberately not changing something as a temporary
step you need to comment it to make that clear. Otherwise
people trying to review the code won't be able to tell...

-- PMM

Ok, sorry for that.

Fred



Re: [Qemu-devel] [PATCH 2/3] s390: clear registers, psw and prefix at vcpu reset

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 15:32, Christian Borntraeger wrote:

> On 23/11/12 14:40, Alexander Graf wrote:
>> 
>> On 23.11.2012, at 11:18, Jens Freimann wrote:
>> 
>>> When resetting vcpus on s390/kvm we have to clear registers, psw
>>> and prefix as described in the z/Architecture PoP, otherwise a
>>> reboot won't work. IPL PSW and prefix are set later on by the
>>> s390-ipl device reset code.
>>> 
>>> Signed-off-by: Jens Freimann 
>>> ---
>>> target-s390x/kvm.c | 26 +-
>>> 1 file changed, 25 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
>>> index 94de764..b1b791e 100644
>>> --- a/target-s390x/kvm.c
>>> +++ b/target-s390x/kvm.c
>> 
>> This needs to go into generic vcpu reset code.
> 
> The kvm ioctl certainly not, no? (definitely necessary for kernels
> without sync regs).

Yes, of course :).

> I guess you are talking about moving the register initialisation 
> into s390_cpu_reset (target-s390x/cpu.c). Right? Jens can you have
> a look?

Yup. This is just normal reset logic that needs to be in the normal CPU reset 
functions.


Alex




Re: [Qemu-devel] [RFC PATCH v2 1/3] virtio-bus : Introduce VirtioBus.

2012-11-23 Thread Cornelia Huck
On Fri, 23 Nov 2012 15:12:45 +0100
Konrad Frederic  wrote:

> On 23/11/2012 13:08, Cornelia Huck wrote:
> > On Thu, 22 Nov 2012 15:50:50 +0100
> > fred.kon...@greensocs.com wrote:
> >
> >
> >> +/* Create a virtio bus.  */
> >> +VirtioBus *virtio_bus_new(DeviceState *host, const VirtioBusInfo *info)
> >> +{
> >> +/*
> >> + * This is needed, as we want to have different names for each 
> >> virtio-bus.
> >> + * If we don't do that, we can't add more than one VirtIODevice.
> >> + */
> >> +static int next_virtio_bus;
> >> +char *bus_name = g_strdup_printf("virtio-bus.%d", next_virtio_bus++);
> > This still has the overflow/id-reuse problem, hasn't it?
> What do you mean by overflow problem ?

If you do a lot of hotplugs (and hotunplugs), at some point
next_virtio_bus will overflow and the code will start to create
virtio-bus.. It's a bit of a pathological case, but I
can see it happening on machines with lots of devices that change
rapidly (such as somebody running a test script doing
device_add/device_del).

Maybe use something like the ida stuff the virtio kernel code is using
(don't know whether something similar exists in qemu).

> 
> >
> >> +
> >> +BusState *qbus = qbus_create(TYPE_VIRTIO_BUS, host, bus_name);
> >> +VirtioBus *bus = VIRTIO_BUS(qbus);
> >> +bus->info = info;
> >> +qbus->allow_hotplug = 0;
> >> +bus->bus_in_use = false;
> >> +DPRINTF("%s bus created\n", bus_name);
> >> +return bus;
> >> +}
> > Don't you need a way to destroy the bus again when the proxy device is
> > hotunplugged?
> >
> Yes, you're right I must add a way to destroy the bus, and the devices 
> when the proxy is hot unplugged!
> 
> Thanks,
> 
> Fred
> 




Re: [Qemu-devel] [PATCH] overflow of int ret: use ssize_t for ret

2012-11-23 Thread Stefan Priebe - Profihost AG

Hi,

i'm not a ceph or inktank guy. I can't made any decision on what to 
change. At least right now you'll see failing I/O's in your guest, when 
you discard whole disks.


I could fix this for me with int64 and with ssize_t. So if i should 
resend another patch i need a concrete advise how to patch.


Thanks!

Greets,
Stefan

Am 23.11.2012 15:15, schrieb Peter Maydell:

On 23 November 2012 14:11, Stefan Hajnoczi  wrote:

On Thu, Nov 22, 2012 at 10:07 AM, Stefan Priebe  wrote:

diff --git a/block/rbd.c b/block/rbd.c
index 5a0f79f..0384c6c 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -69,7 +69,7 @@ typedef enum {
  typedef struct RBDAIOCB {
  BlockDriverAIOCB common;
  QEMUBH *bh;
-int ret;
+ssize_t ret;
  QEMUIOVector *qiov;
  char *bounce;
  RBDAIOCmd cmd;
@@ -86,7 +86,7 @@ typedef struct RADOSCB {
  int done;
  int64_t size;
  char *buf;
-int ret;
+ssize_t ret;
  } RADOSCB;

  #define RBD_FD_READ 0


I preferred your previous patch:

ssize_t on 32-bit hosts has sizeof(ssize_t) == 4.  In
qemu_rbd_complete_aio() we may assign acb->ret = rcb->size.  Here the
size field is int64_t, so ssize_t ret would truncate the value.


The rcb size field should be a size_t: it is used for calling
rbd_aio_write and rbd_aio_read so if we've overflowed 32 bits
then we've already got a problem there.

-- PMM
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html





Re: [Qemu-devel] qcow2: slow internal snapshot creation

2012-11-23 Thread Kevin Wolf
Am 23.11.2012 15:17, schrieb Alexandre DERUMIER:
>>> Preallocation doesn't matter much these days.
> 
> I have done benchmark through nfs on a netapp san, (with directio),
> 
> I got 300 iops without preallocation, and 6000 iops with preallocation 

This doesn't sound right, even for writethrough modes.

Can you post more details about your benchmark setup so that this can be
reproduced? And does it reproduce with local files as well or only using
NFS?

Kevin



[Qemu-devel] [PATCH 1.3] aio: avoid livelock behavior for Win32

2012-11-23 Thread Paolo Bonzini
The repeated calls to WaitForMultipleObjects may cause a livelock in aio_poll,
where no progress is made on bottom halves.  This patch matches the behavior
of the POSIX code.

Signed-off-by: Paolo Bonzini 
---
Found by the new unit tests.

 aio-win32.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/aio-win32.c b/aio-win32.c
index a84eb71..cec4646 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -173,7 +173,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 }
 
 /* wait until next event */
-for (;;) {
+while (count > 0) {
 int timeout = blocking ? INFINITE : 0;
 int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
 
@@ -209,6 +209,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
 g_free(tmp);
 }
 }
+
+/* Try again, but only call each handler once.  */
+events[ret - WAIT_OBJECT_0] = events[--count];
 }
 
 return progress;
-- 
1.8.0




Re: [Qemu-devel] [PATCH 2/3] s390: clear registers, psw and prefix at vcpu reset

2012-11-23 Thread Peter Maydell
On 23 November 2012 10:18, Jens Freimann  wrote:
> +/* The initial reset call is needed here to reset in-kernel
> + * vcpu data that we can't access directly from QEMU.

If there's in-kernel vcpu data we can't access from QEMU,
doesn't this cause problems for migration?

> Before
> + * this ioctl cpu_synchronize_state() is called in common kvm
> + * code (kvm-all). What remains is clearing registers and psw
> + * in QEMU cpu state */
> +if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL)) {
> +perror("Can't reset vcpu\n");
> +}

Doesn't this mean we're now out-of-sync again, since the
kernel state will be whatever INITIAL_RESET says it should
be but the QEMU state won't have been changed?

Generally, I think an arch-independent 'reset vcpu' ioctl
would be useful. At the moment for ARM you have to pull
everything out and back in again via the GET/SET_REG interface,
which works [you can keep all the state around to write back
on vcpu reset] but is ugly.

-- PMM



Re: [Qemu-devel] [PATCH 0/2] Followup patches regarding block default interface

2012-11-23 Thread Markus Armbruster
Christian Borntraeger  writes:

> here are two followup patches.

Thanks!

Stefan, PATCH 1/2 could be squashed into "Support default block
interfaces per QEMUMachine".  Your choice.

Reviewed-by: Markus Armbruster 



[Qemu-devel] [PATCH] qemu-timer: Don't use RDTSC on 386s and 486s

2012-11-23 Thread Peter Maydell
Adjust the conditional which guards the implementation of
cpu_get_real_ticks() via RDTSC, so that we don't try to use it
on x86 CPUs which don't implement RDTSC.  Instead we will fall
back to the no-cycle-counter-available default implementation.

Reported-by: Yurij Popov 
Signed-off-by: Peter Maydell 
---
 qemu-timer.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qemu-timer.h b/qemu-timer.h
index da7e97c..e35f163 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -169,7 +169,7 @@ static inline int64_t cpu_get_real_ticks(void)
 return retval;
 }
 
-#elif defined(__i386__)
+#elif defined(__i586__)
 
 static inline int64_t cpu_get_real_ticks(void)
 {
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH 2/3] s390: clear registers, psw and prefix at vcpu reset

2012-11-23 Thread Alexander Graf

On 23.11.2012, at 16:02, Peter Maydell wrote:

> On 23 November 2012 10:18, Jens Freimann  wrote:
>> +/* The initial reset call is needed here to reset in-kernel
>> + * vcpu data that we can't access directly from QEMU.
> 
> If there's in-kernel vcpu data we can't access from QEMU,
> doesn't this cause problems for migration?

This one is for older kernels. For newer kernels, we have sync regs and ONE_REG 
interfaces to everything we need FWIW :).

> 
>> Before
>> + * this ioctl cpu_synchronize_state() is called in common kvm
>> + * code (kvm-all). What remains is clearing registers and psw
>> + * in QEMU cpu state */
>> +if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL)) {
>> +perror("Can't reset vcpu\n");
>> +}
> 
> Doesn't this mean we're now out-of-sync again, since the
> kernel state will be whatever INITIAL_RESET says it should
> be but the QEMU state won't have been changed?
> 
> Generally, I think an arch-independent 'reset vcpu' ioctl
> would be useful. At the moment for ARM you have to pull
> everything out and back in again via the GET/SET_REG interface,
> which works [you can keep all the state around to write back
> on vcpu reset] but is ugly.

Actually it's the way it's supposed to work. The reset call is just a hack, 
because it means we need to duplicate the reset logic for KVM & TCG. But it 
dates back to when KVM on s390 wasn't using QEMU.


Alex




[Qemu-devel] [PATCH 1.3? 0/2] AioContext and thread pool unit tests

2012-11-23 Thread Paolo Bonzini
As requested. :)  The tests pass on both Linux and Wine.

I'm not sure if it makes sense to include these in 1.3, but the idea
has some worth.

Paolo Bonzini (2):
  tests: add AioContext unit tests
  tests: add thread pool unit tests

 tests/Makefile   |   4 +
 tests/test-aio.c | 667 +++
 tests/test-thread-pool.c | 213 +++
 3 files changed, 884 insertions(+)
 create mode 100644 tests/test-aio.c
 create mode 100644 tests/test-thread-pool.c

-- 
1.8.0




[Qemu-devel] [PATCH 1.3? 2/2] tests: add thread pool unit tests

2012-11-23 Thread Paolo Bonzini
Signed-off-by: Paolo Bonzini 
---
 tests/Makefile   |   2 +
 tests/test-thread-pool.c | 213 +++
 2 files changed, 215 insertions(+)
 create mode 100644 tests/test-thread-pool.c

diff --git a/tests/Makefile b/tests/Makefile
index 61cbe3b..b60f0fb 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -16,6 +16,7 @@ check-unit-y += tests/test-coroutine$(EXESUF)
 check-unit-y += tests/test-visitor-serialization$(EXESUF)
 check-unit-y += tests/test-iov$(EXESUF)
 check-unit-y += tests/test-aio$(EXESUF)
+check-unit-y += tests/test-thread-pool$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -51,6 +52,7 @@ tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o
 tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) qemu-tool.o
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) 
$(tools-obj-y) $(block-obj-y) iov.o libqemustub.a
 tests/test-aio$(EXESUF): tests/test-aio.o $(coroutine-obj-y) $(tools-obj-y) 
$(block-obj-y) libqemustub.a
+tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(coroutine-obj-y) 
$(tools-obj-y) $(block-obj-y) libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o iov.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
new file mode 100644
index 000..fd02f38
--- /dev/null
+++ b/tests/test-thread-pool.c
@@ -0,0 +1,213 @@
+#include 
+#include "qemu-common.h"
+#include "qemu-aio.h"
+#include "thread-pool.h"
+#include "block.h"
+
+static int active;
+
+typedef struct {
+BlockDriverAIOCB *aiocb;
+int n;
+int ret;
+} WorkerTestData;
+
+static int worker_cb(void *opaque)
+{
+WorkerTestData *data = opaque;
+return __sync_fetch_and_add(&data->n, 1);
+}
+
+static int long_cb(void *opaque)
+{
+WorkerTestData *data = opaque;
+__sync_fetch_and_add(&data->n, 1);
+g_usleep(200);
+__sync_fetch_and_add(&data->n, 1);
+return 0;
+}
+
+static void done_cb(void *opaque, int ret)
+{
+WorkerTestData *data = opaque;
+g_assert_cmpint(data->ret, ==, -EINPROGRESS);
+data->ret = ret;
+data->aiocb = NULL;
+
+/* Callbacks are serialized, so no need to use atomic ops.  */
+active--;
+}
+
+/* A non-blocking poll of the main AIO context (we cannot use aio_poll
+ * because we do not know the AioContext).
+ */
+static void qemu_aio_wait_nonblocking(void)
+{
+qemu_notify_event();
+qemu_aio_wait();
+}
+
+static void test_submit(void)
+{
+WorkerTestData data = { .n = 0 };
+thread_pool_submit(worker_cb, &data);
+qemu_aio_flush();
+g_assert_cmpint(data.n, ==, 1);
+}
+
+static void test_submit_aio(void)
+{
+WorkerTestData data = { .n = 0, .ret = -EINPROGRESS };
+data.aiocb = thread_pool_submit_aio(worker_cb, &data, done_cb, &data);
+
+/* The callbacks are not called until after the first wait.  */
+active = 1;
+g_assert_cmpint(data.ret, ==, -EINPROGRESS);
+qemu_aio_flush();
+g_assert_cmpint(active, ==, 0);
+g_assert_cmpint(data.n, ==, 1);
+g_assert_cmpint(data.ret, ==, 0);
+}
+
+static void co_test_cb(void *opaque)
+{
+WorkerTestData *data = opaque;
+
+active = 1;
+data->n = 0;
+data->ret = -EINPROGRESS;
+thread_pool_submit_co(worker_cb, data);
+
+/* The test continues in test_submit_co, after qemu_coroutine_enter... */
+
+g_assert_cmpint(data->n, ==, 1);
+data->ret = 0;
+active--;
+
+/* The test continues in test_submit_co, after qemu_aio_flush... */
+}
+
+static void test_submit_co(void)
+{
+WorkerTestData data;
+Coroutine *co = qemu_coroutine_create(co_test_cb);
+
+qemu_coroutine_enter(co, &data);
+
+/* Back here once the worker has started.  */
+
+g_assert_cmpint(active, ==, 1);
+g_assert_cmpint(data.ret, ==, -EINPROGRESS);
+
+/* qemu_aio_flush will execute the rest of the coroutine.  */
+
+qemu_aio_flush();
+
+/* Back here after the coroutine has finished.  */
+
+g_assert_cmpint(active, ==, 0);
+g_assert_cmpint(data.ret, ==, 0);
+}
+
+static void test_submit_many(void)
+{
+WorkerTestData data[100];
+int i;
+
+/* Start more work items than there will be threads.  */
+for (i = 0; i < 100; i++) {
+data[i].n = 0;
+data[i].ret = -EINPROGRESS;
+thread_pool_submit_aio(worker_cb, &data[i], done_cb, &data[i]);
+}
+
+active = 100;
+while (active > 0) {
+qemu_aio_wait();
+}
+for (i = 0; i < 100; i++) {
+g_assert_cmpint(data[i].n, ==, 1);
+g_assert_cmpint(data[i].ret, ==, 0);
+}
+}
+
+static void test_cancel(void)
+{
+WorkerTestData data[100];
+int i;
+
+/* Start more work items than there will be threads, to ensure
+ * the pool is full.
+ */
+test_submit_many();
+
+/* Start long running jobs, to ensure we can cancel some.  */
+for (i = 0; i < 100; i++) {
+data[i].n = 0;
+data[i].ret = -EINPROGRESS;
+ 

[Qemu-devel] [PATCH 1.3? 1/2] tests: add AioContext unit tests

2012-11-23 Thread Paolo Bonzini
Signed-off-by: Paolo Bonzini 
---
 tests/Makefile   |   2 +
 tests/test-aio.c | 667 +++
 2 files changed, 669 insertions(+)
 create mode 100644 tests/test-aio.c

diff --git a/tests/Makefile b/tests/Makefile
index ca680e5..61cbe3b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -15,6 +15,7 @@ check-unit-y += tests/test-string-output-visitor$(EXESUF)
 check-unit-y += tests/test-coroutine$(EXESUF)
 check-unit-y += tests/test-visitor-serialization$(EXESUF)
 check-unit-y += tests/test-iov$(EXESUF)
+check-unit-y += tests/test-aio$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -49,6 +50,7 @@ tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o
 tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o
 tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) qemu-tool.o
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) 
$(tools-obj-y) $(block-obj-y) iov.o libqemustub.a
+tests/test-aio$(EXESUF): tests/test-aio.o $(coroutine-obj-y) $(tools-obj-y) 
$(block-obj-y) libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o iov.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
diff --git a/tests/test-aio.c b/tests/test-aio.c
new file mode 100644
index 000..f53c908
--- /dev/null
+++ b/tests/test-aio.c
@@ -0,0 +1,667 @@
+/*
+ * AioContext tests
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Paolo Bonzini
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include 
+#include "qemu-aio.h"
+
+AioContext *ctx;
+
+/* Simple callbacks for testing.  */
+
+typedef struct {
+QEMUBH *bh;
+int n;
+int max;
+} BHTestData;
+
+static void bh_test_cb(void *opaque)
+{
+BHTestData *data = opaque;
+if (++data->n < data->max) {
+qemu_bh_schedule(data->bh);
+}
+}
+
+static void bh_delete_cb(void *opaque)
+{
+BHTestData *data = opaque;
+if (++data->n < data->max) {
+qemu_bh_schedule(data->bh);
+} else {
+qemu_bh_delete(data->bh);
+data->bh = NULL;
+}
+}
+
+typedef struct {
+EventNotifier e;
+int n;
+int active;
+bool auto_set;
+} EventNotifierTestData;
+
+static int event_active_cb(EventNotifier *e)
+{
+EventNotifierTestData *data = container_of(e, EventNotifierTestData, e);
+return data->active > 0;
+}
+
+static void event_ready_cb(EventNotifier *e)
+{
+EventNotifierTestData *data = container_of(e, EventNotifierTestData, e);
+g_assert(event_notifier_test_and_clear(e));
+data->n++;
+if (data->active > 0) {
+data->active--;
+}
+if (data->auto_set && data->active) {
+event_notifier_set(e);
+}
+}
+
+/* Tests using aio_*.  */
+
+static void test_notify(void)
+{
+g_assert(!aio_poll(ctx, false));
+aio_notify(ctx);
+g_assert(!aio_poll(ctx, true));
+g_assert(!aio_poll(ctx, false));
+}
+
+static void test_flush(void)
+{
+g_assert(!aio_poll(ctx, false));
+aio_notify(ctx);
+aio_flush(ctx);
+g_assert(!aio_poll(ctx, false));
+}
+
+static void test_bh_schedule(void)
+{
+BHTestData data = { .n = 0 };
+data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+qemu_bh_schedule(data.bh);
+g_assert_cmpint(data.n, ==, 0);
+
+g_assert(aio_poll(ctx, true));
+g_assert_cmpint(data.n, ==, 1);
+
+g_assert(!aio_poll(ctx, false));
+g_assert_cmpint(data.n, ==, 1);
+qemu_bh_delete(data.bh);
+}
+
+static void test_bh_schedule10(void)
+{
+BHTestData data = { .n = 0, .max = 10 };
+data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+qemu_bh_schedule(data.bh);
+g_assert_cmpint(data.n, ==, 0);
+
+g_assert(aio_poll(ctx, false));
+g_assert_cmpint(data.n, ==, 1);
+
+g_assert(aio_poll(ctx, true));
+g_assert_cmpint(data.n, ==, 2);
+
+aio_flush(ctx);
+g_assert_cmpint(data.n, ==, 10);
+
+g_assert(!aio_poll(ctx, false));
+g_assert_cmpint(data.n, ==, 10);
+qemu_bh_delete(data.bh);
+}
+
+static void test_bh_cancel(void)
+{
+BHTestData data = { .n = 0 };
+data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+qemu_bh_schedule(data.bh);
+g_assert_cmpint(data.n, ==, 0);
+
+qemu_bh_cancel(data.bh);
+g_assert_cmpint(data.n, ==, 0);
+
+g_assert(!aio_poll(ctx, false));
+g_assert_cmpint(data.n, ==, 0);
+qemu_bh_delete(data.bh);
+}
+
+static void test_bh_delete(void)
+{
+BHTestData data = { .n = 0 };
+data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+qemu_bh_schedule(data.bh);
+g_assert_cmpint(data.n, ==, 0);
+
+qemu_bh_delete(data.bh);
+g_assert_cmpint(data.n, ==, 0);
+
+g_assert(!aio_poll(ctx, false));
+g_assert_cmpint(data.n, ==, 0);
+}
+
+static void test_bh_delete_from_cb(void)
+{
+BHTestData data1 = { .n = 0, .max = 1 };
+
+data1.bh = aio_bh_new(ctx, bh_delete_cb, &data1);
+
+qemu_bh_schedule(data1.bh);
+g_assert_cmpint(data1.

Re: [Qemu-devel] [PATCH] qemu-timer: Don't use RDTSC on 386s and 486s

2012-11-23 Thread Paolo Bonzini
Il 23/11/2012 16:12, Peter Maydell ha scritto:
> Adjust the conditional which guards the implementation of
> cpu_get_real_ticks() via RDTSC, so that we don't try to use it
> on x86 CPUs which don't implement RDTSC.  Instead we will fall
> back to the no-cycle-counter-available default implementation.
> 
> Reported-by: Yurij Popov 
> Signed-off-by: Peter Maydell 
> ---
>  qemu-timer.h |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/qemu-timer.h b/qemu-timer.h
> index da7e97c..e35f163 100644
> --- a/qemu-timer.h
> +++ b/qemu-timer.h
> @@ -169,7 +169,7 @@ static inline int64_t cpu_get_real_ticks(void)
>  return retval;
>  }
>  
> -#elif defined(__i386__)
> +#elif defined(__i586__)
>  
>  static inline int64_t cpu_get_real_ticks(void)
>  {
> 

You should at least test __i686__ too:

$ gcc -m32 -dM -E -x c /dev/null |grep __i
#define __i686 1
#define __i686__ 1
#define __i386 1
#define __i386__ 1

Paolo



Re: [Qemu-devel] [PATCH] qemu-timer: Don't use RDTSC on 386s and 486s

2012-11-23 Thread Peter Maydell
On 23 November 2012 15:15, Paolo Bonzini  wrote:
> Il 23/11/2012 16:12, Peter Maydell ha scritto:
>> Adjust the conditional which guards the implementation of
>>
>> -#elif defined(__i386__)
>> +#elif defined(__i586__)
>>
>>  static inline int64_t cpu_get_real_ticks(void)
>>  {
>>
>
> You should at least test __i686__ too:
>
> $ gcc -m32 -dM -E -x c /dev/null |grep __i
> #define __i686 1
> #define __i686__ 1
> #define __i386 1
> #define __i386__ 1

Yuck. I had assumed gcc would define everything from i386
on up when building for later cores.

-- PMM



Re: [Qemu-devel] [PATCH 1/3] S390: Basic CPU model support

2012-11-23 Thread Andreas Färber
Am 23.11.2012 11:18, schrieb Jens Freimann:
> From: Viktor Mihajlovski 
> 
> This enables qemu -cpu ? to return the list of supported CPU models
> on s390. Since only the host model is supported at this point in time
> this is pretty straight-forward. Further, a validity check for the
> requested CPU model was added.
> This change is needed to allow libvirt exploiters (like OpenStack)
> to specify a CPU model.
> 
> Signed-off-by: Viktor Mihajlovski 
> Signed-off-by: Jens Freimann 
> Reviewed-by: Christian Borntraeger 
> ---
>  hw/s390-virtio.c   | 5 +
>  target-s390x/cpu.c | 6 ++
>  target-s390x/cpu.h | 3 +++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
> index 685cb54..7144ac1 100644
> --- a/hw/s390-virtio.c
> +++ b/hw/s390-virtio.c
> @@ -209,6 +209,11 @@ static void s390_init(QEMUMachineInitArgs *args)
>  cpu_model = "host";
>  }
>  
> +if (strcmp(cpu_model, "host")) {
> +fprintf(stderr, "S390 only supports host CPU model\n");
> +exit(1);
> +}

When Cornelia introduces a second machine for virtio-ccw, you will have
to duplicate this logic. Other targets have an, e.g., cpu_s390_init()
function that encapsulates the instantiation logic.

Also, "host" doesn't make a lot of sense on non-s390 host, no?

> +
>  ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
>  
>  for (i = 0; i < smp_cpus; i++) {
> diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
> index 619b202..03fdc31 100644
> --- a/target-s390x/cpu.c
> +++ b/target-s390x/cpu.c
> @@ -25,6 +25,12 @@
>  #include "qemu-timer.h"
>  
>  
> +/* generate CPU information for cpu -? */
> +void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf)
> +{
> +(*cpu_fprintf)(f, "s390 %16s\n", "[host]");

The other targets don't use a target prefix ("s390 "), but then again
the text and layout are fully target-specific at this time. :)

Regards,
Andreas

> +}
> +
>  /* CPUClass::reset() */
>  static void s390_cpu_reset(CPUState *s)
>  {
> diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
> index 0f9a1f7..3513976 100644
> --- a/target-s390x/cpu.h
> +++ b/target-s390x/cpu.h
> @@ -350,6 +350,9 @@ static inline void cpu_set_tls(CPUS390XState *env, 
> target_ulong newtls)
>  #define cpu_gen_code cpu_s390x_gen_code
>  #define cpu_signal_handler cpu_s390x_signal_handler
>  
> +void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
> +#define cpu_list s390_cpu_list
> +
>  #include "exec-all.h"
>  
>  #ifdef CONFIG_USER_ONLY
> 


-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] drive_add 0 if=scsi crashes

2012-11-23 Thread Markus Armbruster
Paolo Bonzini  writes:

> Il 22/11/2012 15:02, Markus Armbruster ha scritto:
>> Watch this:
>> 
>> (qemu) drive_add 0 if=scsi
>> Segmentation fault (core dumped)
>> 
>> Broken in commit 0d936928 "qdev: Convert busses to QEMU Object Model".
>> Before:
>> 
>> (qemu) drive_add 0 if=scsi
>> Device is not a SCSI adapter
>
> Seems more of an improvement to me. :)
>
> Can you open a bug for Fedora or launchpad?

https://bugzilla.redhat.com/show_bug.cgi?id=879657



Re: [Qemu-devel] qcow2: slow internal snapshot creation

2012-11-23 Thread Kevin Wolf
Am 23.11.2012 08:26, schrieb Dietmar Maurer:
> qcow2 snapshot on newly created files are fast:
> 
> # qemu-img create -f qcow2 test.img 200G
> # time qemu-img snapshot -c snap1 test.img
> real   0m0.014s
> 
> but if metadata is allocated it gets very slow:
> 
> # qemu-img create  -f qcow2 -o "preallocation=metadata" test.img 200G
> # time qemu-img snapshot -c snap1 test.img
> real   1m20.399s
> 
> but reading the metadata is also fast:
> 
> # time qemu-img check test.img
> real   0m0.371s
> 
> So why is creating a new snapshot that slow – any ideas?

Had a look at this now. The culprit is the unconditional bdrv_flush() in
update_cluster_refcount(). I suspect it's completely unnecessary by now,
but I need to give it a closer review before I send a patch to remove
it. (And in any case error handling is missing there)

Affected are writing compressed images and creating/deleting internal
snapshots, no other code uses this path.

Kevin



Re: [Qemu-devel] [PATCH] qemu-timer: Don't use RDTSC on 386s and 486s

2012-11-23 Thread Jamie Lokier
Peter Maydell wrote:
> On 23 November 2012 15:15, Paolo Bonzini  wrote:
> > Il 23/11/2012 16:12, Peter Maydell ha scritto:
> >> Adjust the conditional which guards the implementation of
> >>
> >> -#elif defined(__i386__)
> >> +#elif defined(__i586__)
> >>
> >>  static inline int64_t cpu_get_real_ticks(void)
> >>  {
> >>
> >
> > You should at least test __i686__ too:
> >
> > $ gcc -m32 -dM -E -x c /dev/null |grep __i
> > #define __i686 1
> > #define __i686__ 1
> > #define __i386 1
> > #define __i386__ 1
> 
> Yuck. I had assumed gcc would define everything from i386
> on up when building for later cores.

No, and it doesn't define __i686__ on all x86-32 targets after i686 either:

$ gcc -march=core2 -dM -E -x c /dev/null | grep __[0-9a-z] | sort
#define __core2 1
#define __core2__ 1
#define __gnu_linux__ 1
#define __i386 1
#define __i386__ 1
#define __linux 1
#define __linux__ 1
#define __tune_core2__ 1
#define __unix 1
#define __unix__ 1

x86 instruction sets haven't followed a linear progression of features
for quite a while, especially including non-Intel chips, so it stopped
making sense for GCC to indicate the instruction set in that way.

GCC 4.6.3 defines __i586__ only when the target arch is set by -march
(or default) to i586, pentium or pentium-mmx.

And it defines __i686__ only when -march= is set (or default) to c3-2,
i686, pentiumpro, pentium2, pentium3, pentium3m or pentium-m.

Otherwise it's just things like __athlon__, __corei7__, etc.

The only one that's consistent is __i386__ (and __i386).

-- Jamie



  1   2   >