date:20100618

[Qemu-devel] [PATCH v3 2/3] pci: set PCI multi-function bit appropriately.

2010-06-18 Thread Isaku Yamahata

Set PCI multi-function bit appropriately.
PCI address, devfn ,is exported to users as addr property,
so users can populate pci function(PCIDevice in qemu)
at arbitrary devfn.
It means each function(PCIDevice) don't know whether pci device
(PCIDevice[8]) is multi function or not.
So this patch makes pci generic layer set the bit appropriately.

Signed-off-by: Isaku Yamahata 

---
changes v2 -> v3:
- introduce PCI_FUNC_MAX
- more commit log

changes v1 -> v2:
don't set header type register in configuration space.
---
 hw/pci.c |   27 +++
 hw/pci.h |1 +
 2 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 62308df..d17770e 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -574,6 +574,32 @@ static void pci_init_wmask_bridge(PCIDevice *d)
 pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0x);
 }
 
+static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
+{
+uint8_t slot = PCI_SLOT(dev->devfn);
+uint8_t func;
+
+/* we are here before bus->devices[dev->devfn] = dev */
+assert(bus->devices[dev->devfn] == NULL);
+
+for (func = 0; func < PCI_FUNC_MAX; ++func) {
+if (bus->devices[PCI_DEVFN(slot, func)]) {
+break;
+}
+}
+if (func == PCI_FUNC_MAX) {
+return;
+}
+
+for (func = 0; func < PCI_FUNC_MAX; ++func) {
+if (bus->devices[PCI_DEVFN(slot, func)]) {
+bus->devices[PCI_DEVFN(slot, func)]->config[PCI_HEADER_TYPE] |=
+PCI_HEADER_TYPE_MULTI_FUNCTION;
+}
+}
+dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
+}
+
 static void pci_config_alloc(PCIDevice *pci_dev)
 {
 int config_size = pci_config_size(pci_dev);
@@ -627,6 +653,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
 if (is_bridge) {
 pci_init_wmask_bridge(pci_dev);
 }
+pci_init_multifunction(bus, pci_dev);
 
 if (!config_read)
 config_read = pci_default_read_config;
diff --git a/hw/pci.h b/hw/pci.h
index 4d682d4..76adc66 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -14,6 +14,7 @@
 #define PCI_DEVFN(slot, func)   slot) & 0x1f) << 3) | ((func) & 0x07))
 #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
 #define PCI_FUNC(devfn) ((devfn) & 0x07)
+#define PCI_FUNC_MAX8
 
 /* Class, Vendor and Device IDs from Linux's pci_ids.h */
 #include "pci_ids.h"
-- 
1.6.6.1

[Qemu-devel] [PATCH v3 0/3] pci: multi-function bit fixes

2010-06-18 Thread Isaku Yamahata

Changes v2 -> v3:
Make this bug fix patch independent.
added test description.

patch description:

When pci devices are populated as multi-function,
OS can fail to probe function > 0. It's because multi function
bit of header type register in configuration space isn't set,
so OS probes only function 0 skipping function > 0 as optimization.

This patch set make qemu set multi function bit when function > 0 
is populated.


Isaku Yamahata (3):
  pci: remove PCIDeviceInfo::header_type
  pci: set PCI multi-function bit appropriately.
  pci: don't overwrite multi functio bit in pci header type.

 hw/ac97.c |1 -
 hw/acpi_piix4.c   |1 -
 hw/apb_pci.c  |4 +---
 hw/dec_pci.c  |2 +-
 hw/grackle_pci.c  |1 -
 hw/ide/cmd646.c   |1 -
 hw/ide/piix.c |1 -
 hw/macio.c|1 -
 hw/ne2000.c   |1 -
 hw/openpic.c  |1 -
 hw/pci.c  |   42 +++---
 hw/pci.h  |9 +++--
 hw/pcnet.c|1 -
 hw/piix4.c|3 +--
 hw/piix_pci.c |4 +---
 hw/prep_pci.c |1 -
 hw/rtl8139.c  |1 -
 hw/sun4u.c|1 -
 hw/unin_pci.c |4 
 hw/usb-uhci.c |1 -
 hw/vga-pci.c  |1 -
 hw/virtio-pci.c   |1 -
 hw/vmware_vga.c   |1 -
 hw/wdt_i6300esb.c |1 -
 24 files changed, 46 insertions(+), 39 deletions(-)

[Qemu-devel] [PATCH v3 1/3] pci: remove PCIDeviceInfo::header_type

2010-06-18 Thread Isaku Yamahata

replace PCIDeviceInfo::header_type with is_bridge
as suggested by Michael S. Tsirkin 

Signed-off-by: Isaku Yamahata 
---
 hw/apb_pci.c |2 +-
 hw/dec_pci.c |2 +-
 hw/pci.c |   15 ---
 hw/pci.h |8 ++--
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/hw/apb_pci.c b/hw/apb_pci.c
index 31c8d70..6b9cd59 100644
--- a/hw/apb_pci.c
+++ b/hw/apb_pci.c
@@ -436,7 +436,7 @@ static PCIDeviceInfo pbm_pci_host_info = {
 .qdev.name = "pbm",
 .qdev.size = sizeof(PCIDevice),
 .init  = pbm_pci_host_init,
-.header_type  = PCI_HEADER_TYPE_BRIDGE,
+.is_bridge = 1,
 };
 
 static SysBusDeviceInfo pbm_host_info = {
diff --git a/hw/dec_pci.c b/hw/dec_pci.c
index 024c67c..83ce4cd 100644
--- a/hw/dec_pci.c
+++ b/hw/dec_pci.c
@@ -90,7 +90,7 @@ static PCIDeviceInfo dec_21154_pci_host_info = {
 .qdev.name = "dec-21154",
 .qdev.size = sizeof(PCIDevice),
 .init  = dec_21154_pci_host_init,
-.header_type  = PCI_HEADER_TYPE_BRIDGE,
+.is_bridge  = 1,
 };
 
 static void dec_register_devices(void)
diff --git a/hw/pci.c b/hw/pci.c
index ef17eb4..62308df 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -597,7 +597,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
  const char *name, int devfn,
  PCIConfigReadFunc *config_read,
  PCIConfigWriteFunc *config_write,
- uint8_t header_type)
+ bool is_bridge)
 {
 if (devfn < 0) {
 for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
@@ -619,13 +619,12 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
 pci_dev->irq_state = 0;
 pci_config_alloc(pci_dev);
 
-header_type &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
-if (header_type == PCI_HEADER_TYPE_NORMAL) {
+if (!is_bridge) {
 pci_set_default_subsystem_id(pci_dev);
 }
 pci_init_cmask(pci_dev);
 pci_init_wmask(pci_dev);
-if (header_type == PCI_HEADER_TYPE_BRIDGE) {
+if (is_bridge) {
 pci_init_wmask_bridge(pci_dev);
 }
 
@@ -1554,7 +1553,9 @@ static int pci_bridge_initfn(PCIDevice *dev)
 pci_set_word(dev->config + PCI_STATUS,
  PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK);
 pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_PCI);
-dev->config[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_BRIDGE;
+dev->config[PCI_HEADER_TYPE] =
+(dev->config[PCI_HEADER_TYPE] & PCI_HEADER_TYPE_MULTI_FUNCTION) |
+PCI_HEADER_TYPE_BRIDGE;
 pci_set_word(dev->config + PCI_SEC_STATUS,
  PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK);
 return 0;
@@ -1605,7 +1606,7 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo 
*base)
 devfn = pci_dev->devfn;
 pci_dev = do_pci_register_device(pci_dev, bus, base->name, devfn,
  info->config_read, info->config_write,
- info->header_type);
+ info->is_bridge);
 if (pci_dev == NULL)
 return -1;
 rc = info->init(pci_dev);
@@ -1855,7 +1856,7 @@ static PCIDeviceInfo bridge_info = {
 .init = pci_bridge_initfn,
 .exit = pci_bridge_exitfn,
 .config_write = pci_bridge_write_config,
-.header_type  = PCI_HEADER_TYPE_BRIDGE,
+.is_bridge= 1,
 .qdev.props   = (Property[]) {
 DEFINE_PROP_HEX32("vendorid", PCIBridge, vid, 0),
 DEFINE_PROP_HEX32("deviceid", PCIBridge, did, 0),
diff --git a/hw/pci.h b/hw/pci.h
index 6a2bc6a..4d682d4 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -325,8 +325,12 @@ typedef struct {
 PCIConfigReadFunc *config_read;
 PCIConfigWriteFunc *config_write;
 
-/* pci config header type */
-uint8_t header_type;
+/*
+ * pci-to-pci bridge or normal device.
+ * This doesn't mean pci host switch.
+ * When card bus bridge is supported, this would be enhanced.
+ */
+int is_bridge;
 
 /* pcie stuff */
 int is_express;   /* is this device pci express? */
-- 
1.6.6.1

[Qemu-devel] [PATCH v3 3/3] pci: don't overwrite multi functio bit in pci header type.

2010-06-18 Thread Isaku Yamahata

Don't overwrite pci header type.
Otherwise, multi function bit which pci_init_header_type() sets
appropriately is lost.
Anyway PCI_HEADER_TYPE_NORMAL is zero, so it is unnecessary to zero
which is already zero cleared.

how to test:
run qemu and issue info pci to see whether a device in question is
normal device, not pci-to-pci bridge.
This is handy because guest os isn't required.

tested changes:
The following files are covered by using following commands.
sparc64-softmmu
  apb_pci.c, vga-pci.c, cmd646.c, ne2k_pci.c, sun4u.c
ppc-softmmu
  grackle_pci.c, cmd646.c, ne2k_pci.c, vga-pci.c, macio.c
ppc-softmmu -M mac99
  unin_pci.c(uni-north, uni-north-agp)
ppc64-softmmu
  pci-ohci, ne2k_pci, vga-pci, unin_pci.c(u3-agp)
x86_64-softmmu
  acpi_piix4.c, ide/piix.c, piix4.c, piix_pci.c
  -vga vmware vmware_vga.c
  -watchdog i6300esb wdt_i6300esb.c
  -usb usb-uhci.c
  -sound ac97 ac97.c
  -nic model=rtl8139 rtl8139.c
  -nic model=pcnet pcnet.c
  -balloon virtio virtio-pci.c:

untested changes:
The following changes aren't tested.
prep_pci.c: ppc-softmmu -M prep should cover, but core dumped.
unin_pci.c(uni-north-pci): the caller is commented out.
openpic.c: the caller is commented out in ppc_prep.c

Signed-off-by: Isaku Yamahata 

---
chnages v2 -> v3:
- added  record what was tested in the commit message
changes v1 -> v2:
- set header type of bridge type in pci_bridge_initfn().
- dropped ugly hunk in apb_pci.c.
---
 hw/ac97.c |1 -
 hw/acpi_piix4.c   |1 -
 hw/apb_pci.c  |2 --
 hw/grackle_pci.c  |1 -
 hw/ide/cmd646.c   |1 -
 hw/ide/piix.c |1 -
 hw/macio.c|1 -
 hw/ne2000.c   |1 -
 hw/openpic.c  |1 -
 hw/pcnet.c|1 -
 hw/piix4.c|3 +--
 hw/piix_pci.c |4 +---
 hw/prep_pci.c |1 -
 hw/rtl8139.c  |1 -
 hw/sun4u.c|1 -
 hw/unin_pci.c |4 
 hw/usb-uhci.c |1 -
 hw/vga-pci.c  |1 -
 hw/virtio-pci.c   |1 -
 hw/vmware_vga.c   |1 -
 hw/wdt_i6300esb.c |1 -
 21 files changed, 2 insertions(+), 28 deletions(-)

diff --git a/hw/ac97.c b/hw/ac97.c
index 4319bc8..d71072d 100644
--- a/hw/ac97.c
+++ b/hw/ac97.c
@@ -1295,7 +1295,6 @@ static int ac97_initfn (PCIDevice *dev)
 c[PCI_REVISION_ID] = 0x01;  /* rid revision ro */
 c[PCI_CLASS_PROG] = 0x00;  /* pi programming interface ro */
 pci_config_set_class (c, PCI_CLASS_MULTIMEDIA_AUDIO); /* ro */
-c[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; /* headtyp header type ro */
 
 /* TODO set when bar is registered. no need to override. */
 /* nabmar native audio mixer base address rw */
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 8d1a628..bfa1d9a 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -369,7 +369,6 @@ static int piix4_pm_initfn(PCIDevice *dev)
 pci_conf[0x08] = 0x03; // revision number
 pci_conf[0x09] = 0x00;
 pci_config_set_class(pci_conf, PCI_CLASS_BRIDGE_OTHER);
-pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; // header_type
 pci_conf[0x3d] = 0x01; // interrupt pin 1
 
 pci_conf[0x40] = 0x01; /* PM io base read only bit */
diff --git a/hw/apb_pci.c b/hw/apb_pci.c
index 6b9cd59..69a774d 100644
--- a/hw/apb_pci.c
+++ b/hw/apb_pci.c
@@ -427,8 +427,6 @@ static int pbm_pci_host_init(PCIDevice *d)
  PCI_STATUS_FAST_BACK | PCI_STATUS_66MHZ |
  PCI_STATUS_DEVSEL_MEDIUM);
 pci_config_set_class(d->config, PCI_CLASS_BRIDGE_HOST);
-pci_set_byte(d->config + PCI_HEADER_TYPE,
- PCI_HEADER_TYPE_NORMAL);
 return 0;
 }
 
diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
index aa0c51b..b3a5f54 100644
--- a/hw/grackle_pci.c
+++ b/hw/grackle_pci.c
@@ -126,7 +126,6 @@ static int grackle_pci_host_init(PCIDevice *d)
 d->config[0x08] = 0x00; // revision
 d->config[0x09] = 0x01;
 pci_config_set_class(d->config, PCI_CLASS_BRIDGE_HOST);
-d->config[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; // header_type
 return 0;
 }
 
diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index 559147f..756ee81 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -240,7 +240,6 @@ static int pci_cmd646_ide_initfn(PCIDevice *dev)
 pci_conf[PCI_CLASS_PROG] = 0x8f;
 
 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_IDE);
-pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; // header_type
 
 pci_conf[0x51] = 0x04; // enable IDE0
 if (d->secondary) {
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index dad6e86..8817915 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -122,7 +122,6 @@ static int pci_piix_ide_initfn(PCIIDEState *d)
 
 pci_conf[PCI_CLASS_PROG] = 0x80; // legacy ATA mode
 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_IDE);
-pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; // header_type
 
 qemu_register_reset(piix3_reset, d);
 
diff --git a/hw/macio.c b/hw/macio.c
index e92e82a..789ca55 100644
--- a/hw/macio.c
+++ b/hw/macio.c
@@ -110,7 +110,6 @@ void macio_init (PCIBus *bus

Re: [Qemu-devel] Re: RFC v2: blockdev_add & friends, brief rationale, QMP docs

2010-06-18 Thread Markus Armbruster

Anthony Liguori  writes:

> On 06/17/2010 03:20 AM, Kevin Wolf wrote:
>> Am 16.06.2010 20:07, schrieb Anthony Liguori:
>>
But it requires that
 everything that -blockdev provides is accessible with -drive, too (or
 that we're okay with users hating us).

>>> I'm happy for -drive to die.  I think we should support -hda and
>>> -blockdev.
>>>  
>> -hda is not sufficient for most users. It doesn't provide any options.
>> It doesn't even support virtio. If -drive is going to die (and we seem
>> to agree all on that), then -blockdev needs to be usable for users (and
>> it's only you who contradicts so far).
>>
>
> I've always thought we should have a -vda argument and an -sda
> argument specifically for specifying virtio and scsi disks.

"-hda F" is a cognitive dead end: if you need more control, knowing -hda
doesn't give you a better idea where to look for it than not knowing it.

This is different with "-drive file=F".  It can grow with the user's
needs.

-blockdev's primary mission is clean host / guest separation, and clean
interface to block devices for QMP and for config files.

If it turns out to be too verbose and complex for everyday command line
use, we shouldn't take -hda & similar as excuse for leaving it there.
We still need a usable option that can grow with the user's needs.  And
it better be able to grow to the point where you have full control over
the device model, i.e. you use -device.

[...]

Re: [Qemu-devel] RFC v3: blockdev_add & friends, brief rationale, QMP docs

2010-06-18 Thread Markus Armbruster

Stefan Hajnoczi  writes:

> On Thu, Jun 17, 2010 at 1:49 PM, Markus Armbruster  wrote:
>> Stefan Hajnoczi  writes:
>>
>>> On Wed, Jun 16, 2010 at 6:27 PM, Markus Armbruster  
>>> wrote:
 blockdev_add
 

 Add host block device.

 Arguments:

 - "id": the host block device's ID, must be unique (json-string)
 - "format": image format (json-string, optional)
    - Possible values: "raw", "qcow2", ...
>>>
>>> What is the default when unset?  (I expect we'll auto-detect the
>>> format but this should be documented.)
>>
>> For command line and human monitor, we definitely want a sensible
>> default.  I sketched one in section "Command line syntax".  I'll quote
>> it for your convenience a few lines down.
>
> Ahem...the part that I skipped over ;).  I should have read your
> entire email, thanks for pointing it out.

:)

>> To let users ask for this explicitely, we could have pseudo-format
>> "auto".
>>
>> We also need a pseudo-format "probe", which guesses the format from the
>> image contents.  Can't be made the default, because it's insecure.
>
> In which scenario is probing the image format a security issue?  I'm
> trying to think up scenarios where a cloud user modifies the guest
> disk image and gets QEMU to re-open the image file as another format,
> perhaps this would make the cloud owner/admin unhappy.  I don't see a
> threat except for image format drivers have security bugs (corrupt
> images leading to arbitrary code execution).

User creates a raw image file.

VM starts.  Image file gets probed, it's raw.  Guest has access to the
complete file.

Guest writes a valid QCOW2 image to it, chosen to include the full
backing file in the resulting image contents.  Set the backing file to
/secret/treasure.

Reboot VM.  Image file gets probed, it's qcow2.  Backing file gets
probed, it's raw.  Guest has access to the contents of QCOW2 image.
This includes the backing file.  Oops.

 (2) It's possible to list supported disk formats and protocols by
    running QEMU with arguments "-blockdev_add \?".
>>>
>>> Is there an query-block-driver command or something in QMP to
>>> enumerate supported formats and protocols?  Not sure how useful this
>>> would be to the management stack - blockdev_add will probably return
>>> an error if an attempt is made to open an unsupported file.
>>
>> QMP should be "self-documenting": a client should be able to list
>> commands, their arguments, and possible argument values.  Listing
>> supported formats then becomes "list possible values of command
>> blockdev_add's argument format".
>
> Nice :).

We still need to deliver the niceties...

 blockdev_del
 

 Remove a host block device.

 Arguments:

 - "id": the host block device's ID (json-string)

 Example:

 -> { "execute": "blockdev_del", "arguments": { "id": "blk1" } }
 <- { "return": {} }
>>>
>>> What about an attached guest device?  Will this fail if the virtio-blk
>>> PCI device is still present?  For SCSI I imagine we can usually just
>>> remove the host block device.  For IDE there isn't hotplug support
>>> AFAIK, what happens?
>>
>> Command fails.  You have to device_del the device first.  Which is only
>> possible if its bus supports hot-plug.
>
> I think this deserves to be in the documentation.

Yes, QMP documentation should cover errors.  It generally doesn't so far.

Re: [Qemu-devel] Re: [PATCH 5/5] linux fbdev display driver.

2010-06-18 Thread Gerd Hoffmann


  Hi,


For some reason, the display is extremely slow when using vnc and
fbdev at the same time.


Gotcha.  Didn't notice, but it probably depends on the hardware.  Very 
likely the reason is that graphic cards usually are optimized for write 
access and reads might be slow as hell.   vnc must read though.


Hmm.  No easy way out.

cheers,
  Gerd

[Qemu-devel] Re: RFC qdev path semantics

2010-06-18 Thread Gerd Hoffmann


  Hi,


Bus names are chosen by the system as follows:

* If the driver of the parent device model provides a name, use that.

* Else, if the parent device has id ID, use ID.NUM, where NUM is the bus
   number, counting from zero in creation order.

* Else, use TYPE.NUM, where TYPE is derived from the bus type, and NUM
   is the bus number, as above.

### Paul proposes to drop ID.NUM.

### Paul proposes to either drop TYPE.NUM (and require drivers to
 provide bus names), or make NUM count separately for each bus type.


I agree, instance numbers are not stable.


NUM in the child bus name has *nothing* to do with the savevm instance 
number.


cheers,
  Gerd

[Qemu-devel] Re: [PATCH V4 2/3] qemu: Generic task offloading framework: threadlets

2010-06-18 Thread Paolo Bonzini


On 06/17/2010 08:05 PM, Anthony Liguori wrote:

On 06/17/2010 05:09 AM, Paolo Bonzini wrote:

+ while (QTAILQ_EMPTY(&(queue->request_list))&&
+ (ret != ETIMEDOUT)) {
+ ret = qemu_cond_timedwait(&(queue->cond),
+ &(queue->lock), 10*10);
+ }


Using qemu_cond_timedwait is a hack for not properly broadcasting the
condvar in flush_threadlet_queue.


I think Anthony answered this one.


I think he said that the code has been changed so I am right? :)


You're right about the condition we check in the exit path but the
timedwait is needed to expire an idle thread.


In posix-aio-compat, yes.  In threadlets you'll expire excess idle 
threads after each threadlet has completed.  If you want to keep the 
threads above the min_threads-th ready for 10 seconds, that's fine; but 
it's not what the v4 code does.


Paolo

[Qemu-devel] Re: [RFC][PATCH 2/2] qcow2: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Kevin Wolf

Am 17.06.2010 21:47, schrieb Stefan Hajnoczi:
> On Thu, Jun 17, 2010 at 3:39 PM, Kevin Wolf  wrote:
>> Am 17.06.2010 16:19, schrieb Stefan Hajnoczi:
>>> On Thu, Jun 17, 2010 at 1:03 PM, Kevin Wolf  wrote:
 Use bdrv_(p)write_sync to ensure metadata integrity in case of a crash.
>>>
>>> Any performance numbers?  This change is necessary for correctness but
>>> I wonder what the performance impact is for users.
>>
>> No numbers yet, but as you say we need to do it anyway. It should
>> definitely be better than any other option that I can think of
>> (cache=writethrough or some O_DIRECT|O_DSYNC mode) in that it only hurts
>> performance when metadata is actually changed. As long as we only write
>> guest data, there is no difference.
>>
>> Making it a barrier instead of a flush would probably be better, have
>> you already had a look at this since we talked about it?
> 
> No I haven't seen a userspace barrier solution.  I'm not actively
> working on that at the moment but am keeping an eye out for solutions.
>  I'm not sure what to make of sync_file_range(2), the man page
> suggests it cannot be relied on since only already allocated blocks
> will be flushed appropriately.  Filesystem metadata changes are not
> flushed, which is especially problematic for copy-on-write filesystems
> (according to the man page).

qcow2 files are expected to grow, so this probably doesn't help us. At
best we could try to detect if the file grows and decide if to use
sync_file_range or fdatasync.

Of course, it would be better to have a flag or something to include the
necessary metadata, but I have no idea how it's implemented and if this
is easily possible.

> Thinking about the performance more, I predict that guest OS
> installations will slow down but day-to-day usage (especially
> modifying already allocated blocks) will be alright for the reasons
> you mentioned.

Right, it's only growth that will get an impact. The pooint is that it's
not only installation (in most cases you could even use cache=unsafe for
that) but that you start to grow again after taking a snapshot - be it
internal (savevm) or external (backing file). This is where it hurts.

Kevin

[Qemu-devel] Re: [RFC][PATCH 2/2] qcow2: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Christoph Hellwig

On Fri, Jun 18, 2010 at 09:54:24AM +0200, Kevin Wolf wrote:
> qcow2 files are expected to grow, so this probably doesn't help us. At
> best we could try to detect if the file grows and decide if to use
> sync_file_range or fdatasync.

No.  sync_file_range is always unsafe for use with a real filesystem.
It never even calls into the filesystem and never flushes the disk write
cache.  For our use case it's exactly a no-op.

> Of course, it would be better to have a flag or something to include the
> necessary metadata, but I have no idea how it's implemented and if this
> is easily possible.

We could add a flag to it.  But rather than adding more crap to this
bastard that should never have been added I'd rather add a real
fsync_range system call.

Note that for our use case it doesn't matter anyway.  For an image file
accessed with cache=none there will be no data in the pagecache, and
the writeback of the inode and associated metadata (btree blocks or
indirect blocks) will be same wether we give it a range or not, and same
for the disk cache flush.

Re: [Qemu-devel] RFC v3: blockdev_add & friends, brief rationale, QMP docs

2010-06-18 Thread Stefan Hajnoczi

On Fri, Jun 18, 2010 at 8:27 AM, Markus Armbruster  wrote:
> Stefan Hajnoczi  writes:
>> On Thu, Jun 17, 2010 at 1:49 PM, Markus Armbruster  wrote:
>>> Stefan Hajnoczi  writes:
 On Wed, Jun 16, 2010 at 6:27 PM, Markus Armbruster  
 wrote:
>>> To let users ask for this explicitely, we could have pseudo-format
>>> "auto".
>>>
>>> We also need a pseudo-format "probe", which guesses the format from the
>>> image contents.  Can't be made the default, because it's insecure.
>>
>> In which scenario is probing the image format a security issue?  I'm
>> trying to think up scenarios where a cloud user modifies the guest
>> disk image and gets QEMU to re-open the image file as another format,
>> perhaps this would make the cloud owner/admin unhappy.  I don't see a
>> threat except for image format drivers have security bugs (corrupt
>> images leading to arbitrary code execution).
>
> User creates a raw image file.
>
> VM starts.  Image file gets probed, it's raw.  Guest has access to the
> complete file.
>
> Guest writes a valid QCOW2 image to it, chosen to include the full
> backing file in the resulting image contents.  Set the backing file to
> /secret/treasure.
>
> Reboot VM.  Image file gets probed, it's qcow2.  Backing file gets
> probed, it's raw.  Guest has access to the contents of QCOW2 image.
> This includes the backing file.  Oops.

I thought selinux/svirt was supposed to secure QEMU processes and
ensure they only have access to the resources they need?  If the
backing file doesn't belong to me then my QEMU process shouldn't be
able to open it.

Stefan

Re: [Qemu-devel] Re: RFC v2: blockdev_add & friends, brief rationale, QMP docs

2010-06-18 Thread Markus Armbruster

Kevin Wolf  writes:

> Am 17.06.2010 15:01, schrieb Anthony Liguori:
>> On 06/17/2010 03:20 AM, Kevin Wolf wrote:
>>> Am 16.06.2010 20:07, schrieb Anthony Liguori:
>>>
>But it requires that
> everything that -blockdev provides is accessible with -drive, too (or
> that we're okay with users hating us).
>
>
 I'm happy for -drive to die.  I think we should support -hda and
 -blockdev.

>>> -hda is not sufficient for most users. It doesn't provide any options.
>>> It doesn't even support virtio. If -drive is going to die (and we seem
>>> to agree all on that), then -blockdev needs to be usable for users (and
>>> it's only you who contradicts so far).
>>>
>> 
>> I've always thought we should have a -vda argument and an -sda argument 
>> specifically for specifying virtio and scsi disks.
>
> It would at least fix the most obvious problem. However, it still
> doesn't allow passing options.
>
 -blockdev should be optimized for config files, not single
 argument input.  IOW:

 [blockdev "blk2"]
format = "raw"
file = "/path/to/base.img"
cache = "writeback"

 [blockdev "blk1"]
 format = "qcow2"
 file = "/path/to/leaf.img"
 cache="off"
 backing_dev = "blk2"

 [device "disk1"]
 driver = "ide-drive"
 blockdev = "blk1"
 bus = "0"
 unit = "0"

>>> You don't specify the backing file of an image on the command line (or
>>> in the configuration file).
>> 
>> But we really ought to allow it.  Backing files are implemented as part 
>> of the core block layer, not the actual block formats.  
>
> The generic block layer knows the name of the backing file, so it can be
> displayed in tools, but that's about it. Calling this the
> "implementation" of backing files is daring.

You're both partly wrong :)

COW format drivers retrieve the "name" of the backing file from the
image on open, and store it in a well-known place in their
BlockDriverState.  qcow2 also retrieves the format.

Then generic block code opens the backing "file", and stores a pointer
to the backing BlockDriverState in the COW format's BlockDriverState.

The COW format driver then uses the backing "file" BlockDriverState
however it sees fit.

Generic code takes care of closing it, and also has a hand in commit and
encryption.

Aside: I put "name" and "file" in quotes, because the name is really an
encoding of protocol + arguments.  It works just like -drive with
file=NAME (and format=FORMAT if qcow2).  But for the COW format driver,
it's just a string (or two) it uses to get access to a backing image.
It's fine with any format + protocol combination, so it leaves that to
generic code.

Aside^2: We can define the semantics of QCOW2 however we please.  But is
it really sane to interpret a backing file name "fat:duck" we find in
some VMDK image as "vvfat over directory duck"?

> I see no use case for specifying it on the command line. The only thing
> you can achieve better with it is corrupting your image because you
> specify the wrong/no backing file next time.

Anthony has a point on the conceptual level: COW is a stacking of
BlockDriverStates.

But Kevin is right on usability.  We could permit overriding of backing
file on the command line / config file / QMP, but it would be a
dangerous expert feature, and no replacement for storing references to
backing files in the COW images.

>> Today the block 
>> layer queries the block format for the name of the backing file but gets 
>> no additional options from the block format.  File isn't necessarily 
>> enough information to successfully open the backing device so why treat 
>> it specially?

Parameters for opening a block device are flags, format, filename (which
is really an encoding of protocol + arguments).  So the only piece of
information that's missing for backing files is flags.

Flags encode -drive's parameters snapshot, cache, aio, readonly.  For
backing files, we force readonly on and snapshot off.  We inherit cache
and aio settings from the backed file.  Which of these do you want to
control separately?

>> I think we should keep the current ability to query the block format for 
>> a backing file name but we should also support hooking up the backing 
>> device without querying the block format at all.  It makes the model 
>> much more elegant IMHO because then we're just creating block devices 
>> and hooking them up.  All block devices are created equal more or less.
>> 
>>>   It's saved as part of the image. It's more
>>> like this (for a simple raw image file):
>>>
>>> [blockdev-protocol "proto1"]
>>> protocol = "file"
>>> file = "/path/to/image.img"
>>>
>>> [blockdev "blk1"]
>>> format = "raw"
>>> cache="off"
>>> protocol = "proto1"
>>>
>>> [device "disk1"]
>>> driver = "ide-drive"
>>> blockdev = "blk1"
>>> bus = "0"
>>> unit = "0"
>>>
>>> (This would be Markus' option 3, I thin

Re: [Qemu-devel] [RFC PATCH 1/5] qdev: Create qdev_get_dev_path()

2010-06-18 Thread Jan Kiszka

Alex Williamson wrote:
> On Wed, 2010-06-16 at 10:23 +0200, Markus Armbruster wrote:
>> Alex Williamson  writes:
>>
>>> On Tue, 2010-06-15 at 12:28 +0100, Paul Brook wrote:
>> Alex proposed to disambiguate by adding "identified properties of the
>> immediate parent bus and device" to the path component.  For PCI, these
>> are dev.fn.  Likewise for any other bus where devices have unambigous
>> bus address.  The driver name carries no information!
> From user POV, driver names are very handly to address a device
> intuitively - except for the case you have tones of devices on the same
> bus that are handled by the same driver. For that case we need to
> augment the device name with a useful per-bus ID, derived from the bus
> address where available, otherwise based on instance numbers.
 This is where I think you're missing a trick. We don't need to augment the 
 name, we just need to allow the bus id to be used instead.
>>> For the case of a hot remove, I agree.  If the user specifies "pci_del
>>> pci.0/03.0", that's completely sufficient because we don't care what's
>>> in that slot, just remove it.  However, I still see some use cases for
>>> device names in the path.  Take for example:
>>>
>>> (A): /i440FX-pcihost/pci.0/e1000.05.0
>>>
>>> vs
>>>
>>> (B): /pci.0/05.0
>>>
>>> (removing both the root bridge driver name and the device driver name)
>> / is the main system bus.  System bus defines no bus address at the
>> moment.  Therefore, you have to use the driver name i440FX-pcihost.
> 
> So is the general rule "If a device's parent bus does not provide an
> address, print device name"?

What is the device name?

> 
>> /i440FX-pcihost/pci.0 is the PCI bus.  PCI bus defines a bus address:
>> dev.fn.  Therefore, you can either use the bus address @05.0, or the
>> driver name e1000.
>>
>> We have "/i440FX-pcihost/pci.0/e1000" vs. "/i440FX-pcihost/pci.0/@05.0".
> 
> I object to being able to use anything but an address under a bus that
> supports hotplug, but that aside, I think the paths for my example
> system look like below.  Note that anything behind and including the $
> is not the canonical path, that begins the free form, usage specific
> string, here filled by missing device names (open to suggestions other
> than $ here).

What about ":" instead of "$"? Visually more appealing IMO.

>  Note that were isa devices do not have a bus identifier,
> I'm using the device name,

Don't understand. Why don't they have their I/O base as prefix? This is
inconsistent, and if we consider anything behind "$" (or ":")
informative, the bus address is mandatory for any device (on a bus with
an addressing scheme).

> which I think follows the same rule as
> i440FX-pcihost.  Can we agree this is the goal?  Thanks,

It looks almost acceptable from my POV. We just need to define how to
get hold of devices on buses without an addressing scheme (e.g. the
system bus). Using the driver name is insufficient once you have > 1
devices of the same type. Introducing device instance numbers to those
buses would be straightforward IMO.

Once this is done, the next step should be a specification for later use
in docs/qdev-device-use.txt.

Jan

> 
> Alex
> 
> /i440FX-pcihost/pci.0/@00.0$i440FX
> /i440FX-pcihost/pci.0/@01.0$PIIX3
> /i440FX-pcihost/pci.0/@02.0$cirrus-vga
> /i440FX-pcihost/pci.0/@01.0/isa.0/mc146818rtc
> /i440FX-pcihost/pci.0/@01.0/isa.0/@0x3f8$isa-serial
> /i440FX-pcihost/pci.0/@01.0/isa.0/@0x378$isa-parallel
> /i440FX-pcihost/pci.0/@01.0/isa.0/i8042
> /i440FX-pcihost/pci.0/@01.0/isa.0/isa-fdc
> /i440FX-pcihost/pci.0/@03.0$i82551
> /i440FX-pcihost/pci.0/@04.0$virtio-net-pci
> /i440FX-pcihost/pci.0/@05.0$e1000
> /i440FX-pcihost/pci.0/@06.0$rtl8139
> /i440FX-pcihost/pci.0/@07.0$pcnet
> /i440FX-pcihost/pci.0/@01.0/isa.0/@0x300$ne2k_isa
> /i440FX-pcihost/pci.0/@01.1$piix3-ide
> /i440FX-pcihost/pci.0/@01.1/ide.0/@0$ide-drive
> /i440FX-pcihost/pci.0/@01.1/ide.1/@0$ide-drive
> /i440FX-pcihost/pci.0/@01.2$piix3-usb-uhci
> /i440FX-pcihost/pci.0/@01.3$PIIX4_PM
> /i440FX-pcihost/pci.0/@01.3/i2c/@80$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@81$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@82$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@83$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@84$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@85$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@86$smbus-eeprom
> /i440FX-pcihost/pci.0/@01.3/i2c/@87$smbus-eeprom
> /i440FX-pcihost/pci.0/@08.0/scsi.0/@0$scsi-disk
> /i440FX-pcihost/pci.0/@08.0/scsi.0/@3$scsi-disk
> /i440FX-pcihost/pci.0/@08.0$lsi53c895a
> /i440FX-pcihost/pci.0/@01.2/usb.0/@usb0/scsi.0/@0$scsi-disk
> /i440FX-pcihost/pci.0/@01.2/usb.0/@usb0$usb-storage
> /i440FX-pcihost/pci.0/@09.0$virtio-blk-pci
> 

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] RFC v3: blockdev_add & friends, brief rationale, QMP docs

2010-06-18 Thread Markus Armbruster

Stefan Hajnoczi  writes:

> On Fri, Jun 18, 2010 at 8:27 AM, Markus Armbruster  wrote:
>> Stefan Hajnoczi  writes:
>>> On Thu, Jun 17, 2010 at 1:49 PM, Markus Armbruster  
>>> wrote:
 Stefan Hajnoczi  writes:
> On Wed, Jun 16, 2010 at 6:27 PM, Markus Armbruster  
> wrote:
 To let users ask for this explicitely, we could have pseudo-format
 "auto".

 We also need a pseudo-format "probe", which guesses the format from the
 image contents.  Can't be made the default, because it's insecure.
>>>
>>> In which scenario is probing the image format a security issue?  I'm
>>> trying to think up scenarios where a cloud user modifies the guest
>>> disk image and gets QEMU to re-open the image file as another format,
>>> perhaps this would make the cloud owner/admin unhappy.  I don't see a
>>> threat except for image format drivers have security bugs (corrupt
>>> images leading to arbitrary code execution).
>>
>> User creates a raw image file.
>>
>> VM starts.  Image file gets probed, it's raw.  Guest has access to the
>> complete file.
>>
>> Guest writes a valid QCOW2 image to it, chosen to include the full
>> backing file in the resulting image contents.  Set the backing file to
>> /secret/treasure.
>>
>> Reboot VM.  Image file gets probed, it's qcow2.  Backing file gets
>> probed, it's raw.  Guest has access to the contents of QCOW2 image.
>> This includes the backing file.  Oops.
>
> I thought selinux/svirt was supposed to secure QEMU processes and
> ensure they only have access to the resources they need?  If the
> backing file doesn't belong to me then my QEMU process shouldn't be
> able to open it.

Yes, SELinux will protect you from security holes in applications.
Protecting from != fixing of :)

Re: [Qemu-devel] Re: RFC v2: blockdev_add & friends, brief rationale, QMP docs

2010-06-18 Thread Kevin Wolf

Am 18.06.2010 10:20, schrieb Markus Armbruster:
> -blockdev should be optimized for config files, not single
> argument input.  IOW:
>
> [blockdev "blk2"]
>format = "raw"
>file = "/path/to/base.img"
>cache = "writeback"
>
> [blockdev "blk1"]
> format = "qcow2"
> file = "/path/to/leaf.img"
> cache="off"
> backing_dev = "blk2"
>
> [device "disk1"]
> driver = "ide-drive"
> blockdev = "blk1"
> bus = "0"
> unit = "0"
>  
 You don't specify the backing file of an image on the command line (or
 in the configuration file).
>>>
>>> But we really ought to allow it.  Backing files are implemented as part 
>>> of the core block layer, not the actual block formats.  
>>
>> The generic block layer knows the name of the backing file, so it can be
>> displayed in tools, but that's about it. Calling this the
>> "implementation" of backing files is daring.
> 
> You're both partly wrong :)

Yes, it was not entirely correct. What I was thinking of (and it is
probably what I should have written) is that the real COW implementation
is part of the driver and not generic infrastructure. If the format
driver doesn't do COW, there's no use in specifying a backing file to
block.c.

> COW format drivers retrieve the "name" of the backing file from the
> image on open, and store it in a well-known place in their
> BlockDriverState.  qcow2 also retrieves the format.
> 
> Then generic block code opens the backing "file", and stores a pointer
> to the backing BlockDriverState in the COW format's BlockDriverState.
> 
> The COW format driver then uses the backing "file" BlockDriverState
> however it sees fit.
> 
> Generic code takes care of closing it, and also has a hand in commit and
> encryption.
> 
> Aside: I put "name" and "file" in quotes, because the name is really an
> encoding of protocol + arguments.  It works just like -drive with
> file=NAME (and format=FORMAT if qcow2).  But for the COW format driver,
> it's just a string (or two) it uses to get access to a backing image.
> It's fine with any format + protocol combination, so it leaves that to
> generic code.
> 
> Aside^2: We can define the semantics of QCOW2 however we please.  But is
> it really sane to interpret a backing file name "fat:duck" we find in
> some VMDK image as "vvfat over directory duck"?

Good point, though I don't think it's a real problem. But it's true that
we use VMDK in a very qemu-specific way. We can read VMDKs that VMware
can't read (with a qcow2 backing file, for example), but in the same way
it may happen that we interpret a file name wrong if it comes from
VMware rather than qemu. Disabling protocol parsing here would disable
additional features that were possible with qemu - but to be honest, I
don't think that either option is a real problem.

>> I see no use case for specifying it on the command line. The only thing
>> you can achieve better with it is corrupting your image because you
>> specify the wrong/no backing file next time.
> 
> Anthony has a point on the conceptual level: COW is a stacking of
> BlockDriverStates.

It is. But it's not the same stacking as the format/protocol one, it's a
second dimension. I doubt that it makes sense to let the user specify
this stacking on the command line.

> But Kevin is right on usability.  We could permit overriding of backing
> file on the command line / config file / QMP, but it would be a
> dangerous expert feature, and no replacement for storing references to
> backing files in the COW images.

I even refuse to call it a feature as long as you can't tell me a use
case. I think it's useless - there are cases that are already possible
with having the backing file name stored in the image file and you just
duplicate it on the command line; and the other cases don't even work.

>>> Today the block 
>>> layer queries the block format for the name of the backing file but gets 
>>> no additional options from the block format.  File isn't necessarily 
>>> enough information to successfully open the backing device so why treat 
>>> it specially?
> 
> Parameters for opening a block device are flags, format, filename (which
> is really an encoding of protocol + arguments).  So the only piece of
> information that's missing for backing files is flags.
> 
> Flags encode -drive's parameters snapshot, cache, aio, readonly.  For
> backing files, we force readonly on and snapshot off.  We inherit cache
> and aio settings from the backed file.  Which of these do you want to
> control separately?
> 
>>> I think we should keep the current ability to query the block format for 
>>> a backing file name but we should also support hooking up the backing 
>>> device without querying the block format at all.  It makes the model 
>>> much more elegant IMHO because then we're just creating block devices 
>>> and hooking them up.  All block devices are created equal more or less.
>>>

Re: [Qemu-devel] Re: [PATCH] ceph/rbd block driver for qemu-kvm (v3)

2010-06-18 Thread Kevin Wolf

Am 17.06.2010 21:05, schrieb Christian Brunner:
> Hi Simone,
> 
> sorry for the late reply. I've been on vacation for a week.
> 
> Thanks for sending the patch. At first sight your patch looks good.
> I'll do some testing by the weekend.
> 
> Kevin also sent me a note about the missing aio support, but I didn't
> have the time to implement it yet. Now it seems, that I don't have to
> do it, since you where quicker... :)

Are you going to send a final version which includes Simone's patch or
should I apply them as two patches and just accept that rbd is broken
after the first one? Or were there any other problems that need to be
solved first?

Kevin

[Qemu-devel] [PATCH 3/5] - preparing for release 0.6c

2010-06-18 Thread Gerd Hoffmann

From: Volker Ruppert 

---
 ChangeLog |   35 +++
 README|6 ++
 2 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 75be5bd..35bf00a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,38 @@
+2009-01-25 16:46  vruppert
+
+   * vbe.c (1.62), vbe.h (1.28), vbetables-gen.c (1.5):
+
+   - added support for a lot more non-standard VBE modes (e.g. widescreen 
modes)
+   - requires latest Bochs VBE code (16 MB video memory, VBE_DISPI_ID5, 
VRAM size
+ in 64k pages stored in VBE register)
+   - check if VBE mode is supported with current VRAM size
+
+2009-01-24 11:02  vruppert
+
+   * clext.c (1.14), vbe.c (1.61), vgabios.c (1.68):
+
+   - use VBE LFB address from PCI base address if present (rewrite of the 
cirrus
+ specific function in main vgabios code)
+   - removed unnecessary spaces
+
+2008-12-14 09:29  vruppert
+
+   * clext.c (1.13):
+
+   - added DPMS support to cirrus vgabios (patch from Gleb Natapov)
+
+2008-05-30 17:28  vruppert
+
+   * README (1.16):
+
+   - updated for release 0.6b
+
+2008-05-22 12:55  vruppert
+
+   * ChangeLog (1.27), README (1.15):
+
+   - preparations for release 0.6b
+
 2008-05-11 08:40  vruppert
 
* biossums.c (1.6):
diff --git a/README b/README
index 90141d4..ce67aeb 100644
--- a/README
+++ b/README
@@ -90,6 +90,12 @@ For any information on qemu, visit the website 
http://fabrice.bellard.free.fr/qe
 
 History
 ---
+vgabios-0.6c : not yet released
+  - Volker
+. added DPMS support to cirrus vgabios (patch from Gleb Natapov)
+. use VBE LFB address from PCI base address if present
+. added support for a lot more non-standard VBE modes (e.g. widescreen 
modes)
+
 vgabios-0.6b : May 30 2008
   - Volker
 . added PCI data structure for the Cirrus VGABIOS images
-- 
1.6.5.2

[Qemu-devel] [PATCH 1/5] - use VBE LFB address from PCI base address if present (rewrite of the cirrus specific function in main vgabios code) - removed unnecessary spaces

2010-06-18 Thread Gerd Hoffmann

From: Volker Ruppert 

---
 clext.c   |   51 ++-
 vbe.c |   59 ---
 vgabios.c |   58 ++
 3 files changed, 92 insertions(+), 76 deletions(-)

diff --git a/clext.c b/clext.c
index c7a2ad0..b0b6834 100644
--- a/clext.c
+++ b/clext.c
@@ -948,7 +948,8 @@ cirrus_vesa_01h_3:
   ;; 32-bit LFB address
   xor ax, ax
   stosw
-  call cirrus_get_lfb_addr
+  mov ax, #0x1013 ;; vendor Cirrus
+  call _pci_get_lfb_addr
   stosw
   or ax, ax
   jz cirrus_vesa_01h_4
@@ -1293,54 +1294,6 @@ cgm_2:
 cgm_3:
   ret
 
-  ; get LFB address
-  ; out - ax:LFB address (high 16 bit)
-  ;; NOTE - may be called in protected mode
-cirrus_get_lfb_addr:
-  push cx
-  push dx
-  push eax
-xor cx, cx
-mov dl, #0x00
-call cirrus_pci_read
-cmp ax, #0x
-jz cirrus_get_lfb_addr_5
- cirrus_get_lfb_addr_3:
-mov dl, #0x00
-call cirrus_pci_read
-cmp ax, #0x1013 ;; cirrus
-jz cirrus_get_lfb_addr_4
-add cx, #0x8
-cmp cx, #0x200 ;; search bus #0 and #1
-jb cirrus_get_lfb_addr_3
- cirrus_get_lfb_addr_5:
-xor dx, dx ;; no LFB
-jmp cirrus_get_lfb_addr_6
- cirrus_get_lfb_addr_4:
-mov dl, #0x10 ;; I/O space #0
-call cirrus_pci_read
-test ax, #0xfff1
-jnz cirrus_get_lfb_addr_5
-shr eax, #16
-mov dx, ax ;; LFB address
- cirrus_get_lfb_addr_6:
-  pop eax
-  mov ax, dx
-  pop dx
-  pop cx
-  ret
-
-cirrus_pci_read:
-  mov eax, #0x0080
-  mov ax, cx
-  shl eax, #8
-  mov al, dl
-  mov dx, #0xcf8
-  out dx, eax
-  add dl, #4
-  in  eax, dx
-  ret
-
 ;; out - al:bytes per pixel
 cirrus_get_bpp_bytes:
   push dx
diff --git a/vbe.c b/vbe.c
index 6173ca0..92e3d0d 100644
--- a/vbe.c
+++ b/vbe.c
@@ -766,9 +766,9 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 Bit16ucur_mode=0;
 Bit16ucur_ptr=34;
 ModeInfoListItem  *cur_info=&mode_info_list;
-
+
 status = read_word(ss, AX);
-
+
 #ifdef DEBUG
 printf("VBE vbe_biosfn_return_vbe_info ES%x DI%x AX%x\n",ES,DI,status);
 #endif
@@ -784,7 +784,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
  (vbe_info_block.VbeSignature[1] == 'B') &&
  (vbe_info_block.VbeSignature[2] == 'E') &&
  (vbe_info_block.VbeSignature[3] == '2')) ||
- 
+
 ((vbe_info_block.VbeSignature[0] == 'V') &&
  (vbe_info_block.VbeSignature[1] == 'E') &&
  (vbe_info_block.VbeSignature[2] == 'S') &&
@@ -796,20 +796,20 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 #endif
 }
 #endif
-
+
 // VBE Signature
 vbe_info_block.VbeSignature[0] = 'V';
 vbe_info_block.VbeSignature[1] = 'E';
 vbe_info_block.VbeSignature[2] = 'S';
 vbe_info_block.VbeSignature[3] = 'A';
-
+
 // VBE Version supported
 vbe_info_block.VbeVersion = 0x0200;
-
+
 // OEM String
 vbe_info_block.OemStringPtr_Seg = 0xc000;
 vbe_info_block.OemStringPtr_Off = &vbebios_copyright;
-
+
 // Capabilities
 vbe_info_block.Capabilities[0] = VBE_CAPABILITY_8BIT_DAC;
 vbe_info_block.Capabilities[1] = 0;
@@ -824,7 +824,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
 
 if (vbe2_info)
-   {
+{
 // OEM Stuff
 vbe_info_block.OemSoftwareRev = VBE_OEM_SOFTWARE_REV;
 vbe_info_block.OemVendorNamePtr_Seg = 0xc000;
@@ -837,12 +837,12 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 // copy updates in vbe_info_block back
 memcpyb(ES, DI, ss, &vbe_info_block, sizeof(vbe_info_block));
 }
-   else
-   {
+else
+{
 // copy updates in vbe_info_block back (VBE 1.x compatibility)
 memcpyb(ES, DI, ss, &vbe_info_block, 256);
-   }
-
+}
+
 do
 {
 if ((cur_info->info.XResolution <= dispi_get_max_xres()) &&
@@ -860,7 +860,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 }
 cur_info++;
 } while (cur_info->mode != VBE_VESA_MODE_END_OF_LIST);
-
+
 // Add vesa mode list terminator
 write_word(ES, DI + cur_ptr, cur_info->mode);
 
@@ -888,32 +888,37 @@ Bit16u *AX;Bit16u CX; Bit16u ES;Bit16u DI;
 ModeInfoBlock info;
 ModeInfoListItem  *cur_info;
 Boolean   using_lfb;
+Bit16ulfb_addr;
 
 #ifdef DEBUG
 printf("VBE vbe_biosfn_return_mode_information ES%x DI%x 
CX%x\n",ES,DI,CX);
 #endif
 
 using_lfb=((CX & VBE_MODE_LINEAR_FRAME_BUFFER) == 
VBE_MODE_LINEAR_FRAME_BUFFER);
-
+
 CX = (CX & 0x1ff);
-
+
 cur_info = mode_info_find_mode(CX, using_lfb, &cur_info);
 
 if (cur_info != 0)
 {
 #ifdef DEBUG

[Qemu-devel] [PATCH 4/5] - biosfn_write_teletype: fixed attribute when scrolling in text mode

2010-06-18 Thread Gerd Hoffmann

From: Volker Ruppert 

---
 vgabios.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/vgabios.c b/vgabios.c
index fbc3588..ea3aae8 100644
--- a/vgabios.c
+++ b/vgabios.c
@@ -2039,7 +2039,9 @@ Bit8u car;Bit8u page;Bit8u attr;Bit8u flag;
   {
if(vga_modes[line].class==TEXT)
 {
- biosfn_scroll(0x01,0x07,0,0,nbrows-1,nbcols-1,page,SCROLL_UP);
+ address=SCREEN_MEM_START(nbcols,nbrows,page)+(xcurs+(ycurs-1)*nbcols)*2;
+ attr=read_byte(vga_modes[line].sstart,address+1);
+ biosfn_scroll(0x01,attr,0,0,nbrows-1,nbcols-1,page,SCROLL_UP);
 }
else
 {
@@ -2047,7 +2049,7 @@ Bit8u car;Bit8u page;Bit8u attr;Bit8u flag;
 }
ycurs-=1;
   }
- 
+
  // Set the cursor for the page
  cursor=ycurs; cursor<<=8; cursor+=xcurs;
  biosfn_set_cursor_pos(page,cursor);
-- 
1.6.5.2

[Qemu-devel] [PATCH 2/5] - added support for a lot more non-standard VBE modes (e.g. widescreen modes) - requires latest Bochs VBE code (16 MB video memory, VBE_DISPI_ID5, VRAM size in 64k pages store

2010-06-18 Thread Gerd Hoffmann

From: Volker Ruppert 

---
 vbe.c   |   31 ++--
 vbe.h   |   70 --
 vbetables-gen.c |   43 +
 3 files changed, 91 insertions(+), 53 deletions(-)

diff --git a/vbe.c b/vbe.c
index 92e3d0d..ecff90d 100644
--- a/vbe.c
+++ b/vbe.c
@@ -38,8 +38,6 @@
 #include "vbe.h"
 #include "vbetables.h"
 
-#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K 
(VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64)
-
 // The current OEM Software Revision of this VBE Bios
 #define VBE_OEM_SOFTWARE_REV 0x0002;
 
@@ -715,7 +713,7 @@ vbe_init:
   mov  [bx], al
   pop  bx
   pop  ds
-  mov  ax, # VBE_DISPI_ID4
+  mov  ax, # VBE_DISPI_ID5
   call dispi_set_id
 no_vbe_interface:
 #if defined(USE_BX_INFO) || defined(DEBUG)
@@ -742,7 +740,19 @@ no_vbe_flag:
   mov  ds, ax
   mov  si, #_no_vbebios_info_string
   jmp  _display_string
-ASM_END  
+
+; helper function for memory size calculation
+
+lmulul:
+  and eax, #0x
+  shl ebx, #16
+  or  eax, ebx
+  SEG SS
+  mul eax, dword ptr [di]
+  mov ebx, eax
+  shr ebx, #16
+  ret
+ASM_END
 
 /** Function 00h - Return VBE Controller Information
  * 
@@ -765,6 +775,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 Bit16uvbe2_info;
 Bit16ucur_mode=0;
 Bit16ucur_ptr=34;
+Bit16usize_64k;
 ModeInfoListItem  *cur_info=&mode_info_list;
 
 status = read_word(ss, AX);
@@ -820,8 +831,9 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 vbe_info_block.VideoModePtr_Seg= ES ;
 vbe_info_block.VideoModePtr_Off= DI + 34;
 
-// VBE Total Memory (in 64b blocks)
-vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
+// VBE Total Memory (in 64k blocks)
+outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K);
+vbe_info_block.TotalMemory = inw(VBE_DISPI_IOPORT_DATA);
 
 if (vbe2_info)
 {
@@ -845,8 +857,11 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
 
 do
 {
+size_64k = (Bit16u)((Bit32u)cur_info->info.XResolution * 
cur_info->info.XResolution * cur_info->info.BitsPerPixel) >> 19;
+
 if ((cur_info->info.XResolution <= dispi_get_max_xres()) &&
-(cur_info->info.BitsPerPixel <= dispi_get_max_bpp())) {
+(cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) &&
+(size_64k <= vbe_info_block.TotalMemory)) {
 #ifdef DEBUG
   printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode);
 #endif
@@ -855,7 +870,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
   cur_ptr+=2;
 } else {
 #ifdef DEBUG
-  printf("VBE mode %x (xres=%x / bpp=%02x) not supported by 
display\n", 
cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
+  printf("VBE mode %x (xres=%x / bpp=%02x) not supported \n", 
cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
 #endif
 }
 cur_info++;
diff --git a/vbe.h b/vbe.h
index 60434ac..72cb045 100644
--- a/vbe.h
+++ b/vbe.h
@@ -275,39 +275,41 @@ typedef struct ModeInfoListItem
 //like 0xE000
 
 
-  #define VBE_DISPI_BANK_ADDRESS  0xA
-  #define VBE_DISPI_BANK_SIZE_KB  64
-  
-  #define VBE_DISPI_MAX_XRES  1024
-  #define VBE_DISPI_MAX_YRES  768
-  
-  #define VBE_DISPI_IOPORT_INDEX  0x01CE
-  #define VBE_DISPI_IOPORT_DATA   0x01CF
-  
-  #define VBE_DISPI_INDEX_ID  0x0
-  #define VBE_DISPI_INDEX_XRES0x1
-  #define VBE_DISPI_INDEX_YRES0x2
-  #define VBE_DISPI_INDEX_BPP 0x3
-  #define VBE_DISPI_INDEX_ENABLE  0x4
-  #define VBE_DISPI_INDEX_BANK0x5
-  #define VBE_DISPI_INDEX_VIRT_WIDTH  0x6
-  #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
-  #define VBE_DISPI_INDEX_X_OFFSET0x8
-  #define VBE_DISPI_INDEX_Y_OFFSET0x9
-  
-  #define VBE_DISPI_ID0   0xB0C0
-  #define VBE_DISPI_ID1   0xB0C1
-  #define VBE_DISPI_ID2   0xB0C2
-  #define VBE_DISPI_ID3   0xB0C3
-  #define VBE_DISPI_ID4   0xB0C4
-  
-  #define VBE_DISPI_DISABLED  0x00
-  #define VBE_DISPI_ENABLED   0x01
-  #define VBE_DISPI_GETCAPS   0x02
-  #define VBE_DISPI_8BIT_DAC  0x20
-  #define VBE_DISPI_LFB_ENABLED   0x40
-  #define VBE_DISPI_NOCLEARMEM0x80
-  
-  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS  0xE000
+  #define VBE_DISPI_BANK_ADDRESS   0xA
+  #define VBE_DISPI_BANK_SIZE_KB   64
+
+  #define VBE_DISPI_MAX_XRES   2560
+  #define VBE_DISPI_MAX_YRES   1600
+
+  #define VBE_DISPI_IOPORT_INDEX   0x01CE
+  #define VBE_DISPI_IOPORT_DATA0x01CF
+
+  #define VBE_DISPI_INDEX_ID   0x0
+  #define VBE_DIS

[Qemu-devel] [PATCH 5/5] - updates for release 0.6c

2010-06-18 Thread Gerd Hoffmann

From: Volker Ruppert 

---
 ChangeLog |   12 
 README|3 ++-
 2 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 35bf00a..dbaed5d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2009-04-07 20:18  vruppert
+
+   * vgabios.c (1.69):
+
+   - biosfn_write_teletype: fixed attribute when scrolling in text mode
+
+2009-04-06 20:17  vruppert
+
+   * ChangeLog (1.28), README (1.17):
+
+   - preparing for release 0.6c
+
 2009-01-25 16:46  vruppert
 
* vbe.c (1.62), vbe.h (1.28), vbetables-gen.c (1.5):
diff --git a/README b/README
index ce67aeb..c68b573 100644
--- a/README
+++ b/README
@@ -90,11 +90,12 @@ For any information on qemu, visit the website 
http://fabrice.bellard.free.fr/qe
 
 History
 ---
-vgabios-0.6c : not yet released
+vgabios-0.6c : Apr 08 2009
   - Volker
 . added DPMS support to cirrus vgabios (patch from Gleb Natapov)
 . use VBE LFB address from PCI base address if present
 . added support for a lot more non-standard VBE modes (e.g. widescreen 
modes)
+. minor bugfixes
 
 vgabios-0.6b : May 30 2008
   - Volker
-- 
1.6.5.2

[Qemu-devel] [PATCH 0/5] update vgabios to v0.6c

2010-06-18 Thread Gerd Hoffmann

  Hi,

This patch series updates the vgabios git tree at
git://git.qemu.org/vgabios.git to vgabios v0.6c.

Patches are creates using 'git cvsimport' with some cleanups afterwards:
drop binary updates, drop cvs $Id$ changes from commits.  Replace
'vruppert' with Volkers real name.

Note that vgabios v0.6c depends on a newer revision of the bochs vbe
interface which is only supported in the qemu devel versions but not in
the 0.12.x stable branch.

please apply,
  Gerd

Volker Ruppert (5):
  - use VBE LFB address from PCI base address if present (rewrite of
the cirrus specific function in main vgabios code) - removed
unnecessary spaces
  - added support for a lot more non-standard VBE modes (e.g.
widescreen modes) - requires latest Bochs VBE code (16 MB video
memory, VBE_DISPI_ID5, VRAM size in 64k pages stored in VBE
register) - check if VBE mode is supported with current VRAM size
  - preparing for release 0.6c
  - biosfn_write_teletype: fixed attribute when scrolling in text mode
  - updates for release 0.6c

 ChangeLog   |   47 
 README  |7 
 clext.c |   51 +--
 vbe.c   |   90 +-
 vbe.h   |   70 ++-
 vbetables-gen.c |   43 +++---
 vgabios.c   |   64 ++-
 7 files changed, 241 insertions(+), 131 deletions(-)

[Qemu-devel] [Bug 595906] [NEW] [ARM] All variants of ADDSUBX, SUBADDX give incorrect results

2010-06-18 Thread Bradley Smith

Public bug reported:

All variants of the ADDSUBX/SUBADDX instructions seem to be implemented
incorrectly, i.e.

MOV r12, #0
LDR r0, =0x18004800
LDR r1, =0x30006000
QADDSUBX r12, r0, r1; Should give 0x78001800 - gives 0x4800e800

This happens with latest git HEAD.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
[ARM] All variants of ADDSUBX,SUBADDX give incorrect results
https://bugs.launchpad.net/bugs/595906
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
All variants of the ADDSUBX/SUBADDX instructions seem to be implemented 
incorrectly, i.e.

MOV r12, #0
LDR r0, =0x18004800
LDR r1, =0x30006000
QADDSUBX r12, r0, r1; Should give 0x78001800 - gives 0x4800e800

This happens with latest git HEAD.

Re: [Qemu-devel] Re: [PATCH 5/5] linux fbdev display driver.

2010-06-18 Thread Julian Pidancet

On 06/18/2010 08:32 AM, Gerd Hoffmann wrote:
>Hi,
> 
>> For some reason, the display is extremely slow when using vnc and
>> fbdev at the same time.
> 
> Gotcha.  Didn't notice, but it probably depends on the hardware.  Very 
> likely the reason is that graphic cards usually are optimized for write 
> access and reads might be slow as hell.   vnc must read though.
> 

Access to the framebuffer are cached Write-Combining by default with fbdev, 
which is probably causing this latency.

One solution would be to disable the display allocator when vnc is present, and 
let it read from a software surface instead of reading from the framebuffer 
(like in your initial patch). It would probably decrease display performance, 
but not as much as it is now if we let the vnc driver read from the hardware 
framebuffer.

We can easily implement a surface usage counter in the display allocator code, 
so the fbdev driver can know whether or not the surface is read by other 
drivers at the same time.

-- 
Julian Pidancet

[Qemu-devel] Re: [PATCH 2/3] Monitor command 'info trace'

2010-06-18 Thread Prerna Saxena


Hi Stefan, Jan,
Thanks for taking a look.

On 06/17/2010 08:38 PM, Stefan Hajnoczi wrote:

On Wed, Jun 16, 2010 at 06:12:06PM +0530, Prerna Saxena wrote:

diff --git a/simpletrace.c b/simpletrace.c
index 2fec4d3..239ae3f 100644
--- a/simpletrace.c
+++ b/simpletrace.c
@@ -62,3 +62,16 @@ void trace4(TraceEvent event, unsigned long x1, unsigned 
long x2, unsigned long
  void trace5(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3, unsigned long x4, unsigned long x5) {
  trace(event, x1, x2, x3, x4, x5);
  }
+
+void do_info_trace(Monitor *mon)
+{
+unsigned int i, max_idx;
+
+max_idx = trace_idx ? trace_idx : TRACE_BUF_LEN;


trace_idx is always in the range [0, TRACE_BUF_LEN).  There is no need
to perform this test.


I only display the logged contents in the trace buffer (till trace_idx) 
, and not the entire trace buffer. Only when the index is full that the 
entire buffer is displayed.





+
+for (i=0; i

Whitespace "i=0; i

I'll fix this.


+monitor_printf(mon, "Event %ld : %ld %ld %ld %ld %ld\n",
+  trace_buf[i].event, trace_buf[i].x1, trace_buf[i].x2,
+trace_buf[i].x3, trace_buf[i].x4, trace_buf[i].x5);


Getting only numeric output is the limitation of a binary trace.  It
would probably be possible to pretty-print without much additional code
by using the format strings from the trace-events file.

I think the numeric dump is good for now though.  Hex is more compact
than decimal and would make pointers easier to spot.  Want to change
this?



I agree, but we can let this be a todo till after the first prototype 
goes upstream ?



+}
+}
diff --git a/tracetool b/tracetool
index 9ea9c08..2c73bab 100755
--- a/tracetool
+++ b/tracetool
@@ -130,6 +130,7 @@ void trace2(TraceEvent event, unsigned long x1, unsigned 
long x2);
  void trace3(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3);
  void trace4(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3, unsigned long x4);
  void trace5(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3, unsigned long x4, unsigned long x5);
+void do_info_trace(Monitor *mon);
  EOF

  simple_event_num=0
@@ -289,6 +290,7 @@ tracetoh()
  #define TRACE_H

  #include "qemu-common.h"
+#include "monitor.h"


qemu-common.h forward-declares Monitor, I don't think you need
monitor.h.



Right.


Stefan


I'll post patches by Monday that addresses your suggestions, and try to 
get it integrated with QMP.


--
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India

[Qemu-devel] Virtualization at Plumbers 2010 - Time to submit your proposals!

2010-06-18 Thread Jes Sorensen

Hi,

I would like to remind people about the Virtualization track at Linux
Plumbers Conference 2010, held in Cambridge, MA, November 3-5, 2010.

Please note the deadline for submissions is July 19, 2010.

LPC is particular well suited for technical presentations, work in
progress and subjects that needs discussion and collaboration between
communities (kernel, desktop/gfx, virtualization, etc.), so if you have
a contentious issue you would like to bring to a wider audience, this is
the ideal place to do it!

Note that this track is focusing on general Linux Virtualization, it
is not hypervisor specific. Submissions related to Xen, KVM, VMware,
containers, etc. are encouraged. Subjects could include:

 - Linux Kernel Virtualization enhancements
 - QEMU
 - IO performance work
 - Device management, hotplug
 - NUMA awareness
 - Live migration
 - Support for new hardware features, and/or provide guest access to
   these features.
 - Device emulation
 - Para-virtual enhancements: special filesystems, PMU, Windows
   drivers, etc.
 - Debugging and analysis tools
 - Containers
 - QMP and Spice
 - Virtualization management, user interfaces, and desktop integration
   (GNOME, KDE, etc)

If you have a subject you would like to present, please submit it as
soon as possible, and no later than July 19th. Please see the full Call
For Papers at http://www.linuxplumbersconf.org/2010/ for how to submit.

You may also want to list your ideas at the LPC Virtualization session
wiki page at http://wiki.linuxplumbersconf.org/2010:virtualization

Hope to see you in Cambridge in November!

Jes

Re: [Qemu-devel] Re: [PATCH 3/3] Toggle tracepoint state

2010-06-18 Thread Prerna Saxena


On 06/17/2010 09:33 PM, Stefan Hajnoczi wrote:

On Wed, Jun 16, 2010 at 06:14:35PM +0530, Prerna Saxena wrote:

This patch adds support for dynamically enabling/disabling of tracepoints.
This is done by internally maintaining each tracepoint's state, and
permitting logging of data from a tracepoint only if it is in an
'active' state.


  typedef struct {
+char *tp_name;
+bool state;
+unsigned int hash;
+} Tracepoint;


The tracing infrastructure avoids using the name 'tracepoint'.  It calls
them trace events.  I didn't deliberately choose that name, but was
unaware at the time that Linux tracing calls them tracepoints.  Given
that 'trace event' is currently used, it would be nice to remain
consistent/reduce confusion.

How about:
typedef struct {
 const char *name;
 bool enabled;
 unsigned int hash;
} TraceEventState;

Or a nicer overall change might be to rename enum TraceEvent to
TraceEventID and Tracepoint to TraceEvent.



I'll change that !


+
+typedef struct {
  unsigned long event;
  unsigned long x1;
  unsigned long x2;
@@ -18,11 +24,29 @@ enum {
  static TraceRecord trace_buf[TRACE_BUF_LEN];
  static unsigned int trace_idx;
  static FILE *trace_fp;
+static Tracepoint trace_list[NR_TRACEPOINTS];
+
+void init_tracepoint(const char *tname, TraceEvent tevent)
+{
+if (!tname || tevent>  NR_TRACEPOINTS) {
+return;
+}


I'd drop this check because only trace.c should use init_tracepoint()
and you have ensured it uses correct arguments.  Just a coding style
suggestion; having redundant checks makes the code more verbose, may
lead the reader to assume that this function really is called with junk
arguments, and silently returning will not help make the issue visible.


Ok.




+trace_list[tevent].tp_name = (char*)qemu_malloc(strlen(tname)+1);
+strncpy(trace_list[tevent].tp_name, tname, strlen(tname));


Or use qemmu_strdup() but we don't really need to allocate memory at all
here.  Just hold the const char* to a string literal since the trace
event is a static object that is built into the binary.



Ok


+trace_list[tevent].hash = qemu_hash(tname);
+trace_list[tevent].state = 1; /* Enable all by default */
+}
...
+void do_info_all_tracepoints(Monitor *mon)
+{
+unsigned int i;
+
+for (i=0; i

Whitespace and indentation.



Will fix.


+}
+
+static Tracepoint* find_tracepoint_by_name(const char *tname)
+{
+unsigned int i, name_hash;
+
+if (!tname) {
+return NULL;
+}
+
+name_hash = qemu_hash(tname);
+
+for (i=0; i

I don't see the need for hashing.  We don't actually have a hash table
and are doing a linear search.  There will be a smallish constant number
of trace events and only change_tracepoint_by_name() needs to do a
lookup.  There's no need to optimize that.



I was only optimising lookups. Instead of doing a strlen()-size 
comparison for each tracepoint, we end up doing a constant int-size 
comparison till there is possibility of hash collision. Strlen()-size 
lookups isnt really an issure right now, but will be more glaring if 
qemu ends up having a much larger number of tracepoints.



Is strncmp() used so looking up "foo" is like searching for foo*?  The
strlen(tname) should be outside the loop.  I think that could be useful
but we should document it or just use strcmp().



The hash would take care of most such cases. But this might be an issue 
in the rare event of a hash collision happening when foo_1 and foo_2 
match as well. I'll fix this.



+}
+
+void change_tracepoint_state(const char *tname, bool tstate)
+{
+Tracepoint *tp;
+
+tp = find_tracepoint_by_name(tname);
+if (tp) {
+tp->state = tstate;
+}
+}
diff --git a/tracetool b/tracetool
index 2c73bab..00af205 100755
--- a/tracetool
+++ b/tracetool
@@ -123,14 +123,20 @@ linetoc_end_nop()
  linetoh_begin_simple()
  {
  cat<
+
  typedef unsigned int TraceEvent;

+void init_tracepoint_table(void);
+void init_tracepoint(const char *tname, TraceEvent tevent);
  void trace1(TraceEvent event, unsigned long x1);
  void trace2(TraceEvent event, unsigned long x1, unsigned long x2);
  void trace3(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3);
  void trace4(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3, unsigned long x4);
  void trace5(TraceEvent event, unsigned long x1, unsigned long x2, unsigned 
long x3, unsigned long x4, unsigned long x5);
  void do_info_trace(Monitor *mon);
+void do_info_all_tracepoints(Monitor *mon);
+void change_tracepoint_state(const char *tname, bool tstate);
  EOF

  simple_event_num=0
@@ -163,22 +169,38 @@ EOF

  linetoh_end_simple()
  {
-return
+cat<

Have you looked at module.h?  Perhaps that provides a good solution for
initializing trace events.  It would allow the vl.c changes to be done
without an #ifdef.


Thanks for the tip, I'll check.




+simple_event_num=0
+
  }

  linetoc_simple()
  {
-return
+loca

[Qemu-devel] Re: [PATCH 2/3] Monitor command 'info trace'

2010-06-18 Thread Stefan Hajnoczi

On Fri, Jun 18, 2010 at 12:58 PM, Prerna Saxena
 wrote:
> Hi Stefan, Jan,
> Thanks for taking a look.
>
> On 06/17/2010 08:38 PM, Stefan Hajnoczi wrote:
>>
>> On Wed, Jun 16, 2010 at 06:12:06PM +0530, Prerna Saxena wrote:
>>>
>>> diff --git a/simpletrace.c b/simpletrace.c
>>> index 2fec4d3..239ae3f 100644
>>> --- a/simpletrace.c
>>> +++ b/simpletrace.c
>>> @@ -62,3 +62,16 @@ void trace4(TraceEvent event, unsigned long x1,
>>> unsigned long x2, unsigned long
>>>  void trace5(TraceEvent event, unsigned long x1, unsigned long x2,
>>> unsigned long x3, unsigned long x4, unsigned long x5) {
>>>      trace(event, x1, x2, x3, x4, x5);
>>>  }
>>> +
>>> +void do_info_trace(Monitor *mon)
>>> +{
>>> +    unsigned int i, max_idx;
>>> +
>>> +    max_idx = trace_idx ? trace_idx : TRACE_BUF_LEN;
>>
>> trace_idx is always in the range [0, TRACE_BUF_LEN).  There is no need
>> to perform this test.
>
> I only display the logged contents in the trace buffer (till trace_idx) ,
> and not the entire trace buffer. Only when the index is full that the entire
> buffer is displayed.

Thanks for explaining, I understand what you are doing now.  Due to
this special case, the code will dump out the empty trace buffer if
used before anything has been traced (trace_idx=0).

>>> +        monitor_printf(mon, "Event %ld : %ld %ld %ld %ld %ld\n",
>>> +                          trace_buf[i].event, trace_buf[i].x1,
>>> trace_buf[i].x2,
>>> +                            trace_buf[i].x3, trace_buf[i].x4,
>>> trace_buf[i].x5);
>>
>> Getting only numeric output is the limitation of a binary trace.  It
>> would probably be possible to pretty-print without much additional code
>> by using the format strings from the trace-events file.
>>
>> I think the numeric dump is good for now though.  Hex is more compact
>> than decimal and would make pointers easier to spot.  Want to change
>> this?
>>
>
> I agree, but we can let this be a todo till after the first prototype goes
> upstream ?

I still vote for hex instead of decimal :).  Since you're already
spinning a new patch it would be nice to put that change in, but no
worries.

> I'll post patches by Monday that addresses your suggestions, and try to get
> it integrated with QMP.

Excellent, thanks.  I'd like to put your patches onto my tracing
branch soon and test out the overall workflow of tracing QEMU.

Stefan

Re: [Qemu-devel] Re: [PATCH 3/3] Toggle tracepoint state

2010-06-18 Thread Stefan Hajnoczi

On Fri, Jun 18, 2010 at 1:24 PM, Prerna Saxena
 wrote:
> On 06/17/2010 09:33 PM, Stefan Hajnoczi wrote:
>>> +}
>>> +
>>> +static Tracepoint* find_tracepoint_by_name(const char *tname)
>>> +{
>>> +    unsigned int i, name_hash;
>>> +
>>> +    if (!tname) {
>>> +        return NULL;
>>> +    }
>>> +
>>> +    name_hash = qemu_hash(tname);
>>> +
>>> +    for (i=0; i>> +        if (trace_list[i].hash == name_hash&&
>>> +                       !strncmp(trace_list[i].tp_name, tname,
>>> strlen(tname))) {
>>> +            return&trace_list[i];
>>> +        }
>>> +    }
>>> +    return NULL; /* indicates end of list reached without a match */
>>
>> I don't see the need for hashing.  We don't actually have a hash table
>> and are doing a linear search.  There will be a smallish constant number
>> of trace events and only change_tracepoint_by_name() needs to do a
>> lookup.  There's no need to optimize that.
>>
>
> I was only optimising lookups. Instead of doing a strlen()-size comparison
> for each tracepoint, we end up doing a constant int-size comparison till
> there is possibility of hash collision. Strlen()-size lookups isnt really an
> issure right now, but will be more glaring if qemu ends up having a much
> larger number of tracepoints.

The thing is we only need to do lookups when tracepoints are
enabled/disabled.  That is a very rare operation, not critical path.

This feels like premature optimization.  If there is a performance
bottleneck down the road we can switch to a data structure that has
better time complexity than an array.  This is an easy change to make
later because it will not affect the user interface.

In the meantime linear strcmp() over an array keeps the code simple.
We can drop the tdb_hash() patch and remove some lines from this
patch.

>>> diff --git a/tracetool b/tracetool
>>> index 2c73bab..00af205 100755
>>> --- a/tracetool
>>> +++ b/tracetool
>>> @@ -123,14 +123,20 @@ linetoc_end_nop()
>>>  linetoh_begin_simple()
>>>  {
>>>      cat<>> +#include
>>> +
>>>  typedef unsigned int TraceEvent;
>>>
>>> +void init_tracepoint_table(void);
>>> +void init_tracepoint(const char *tname, TraceEvent tevent);
>>>  void trace1(TraceEvent event, unsigned long x1);
>>>  void trace2(TraceEvent event, unsigned long x1, unsigned long x2);
>>>  void trace3(TraceEvent event, unsigned long x1, unsigned long x2,
>>> unsigned long x3);
>>>  void trace4(TraceEvent event, unsigned long x1, unsigned long x2,
>>> unsigned long x3, unsigned long x4);
>>>  void trace5(TraceEvent event, unsigned long x1, unsigned long x2,
>>> unsigned long x3, unsigned long x4, unsigned long x5);
>>>  void do_info_trace(Monitor *mon);
>>> +void do_info_all_tracepoints(Monitor *mon);
>>> +void change_tracepoint_state(const char *tname, bool tstate);
>>>  EOF
>>>
>>>      simple_event_num=0
>>> @@ -163,22 +169,38 @@ EOF
>>>
>>>  linetoh_end_simple()
>>>  {
>>> -    return
>>> +    cat<>> +#define NR_TRACEPOINTS $simple_event_num
>>> +EOF
>>>  }
>>>
>>>  linetoc_begin_simple()
>>>  {
>>> -    return
>>> +    cat<>> +#include "trace.h"
>>> +
>>> +void init_tracepoint_table(void) {
>>> +EOF
>>
>> Have you looked at module.h?  Perhaps that provides a good solution for
>> initializing trace events.  It would allow the vl.c changes to be done
>> without an #ifdef.
>
> Thanks for the tip, I'll check.

I also wonder if it's possible to statically initialize the TraceEvent
array in the generated trace.c.  That way we need no vl.c startup
magic and tracing is available even in the very early stages of QEMU
startup.  What do you think?

Stefan

[Qemu-devel] Re: [PATCH 09/10] pci: set PCI multi-function bit appropriately.

2010-06-18 Thread Michael S. Tsirkin

On Fri, Jun 18, 2010 at 11:40:57AM +0900, Isaku Yamahata wrote:
> On Thu, Jun 17, 2010 at 12:37:21PM +0300, Michael S. Tsirkin wrote:
> > On Thu, Jun 17, 2010 at 03:15:51PM +0900, Isaku Yamahata wrote:
> > > set PCI multi-function bit appropriately.
> > > 
> > > Signed-off-by: Isaku Yamahata 
> > > 
> > > ---
> > > changes v1 -> v2:
> > > don't set header type register in configuration space.
> > > ---
> > >  hw/pci.c |   25 +
> > >  1 files changed, 25 insertions(+), 0 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 5316aa5..ee391dc 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -607,6 +607,30 @@ static void pci_init_wmask_bridge(PCIDevice *d)
> > >  pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0x);
> > >  }
> > >  
> > > +static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
> > > +{
> > > +uint8_t slot = PCI_SLOT(dev->devfn);
> > > +uint8_t func_max = 8;
> > 
> > enum or define would be better
> 
> Will fix.
> 
> 
> > > +uint8_t func;
> > 
> > If I understand correctly what this does, it goes over
> > other functions of the same device, and sets the MULTI_FUNCTION bit
> > for them if there's more than one function.
> > Instead, why don't we just set PCI_HEADER_TYPE_MULTI_FUNCTION
> > in relevant devices?
> 
> pci address, devfn ,is exported to users as addr property,
> so users can populate pci function(PCIDevice in qemu)
> at arbitrary devfn.
> It means each function(PCIDevice) don't know whether pci device
> (PCIDevice[8]) is multi function or not.
> So I chose to handle it in pci generic layer.
> 
> It can be argued that it's user operation fault and that
> the missing part is validation checks to catch such user errors.

Exactly. Another part that is missing is a way to hotplug
a multifunction device.

OTOH I think that hotplug of separate functions has no chance to work,
so users are better off getting an error.

> But I prefer more flexible and more user friendly way.

I think that most users would only add many functions
to a device as a result of an error.

If we really want the ability to put unrelated devices
as functions in a single one, let's just add
a 'multifunction' qdev property, and validate that
it is set appropriately.

> 
> > > +
> > > +for (func = 0; func < func_max; ++func) {
> > > +if (bus->devices[PCI_DEVFN(slot, func)]) {
> > > +break;
> > > +}
> > > +}
> > > +if (func == func_max) {
> > > +return;
> > > +}
> > > +
> > 
> > The above only works if the function is called before
> > device is added to bus.
> 
> That's right. This function is called before bus->devices[devfn] = dev.

This needs a comment.

> > 
> > > +for (func = 0; func < func_max; ++func) {
> > > +if (bus->devices[PCI_DEVFN(slot, func)]) {
> > > +bus->devices[PCI_DEVFN(slot, func)]->config[PCI_HEADER_TYPE] 
> > > |=
> > > +PCI_HEADER_TYPE_MULTI_FUNCTION;
> > > +}
> > > +}
> > > +dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> > 
> > Isn't the bit set above already?
> > 
> > > +}
> > > +
> > >  static void pci_config_alloc(PCIDevice *pci_dev)
> > >  {
> > >  int config_size = pci_config_size(pci_dev);
> > > @@ -660,6 +684,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
> > > *pci_dev, PCIBus *bus,
> > >  if (is_bridge) {
> > >  pci_init_wmask_bridge(pci_dev);
> > >  }
> > > +pci_init_multifunction(bus, pci_dev);
> > >  
> > >  if (!config_read)
> > >  config_read = pci_default_read_config;
> > > -- 
> > > 1.6.6.1
> > 
> 
> -- 
> yamahata

[Qemu-devel] Re: [PATCH 00/10] pci: pci to pci bridge clean up and enhancement

2010-06-18 Thread Michael S. Tsirkin

On Fri, Jun 18, 2010 at 12:26:10PM +0900, Isaku Yamahata wrote:
> On Thu, Jun 17, 2010 at 02:57:16PM +0300, Michael S. Tsirkin wrote:
> > On Thu, Jun 17, 2010 at 01:02:43PM +0300, Michael S. Tsirkin wrote:
> > > For example, forcing all devices to call pci_reset_default
> > > in their reset routines does not look like a good cleanup:
> > > the less boilerplate, the better IMO.
> > 
> > One thing that we need to address, is devices
> > which need to enable memory+master on init.
> > They should probably also enable this on reset.
> 
> Isn't it BIOS/firmware that initializes those bits?

memory yes, bus master no.

> And seabios does so.
> The pci spec explicitly says that they are 0 after RST#.

Mostly these are bugs in emulation, but could be bugs
in devices that we emulate (PBM).

> > One approach that was discussed several times
> > would be to call cleanup and then init again.
> > I expect this would be enough to get rid of reset
> > callbacks in most devices.
> 
> Oh, some devices set those bits on init.
> So cleanup + init again might be easier than
> sorting out initialization/reset of each devices.


Exactly.

> thanks,
> > 
> > -- 
> > MST
> > 
> 
> -- 
> yamahata

[Qemu-devel] block: format vs. protocol, and how they stack

2010-06-18 Thread Markus Armbruster

The code is pretty confused about format vs. protocol, and so are we.
Let's try to figure them out.

>From cruising altitude, all this format, protocol, stacking business
doesn't matter.  We provide a bunch of arguments, and get an image.

If you look more closely, providing that image involves sub-tasks.  One
is to haul bits.  Another one is to translate between bits in different
formats.

Working hypothesis:

* A protocol hauls image bits.  Examples: file, host_device, nbd.

* A format translates image formats.  Examples: raw, qcow2.

Note: this does *not* follow the code's use of the terms.  It better
doesn't.  Because the code is confused.

Both protocol and format provide an image.  That's why we can and in
fact do have a common abstraction for them: BlockDriver.  Our data type
for a block driver instance is BlockDriverState.

Nothing stops a block driver to translate and haul at the same time.  We
generally separate the two jobs, because it lets us combine the
different ways to translate with the different ways to haul.
Nevertheless, a block driver *can* be both format and protocol.

Example: vvfat arguably both translates and hauls.

Let's call a format that isn't also protocol a pure format, and a
protocol that isn't also a format a pure protocol.

Obviously, pure formats need to sit on top of something providing images
to translate.  Formats don't care whether those somethings translate or
haul.  Therefore, a pure format is always stacked on one or more
BlockDriverStates.

Example: raw is always stacked one exactly one BlockDriverState (stored
in bs->file).

Example: qcow2 is always stacked on exactly two BlockDriverStates
(stored in bs->file and bs->backing_hd).

Conversely, anything that isn't stacked on any BlockDriverState can't be
a pure format, and thus must be a protocol.

Example: file hauls an ordinary file's bits, nbd hauls bits over TCP
using the NBD protocol.

Summary so far:

1. BlockDriverStates form a tree.

2. The leaves of the tree are protocols, not pure formats.

3. The non-leaf nodes may be anything.  We haven't found a reason why
   not.


In general, a block driver needs some arguments to create an instance.
The current code provides two BlockDriver methods for that:

* bdrv_open() takes a flags argument.

* bdrv_file_open() takes a flags argument and a filename argument.

This is woefully inadequate for anything but the simplest block drivers.
Any driver taking more complex arguments has to extract them out of the
"filename".

Example: http extracts url and optional readahead.

A saner interface would pass flags and a suitable argument dictionary
such as QemuOpts.


Now, let's review our existing interface to create such a tree of block
drivers.  Beware, royal mess ahead.

There are two interfaces.  The first one is bdrv_open().  It takes three
arguments: filename, flags and an optional block driver argument.

If flag BDRV_O_SNAPSHOT is set, we do snapshot magic.  Omitted here in
an attempt to protect reader sanity.

If the block driver is missing, we guess one.  More on that below.

The block driver is instantiated to set up the root of the tree.  Let's
call it the root block driver, and its instance bs.

If the root block driver provides method bdrv_file_open(), it is used,
and gets the flags and filename argument.

Else, we first instantiate *another* driver.

We use the second interface for that: bdrv_file_open().  It takes
filename and flags arguments like bdrv_open(), but no block driver
argument.

It chooses the block driver by looking at filename.  If filename
names a host device, use the protocol for hauling that device's
bits.  If it starts with P:, where P is some driver's
"protocol_name", use that driver.  Else fail.  Except I just lied;
the actual rules are messier than that.

Unlike bdrv_open(), bdrv_file_open() ignores flag BDRV_O_SNAPSHOT,
and always behaves as if flag BDRV_O_NO_BACKING was set.

We store the instance in bs->file.

Then the root block driver is instantiated with method bdrv_open().  It
gets the flags argument.  It stacks on top of bs->file, but that's mere
convention.

Note: one of the code's ideas on format vs. protocol is "protocols
provide bdrv_file_open(), formats do not".  I don't think that idea is
helpful.

The root block driver may ask for a backing file.  To do that, it sets
bs->backing_filename and optionally bs->backing_format, both strings.

Example: qcow2 reads the two strings from the image header.

We instantiate the backing file bs->backing_hd with bdrv_open().
Recursion.  Arguments: bs->backing_filename, flags derived from our own
flags argument, and the driver named by bs->backing_format.  If
bs->backing_format is unset, pick one just like -drive does when its
format option is unset.

The root block driver stacks on top of bs->backing_hd, by convention.

Flag BDRV_O_NO_BACKING supresses backing file setup, but let's ignore
that here.

This provides for common stacking, but it's not gen

Re: [Qemu-devel] Virtualization at Plumbers 2010 - Time to submit your proposals!

2010-06-18 Thread Luiz Capitulino

On Fri, 18 Jun 2010 14:00:36 +0200
Jes Sorensen  wrote:

>  - QMP and Spice

 I think we're going to discuss most QMP related subjects in the KVM forum,
but I'm open to suggestions.

Re: [Qemu-devel] Re: [PATCH 09/10] pci: set PCI multi-function bit appropriately.

2010-06-18 Thread Isaku Yamahata

On Fri, Jun 18, 2010 at 03:44:04PM +0300, Michael S. Tsirkin wrote:
> > > If I understand correctly what this does, it goes over
> > > other functions of the same device, and sets the MULTI_FUNCTION bit
> > > for them if there's more than one function.
> > > Instead, why don't we just set PCI_HEADER_TYPE_MULTI_FUNCTION
> > > in relevant devices?
> > 
> > pci address, devfn ,is exported to users as addr property,
> > so users can populate pci function(PCIDevice in qemu)
> > at arbitrary devfn.
> > It means each function(PCIDevice) don't know whether pci device
> > (PCIDevice[8]) is multi function or not.
> > So I chose to handle it in pci generic layer.
> > 
> > It can be argued that it's user operation fault and that
> > the missing part is validation checks to catch such user errors.
> 
> Exactly. Another part that is missing is a way to hotplug
> a multifunction device.

Yes, multi function hot plug is also on my wish list.


> OTOH I think that hotplug of separate functions has no chance to work,
> so users are better off getting an error.
> 
> > But I prefer more flexible and more user friendly way.
> 
> I think that most users would only add many functions
> to a device as a result of an error.
>
> If we really want the ability to put unrelated devices
> as functions in a single one, let's just add
> a 'multifunction' qdev property, and validate that
> it is set appropriately.

I think "unrelated" is policy. There is no obvious way to determine
which functions can be in a same device.
For example, popular chipset contains isa bridge, ide controller,
usb controller, sound and modem in a single device as functions.
It's up to hardware designer policy which functions are grouped into
a device.

So qemu should be able to populate any function in a device,
and leave such policy check to higher level tool like libvirt/virt-manager.
-- 
yamahata

Re: [Qemu-devel] [RFC PATCH 1/5] qdev: Create qdev_get_dev_path()

2010-06-18 Thread Markus Armbruster

Alex Williamson  writes:

> On Wed, 2010-06-16 at 10:23 +0200, Markus Armbruster wrote:
>> Alex Williamson  writes:
>> 
>> > On Tue, 2010-06-15 at 12:28 +0100, Paul Brook wrote:
>> >> > > Alex proposed to disambiguate by adding "identified properties of the
>> >> > > immediate parent bus and device" to the path component.  For PCI, 
>> >> > > these
>> >> > > are dev.fn.  Likewise for any other bus where devices have unambigous
>> >> > > bus address.  The driver name carries no information!
>> >> > 
>> >> > From user POV, driver names are very handly to address a device
>> >> > intuitively - except for the case you have tones of devices on the same
>> >> > bus that are handled by the same driver. For that case we need to
>> >> > augment the device name with a useful per-bus ID, derived from the bus
>> >> > address where available, otherwise based on instance numbers.
>> >> 
>> >> This is where I think you're missing a trick. We don't need to augment 
>> >> the 
>> >> name, we just need to allow the bus id to be used instead.
>> >
>> > For the case of a hot remove, I agree.  If the user specifies "pci_del
>> > pci.0/03.0", that's completely sufficient because we don't care what's
>> > in that slot, just remove it.  However, I still see some use cases for
>> > device names in the path.  Take for example:
>> >
>> > (A): /i440FX-pcihost/pci.0/e1000.05.0
>> >
>> > vs
>> >
>> > (B): /pci.0/05.0
>> >
>> > (removing both the root bridge driver name and the device driver name)
>> 
>> / is the main system bus.  System bus defines no bus address at the
>> moment.  Therefore, you have to use the driver name i440FX-pcihost.
>
> So is the general rule "If a device's parent bus does not provide an
> address, print device name"?

I think the general rule for constructing a *canonical* qdev path should
be:

* If it's the main system bus, the path is /.

* If it's another bus, the path is P/B, where P is the canonical path of
  the device providing the bus, and B is the bus name.  Unambiguous,
  since no device ever defines two buses with the same name.

* If it's a device, the path is P/D, where P is the canonical path of
  the bus.  If the bus defines bus addresses, then D is @A, where A is
  the device's bus address.

  We haven't made up our minds whether the else case exists, or what to
  do if it does.  The simple "else D is the device model driver's name"
  works only if the bus can't take multiple device models with the same
  driver.

The canonical path is not the only path.  For instance, a qdev ID is a
valid path, but it's not canonical.  /i440FX-pcihost/pci.0/e1000 is
another valid, non-canonical path.

[...]

Re: [Qemu-devel] [RFC PATCH 1/5] qdev: Create qdev_get_dev_path()

2010-06-18 Thread Jan Kiszka

Markus Armbruster wrote:
> Alex Williamson  writes:
> 
>> On Wed, 2010-06-16 at 10:23 +0200, Markus Armbruster wrote:
>>> Alex Williamson  writes:
>>>
 On Tue, 2010-06-15 at 12:28 +0100, Paul Brook wrote:
>>> Alex proposed to disambiguate by adding "identified properties of the
>>> immediate parent bus and device" to the path component.  For PCI, these
>>> are dev.fn.  Likewise for any other bus where devices have unambigous
>>> bus address.  The driver name carries no information!
>> From user POV, driver names are very handly to address a device
>> intuitively - except for the case you have tones of devices on the same
>> bus that are handled by the same driver. For that case we need to
>> augment the device name with a useful per-bus ID, derived from the bus
>> address where available, otherwise based on instance numbers.
> This is where I think you're missing a trick. We don't need to augment 
> the 
> name, we just need to allow the bus id to be used instead.
 For the case of a hot remove, I agree.  If the user specifies "pci_del
 pci.0/03.0", that's completely sufficient because we don't care what's
 in that slot, just remove it.  However, I still see some use cases for
 device names in the path.  Take for example:

 (A): /i440FX-pcihost/pci.0/e1000.05.0

 vs

 (B): /pci.0/05.0

 (removing both the root bridge driver name and the device driver name)
>>> / is the main system bus.  System bus defines no bus address at the
>>> moment.  Therefore, you have to use the driver name i440FX-pcihost.
>> So is the general rule "If a device's parent bus does not provide an
>> address, print device name"?
> 
> I think the general rule for constructing a *canonical* qdev path should
> be:
> 
> * If it's the main system bus, the path is /.
> 
> * If it's another bus, the path is P/B, where P is the canonical path of
>   the device providing the bus, and B is the bus name.  Unambiguous,
>   since no device ever defines two buses with the same name.
> 
> * If it's a device, the path is P/D, where P is the canonical path of
>   the bus.  If the bus defines bus addresses, then D is @A, where A is
>   the device's bus address.
> 
>   We haven't made up our minds whether the else case exists, or what to
>   do if it does.  The simple "else D is the device model driver's name"
>   works only if the bus can't take multiple device models with the same
>   driver.

...which is already on x86 the case with multiple APICs or HPETs on the
system bus.

> 
> The canonical path is not the only path.  For instance, a qdev ID is a
> valid path, but it's not canonical.  /i440FX-pcihost/pci.0/e1000 is
> another valid, non-canonical path.

Not only canonical paths need to be specified, also alias like the
above. We should limit those alias to a required minimum ("required"
means to me: improves human-friendliness compared to canonical form and
preserves backward compatibility where relevant).

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

[Qemu-devel] Re: RFC qdev path semantics

2010-06-18 Thread Markus Armbruster

Markus Armbruster  writes:

> A number of changes to qdev paths have been proposed in various threads.
> It's becoming harder to keep track of them, so let me sum them up in one
> place.  Please correct me if I misrepresent your ideas.
>
> I'm going to describe the current state of things, and the proposed
> changes (marked with ###).
>
>
> The device tree has the main system bus as root.  A child of a bus is a
> device.  A child of a device is a bus.
>
> A qdev path consists of qdev path components separated by '/'.  It
> resolves to a node in the device tree, either bus or device.
>
> The qdev path "/" resolves to the root, i.e. the main system bus.
>
> The qdev path IDENT, where IDENT is an identifier, resolves to the
> device whose id is IDENT.

Err, this is not what the current code does.  IDENT resolves to the
*bus* whose name is IDENT.  Device IDs don't come into play.  Yes, bus
names need not be globally unique.  We use whichever bus we find first.

We should really support device IDs as anchor of relative paths.  

As far as I can see, we don't currently support paths when we look for a
device, only device IDs.  But we'd like to change that.  Say we extend
device_del to accept a path, not just a device ID.  But how to interpret
an identifier argument then?  Backward compatibility and common sense
say it's a device ID.  But it's also a relative qdev path with just one
path component, a bus name.

What a mess!

> If PATH resolves to device DEV, and a child of DEV has the name IDENT,
> then we resolve to that bus.

then we resolve PATH/IDENT to that bus.

[...]

Re: [Qemu-devel] [RFC PATCH 1/5] qdev: Create qdev_get_dev_path()

2010-06-18 Thread Alex Williamson

On Fri, 2010-06-18 at 11:16 +0200, Jan Kiszka wrote:
> Alex Williamson wrote:
> > On Wed, 2010-06-16 at 10:23 +0200, Markus Armbruster wrote:
> >> Alex Williamson  writes:
> >>
> >>> On Tue, 2010-06-15 at 12:28 +0100, Paul Brook wrote:
> >> Alex proposed to disambiguate by adding "identified properties of the
> >> immediate parent bus and device" to the path component.  For PCI, these
> >> are dev.fn.  Likewise for any other bus where devices have unambigous
> >> bus address.  The driver name carries no information!
> > From user POV, driver names are very handly to address a device
> > intuitively - except for the case you have tones of devices on the same
> > bus that are handled by the same driver. For that case we need to
> > augment the device name with a useful per-bus ID, derived from the bus
> > address where available, otherwise based on instance numbers.
>  This is where I think you're missing a trick. We don't need to augment 
>  the 
>  name, we just need to allow the bus id to be used instead.
> >>> For the case of a hot remove, I agree.  If the user specifies "pci_del
> >>> pci.0/03.0", that's completely sufficient because we don't care what's
> >>> in that slot, just remove it.  However, I still see some use cases for
> >>> device names in the path.  Take for example:
> >>>
> >>> (A): /i440FX-pcihost/pci.0/e1000.05.0
> >>>
> >>> vs
> >>>
> >>> (B): /pci.0/05.0
> >>>
> >>> (removing both the root bridge driver name and the device driver name)
> >> / is the main system bus.  System bus defines no bus address at the
> >> moment.  Therefore, you have to use the driver name i440FX-pcihost.
> > 
> > So is the general rule "If a device's parent bus does not provide an
> > address, print device name"?
> 
> What is the device name?

dev->info->name

> > 
> >> /i440FX-pcihost/pci.0 is the PCI bus.  PCI bus defines a bus address:
> >> dev.fn.  Therefore, you can either use the bus address @05.0, or the
> >> driver name e1000.
> >>
> >> We have "/i440FX-pcihost/pci.0/e1000" vs. "/i440FX-pcihost/pci.0/@05.0".
> > 
> > I object to being able to use anything but an address under a bus that
> > supports hotplug, but that aside, I think the paths for my example
> > system look like below.  Note that anything behind and including the $
> > is not the canonical path, that begins the free form, usage specific
> > string, here filled by missing device names (open to suggestions other
> > than $ here).
> 
> What about ":" instead of "$"? Visually more appealing IMO.

Sure, that works.

> >  Note that were isa devices do not have a bus identifier,
> > I'm using the device name,
> 
> Don't understand. Why don't they have their I/O base as prefix? This is
> inconsistent, and if we consider anything behind "$" (or ":")
> informative, the bus address is mandatory for any device (on a bus with
> an addressing scheme).

Not all isa devices expose an iobase property.  isa doesn't have an
addressing scheme, it's basically a free for all of grabbing addresses
and interrupts and hoping they don't conflict with no means to uniquely
address a specific device.  That's why isa cards are loaded with jumpers
so you can try to move them around.

> > which I think follows the same rule as
> > i440FX-pcihost.  Can we agree this is the goal?  Thanks,
> 
> It looks almost acceptable from my POV. We just need to define how to
> get hold of devices on buses without an addressing scheme (e.g. the
> system bus). Using the driver name is insufficient once you have > 1
> devices of the same type. Introducing device instance numbers to those
> buses would be straightforward IMO.

Yep, as I mentioned, I can live with instance numbers for devices that
do not live on a hotpluggable bus.  Where will the instance number go?
A property on the parent bus, the device, field in the DeviceState?
Thanks,

Alex

[Qemu-devel] [PATCH 0/6] block: Add flush after metadata writes

2010-06-18 Thread Kevin Wolf

This addresses the data integrity problems which are described for qcow at
http://wiki.qemu.org/Features/Qcow2DataIntegrity#Metadata_update_ordering.2C_Part_2
These problems are the same for all writable image formats, so this series
contains a patch for each of them.

The only exception is VDI which uses AIO for writing its metadata. It needs
a different fix.

Kevin Wolf (6):
  block: Add bdrv_(p)write_sync
  cow: Use bdrv_(p)write_sync for metadata writes
  qcow: Use bdrv_(p)write_sync for metadata writes
  qcow2: Use bdrv_(p)write_sync for metadata writes
  vmdk: Use bdrv_(p)write_sync for metadata writes
  vpc: Use bdrv_(p)write_sync for metadata writes

 block.c|   37 +
 block.h|4 
 block/cow.c|   20 +++-
 block/qcow.c   |   18 ++
 block/qcow2-cluster.c  |   24 
 block/qcow2-refcount.c |   24 
 block/qcow2-snapshot.c |   23 +++
 block/qcow2.c  |   10 +-
 block/vmdk.c   |   10 +-
 block/vpc.c|9 +
 10 files changed, 112 insertions(+), 67 deletions(-)

[Qemu-devel] [PATCH 3/6] qcow: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Kevin Wolf

Use bdrv_(p)write_sync to ensure metadata integrity in case of a crash.

Signed-off-by: Kevin Wolf 
---
 block/qcow.c |   18 ++
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 449858f..816103d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -273,8 +273,9 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 /* update the L1 entry */
 s->l1_table[l1_index] = l2_offset;
 tmp = cpu_to_be64(l2_offset);
-if (bdrv_pwrite(bs->file, s->l1_table_offset + l1_index * sizeof(tmp),
-&tmp, sizeof(tmp)) != sizeof(tmp))
+if (bdrv_pwrite_sync(bs->file,
+s->l1_table_offset + l1_index * sizeof(tmp),
+&tmp, sizeof(tmp)) < 0)
 return 0;
 new_l2_table = 1;
 }
@@ -302,8 +303,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 l2_table = s->l2_cache + (min_index << s->l2_bits);
 if (new_l2_table) {
 memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-if (bdrv_pwrite(bs->file, l2_offset, l2_table, s->l2_size * 
sizeof(uint64_t)) !=
-s->l2_size * sizeof(uint64_t))
+if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+s->l2_size * sizeof(uint64_t)) < 0)
 return 0;
 } else {
 if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * 
sizeof(uint64_t)) !=
@@ -368,8 +369,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 /* update L2 table */
 tmp = cpu_to_be64(cluster_offset);
 l2_table[l2_index] = tmp;
-if (bdrv_pwrite(bs->file,
-l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) 
!= sizeof(tmp))
+if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+&tmp, sizeof(tmp)) < 0)
 return 0;
 }
 return cluster_offset;
@@ -835,8 +836,9 @@ static int qcow_make_empty(BlockDriverState *bs)
 int ret;
 
 memset(s->l1_table, 0, l1_length);
-if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
-   return -1;
+if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+l1_length) < 0)
+return -1;
 ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
 if (ret < 0)
 return ret;
-- 
1.6.6.1

[Qemu-devel] [PATCH 2/6] cow: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Kevin Wolf

Use bdrv_(p)write_sync to ensure metadata integrity in case of a crash.
While at it, correct the wrong usage of errno.

Signed-off-by: Kevin Wolf 
---
 block/cow.c |   20 +++-
 1 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/block/cow.c b/block/cow.c
index d146434..eedcc48 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -97,17 +97,18 @@ static inline int cow_set_bit(BlockDriverState *bs, int64_t 
bitnum)
 {
 uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
 uint8_t bitmap;
+int ret;
 
-if (bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)) !=
-   sizeof(bitmap)) {
-   return -errno;
+ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+if (ret < 0) {
+   return ret;
 }
 
 bitmap |= (1 << (bitnum % 8));
 
-if (bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap)) !=
-   sizeof(bitmap)) {
-   return -errno;
+ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
+if (ret < 0) {
+   return ret;
 }
 return 0;
 }
@@ -116,10 +117,11 @@ static inline int is_bit_set(BlockDriverState *bs, 
int64_t bitnum)
 {
 uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
 uint8_t bitmap;
+int ret;
 
-if (bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)) !=
-   sizeof(bitmap)) {
-   return -errno;
+ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+if (ret < 0) {
+   return ret;
 }
 
 return !!(bitmap & (1 << (bitnum % 8)));
-- 
1.6.6.1

[Qemu-devel] [PATCH 1/6] block: Add bdrv_(p)write_sync

2010-06-18 Thread Kevin Wolf

Add new functions that write and flush the written data to disk immediately.
This is what needs to be used for image format metadata to maintain integrity
for cache=... modes that don't use O_DSYNC. (Actually, we only need barriers,
and therefore the functions are defined as such, but flushes is what is
implemented in this patch - we can try to change that later)

Signed-off-by: Kevin Wolf 
---
 block.c |   37 +
 block.h |4 
 2 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 0765fbc..7b64c2d 100644
--- a/block.c
+++ b/block.c
@@ -1010,6 +1010,43 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 return count1;
 }
 
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+const void *buf, int count)
+{
+int ret;
+
+ret = bdrv_pwrite(bs, offset, buf, count);
+if (ret < 0) {
+return ret;
+}
+
+/* No flush needed for cache=writethrough, it uses O_DSYNC */
+if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
+bdrv_flush(bs);
+}
+
+return 0;
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
+const uint8_t *buf, int nb_sectors)
+{
+return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
+buf, BDRV_SECTOR_SIZE * nb_sectors);
+}
+
 /**
  * Truncate file to 'offset' bytes (needed only for file protocols)
  */
diff --git a/block.h b/block.h
index 9df9b38..6a157f4 100644
--- a/block.h
+++ b/block.h
@@ -80,6 +80,10 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 const void *buf, int count);
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+const void *buf, int count);
+int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
+const uint8_t *buf, int nb_sectors);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-- 
1.6.6.1

[Qemu-devel] [PATCH 5/6] vmdk: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Kevin Wolf

Use bdrv_(p)write_sync to ensure metadata integrity in case of a crash.

Signed-off-by: Kevin Wolf 
---
 block/vmdk.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index e659908..2d4ba42 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -150,7 +150,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t 
cid)
 pstrcat(desc, sizeof(desc), tmp_desc);
 }
 
-if (bdrv_pwrite(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
 return -1;
 return 0;
 }
@@ -471,14 +471,14 @@ static int vmdk_L2update(BlockDriverState *bs, 
VmdkMetaData *m_data)
 BDRVVmdkState *s = bs->opaque;
 
 /* update L2 table */
-if (bdrv_pwrite(bs->file, ((int64_t)m_data->l2_offset * 512) + 
(m_data->l2_index * sizeof(m_data->offset)),
-&(m_data->offset), sizeof(m_data->offset)) != 
sizeof(m_data->offset))
+if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + 
(m_data->l2_index * sizeof(m_data->offset)),
+&(m_data->offset), sizeof(m_data->offset)) < 0)
 return -1;
 /* update backup L2 table */
 if (s->l1_backup_table_offset != 0) {
 m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
-if (bdrv_pwrite(bs->file, ((int64_t)m_data->l2_offset * 512) + 
(m_data->l2_index * sizeof(m_data->offset)),
-&(m_data->offset), sizeof(m_data->offset)) != 
sizeof(m_data->offset))
+if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + 
(m_data->l2_index * sizeof(m_data->offset)),
+&(m_data->offset), sizeof(m_data->offset)) < 0)
 return -1;
 }
 
-- 
1.6.6.1

[Qemu-devel] [PATCH 4/6] qcow2: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Kevin Wolf

Use bdrv_(p)write_sync to ensure metadata integrity in case of a crash.

Signed-off-by: Kevin Wolf 
---
 block/qcow2-cluster.c  |   24 
 block/qcow2-refcount.c |   24 
 block/qcow2-snapshot.c |   23 +++
 block/qcow2.c  |   10 +-
 4 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ff1fc26..b023148 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -64,8 +64,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
 for(i = 0; i < s->l1_size; i++)
 new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-ret = bdrv_pwrite(bs->file, new_l1_table_offset, new_l1_table, 
new_l1_size2);
-if (ret != new_l1_size2)
+ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, 
new_l1_size2);
+if (ret < 0)
 goto fail;
 for(i = 0; i < s->l1_size; i++)
 new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
@@ -74,8 +74,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
 cpu_to_be32w((uint32_t*)data, new_l1_size);
 cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
-ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, l1_size), 
data,sizeof(data));
-if (ret != sizeof(data)) {
+ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), 
data,sizeof(data));
+if (ret < 0) {
 goto fail;
 }
 qemu_free(s->l1_table);
@@ -87,7 +87,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
  fail:
 qemu_free(new_l1_table);
 qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
-return ret < 0 ? ret : -EIO;
+return ret;
 }
 
 void qcow2_l2_cache_reset(BlockDriverState *bs)
@@ -208,7 +208,7 @@ static int write_l1_entry(BlockDriverState *bs, int 
l1_index)
 }
 
 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-ret = bdrv_pwrite(bs->file, s->l1_table_offset + 8 * l1_start_index,
+ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
 buf, sizeof(buf));
 if (ret < 0) {
 return ret;
@@ -264,7 +264,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, 
uint64_t **table)
 }
 /* write the l2 table to the file */
 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
+ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
 s->l2_size * sizeof(uint64_t));
 if (ret < 0) {
 goto fail;
@@ -414,8 +414,8 @@ static int copy_sectors(BlockDriverState *bs, uint64_t 
start_sect,
 &s->aes_encrypt_key);
 }
 BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
- s->cluster_data, n);
+ret = bdrv_write_sync(bs->file, (cluster_offset >> 9) + n_start,
+s->cluster_data, n);
 if (ret < 0)
 return ret;
 return 0;
@@ -632,10 +632,10 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 
 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
 l2_table[l2_index] = cpu_to_be64(cluster_offset);
-if (bdrv_pwrite(bs->file,
+if (bdrv_pwrite_sync(bs->file,
 l2_offset + l2_index * sizeof(uint64_t),
 l2_table + l2_index,
-sizeof(uint64_t)) != sizeof(uint64_t))
+sizeof(uint64_t)) < 0)
 return 0;
 
 return cluster_offset;
@@ -656,7 +656,7 @@ static int write_l2_entries(BlockDriverState *bs, uint64_t 
*l2_table,
 int ret;
 
 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
-ret = bdrv_pwrite(bs->file, l2_offset + start_offset,
+ret = bdrv_pwrite_sync(bs->file, l2_offset + start_offset,
 &l2_table[l2_start_index], len);
 if (ret < 0) {
 return ret;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 41e1da9..c2d0e61 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -44,8 +44,8 @@ static int write_refcount_block(BlockDriverState *bs)
 }
 
 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE);
-if (bdrv_pwrite(bs->file, s->refcount_block_cache_offset,
-s->refcount_block_cache, size) != size)
+if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset,
+s->refcount_block_cache, size) < 0)
 {
 return -EIO;
 }
@@ -269,7 +269,7 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, 
int64_t cluster_index)
 
 /* Now the new refcount block needs to be written to disk */
 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-ret = bdrv_pwrite(bs->file, new_block, s->refcount_block_cache,
+ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache,
 s->cluster_size);
 if (ret < 0) {
 goto fail_block;
@@ -279,7 +27

[Qemu-devel] [PATCH 6/6] vpc: Use bdrv_(p)write_sync for metadata writes

2010-06-18 Thread Kevin Wolf

Use bdrv_(p)write_sync to ensure metadata integrity in case of a crash.

Signed-off-by: Kevin Wolf 
---
 block/vpc.c |9 +
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index f1f73e2..e50509e 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -261,7 +261,7 @@ static inline int64_t get_sector_offset(BlockDriverState 
*bs,
 
 s->last_bitmap_offset = bitmap_offset;
 memset(bitmap, 0xff, s->bitmap_size);
-bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
 }
 
 //printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", 
bloff: %" PRIx64 "\n",
@@ -311,7 +311,7 @@ static int rewrite_footer(BlockDriverState* bs)
 BDRVVPCState *s = bs->opaque;
 int64_t offset = s->free_data_block_offset;
 
-ret = bdrv_pwrite(bs->file, offset, s->footer_buf, HEADER_SIZE);
+ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
 if (ret < 0)
 return ret;
 
@@ -346,7 +346,8 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t 
sector_num)
 
 // Initialize the block's bitmap
 memset(bitmap, 0xff, s->bitmap_size);
-bdrv_pwrite(bs->file, s->free_data_block_offset, bitmap, s->bitmap_size);
+bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
+s->bitmap_size);
 
 // Write new footer (the old one will be overwritten)
 s->free_data_block_offset += s->block_size + s->bitmap_size;
@@ -357,7 +358,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t 
sector_num)
 // Write BAT entry to disk
 bat_offset = s->bat_offset + (4 * index);
 bat_value = be32_to_cpu(s->pagetable[index]);
-ret = bdrv_pwrite(bs->file, bat_offset, &bat_value, 4);
+ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
 if (ret < 0)
 goto fail;
 
-- 
1.6.6.1

Re: [Qemu-devel] Re: [PATCH 09/10] pci: set PCI multi-function bit appropriately.

2010-06-18 Thread Michael S. Tsirkin

On Fri, Jun 18, 2010 at 10:38:20PM +0900, Isaku Yamahata wrote:
> On Fri, Jun 18, 2010 at 03:44:04PM +0300, Michael S. Tsirkin wrote:
> > > > If I understand correctly what this does, it goes over
> > > > other functions of the same device, and sets the MULTI_FUNCTION bit
> > > > for them if there's more than one function.
> > > > Instead, why don't we just set PCI_HEADER_TYPE_MULTI_FUNCTION
> > > > in relevant devices?
> > > 
> > > pci address, devfn ,is exported to users as addr property,
> > > so users can populate pci function(PCIDevice in qemu)
> > > at arbitrary devfn.
> > > It means each function(PCIDevice) don't know whether pci device
> > > (PCIDevice[8]) is multi function or not.
> > > So I chose to handle it in pci generic layer.
> > > 
> > > It can be argued that it's user operation fault and that
> > > the missing part is validation checks to catch such user errors.
> > 
> > Exactly. Another part that is missing is a way to hotplug
> > a multifunction device.
> 
> Yes, multi function hot plug is also on my wish list.
> 
> 
> > OTOH I think that hotplug of separate functions has no chance to work,
> > so users are better off getting an error.
> > 
> > > But I prefer more flexible and more user friendly way.
> > 
> > I think that most users would only add many functions
> > to a device as a result of an error.
> >
> > If we really want the ability to put unrelated devices
> > as functions in a single one, let's just add
> > a 'multifunction' qdev property, and validate that
> > it is set appropriately.
> 
> I think "unrelated" is policy. There is no obvious way to determine
> which functions can be in a same device.
> For example, popular chipset contains isa bridge, ide controller,
> usb controller, sound and modem in a single device as functions.
> It's up to hardware designer policy which functions are grouped into
> a device.
> 
> So qemu should be able to populate any function in a device,
> and leave such policy check to higher level tool like libvirt/virt-manager.

Fine. But tweaking one device's config when another one is added does not
lead to robust code.  Let's just require that users declare devices as
multifunction, and verify this.

-- 
MST

Re: [Qemu-devel] [RFC PATCH 1/5] qdev: Create qdev_get_dev_path()

2010-06-18 Thread Alex Williamson

On Fri, 2010-06-18 at 16:03 +0200, Markus Armbruster wrote:
> Alex Williamson  writes:
> > On Wed, 2010-06-16 at 10:23 +0200, Markus Armbruster wrote:
> >> Alex Williamson  writes:
> >> >
> >> > (A): /i440FX-pcihost/pci.0/e1000.05.0
> >> >
> >> > vs
> >> >
> >> > (B): /pci.0/05.0
> >> >
> >> > (removing both the root bridge driver name and the device driver name)
> >> 
> >> / is the main system bus.  System bus defines no bus address at the
> >> moment.  Therefore, you have to use the driver name i440FX-pcihost.
> >
> > So is the general rule "If a device's parent bus does not provide an
> > address, print device name"?
> 
> I think the general rule for constructing a *canonical* qdev path should
> be:
> 
> * If it's the main system bus, the path is /.
> 
> * If it's another bus, the path is P/B, where P is the canonical path of
>   the device providing the bus, and B is the bus name.  Unambiguous,
>   since no device ever defines two buses with the same name.
> 
> * If it's a device, the path is P/D, where P is the canonical path of
>   the bus.  If the bus defines bus addresses, then D is @A, where A is
>   the device's bus address.
> 
>   We haven't made up our minds whether the else case exists, or what to
>   do if it does.  The simple "else D is the device model driver's name"
>   works only if the bus can't take multiple device models with the same
>   driver.

It certainly exists today, so do we make up bus addresses, do we use
driver name + instance number, or ...?  Both of these cases need to make
sure they're not artificially imposing a creation order than we can't
guarantee across migration.  Hopefully Paul has a better suggestion than
creation order.

> The canonical path is not the only path.  For instance, a qdev ID is a
> valid path, but it's not canonical.  /i440FX-pcihost/pci.0/e1000 is
> another valid, non-canonical path.

Is it not canonical because of "e1000" or because or "i440FX-pcihost".
It seems like both to me with the nuance that i440FX-pcihost doesn't
support multiple instances, so we know which one it is.  Is the correct
canonical path for this then:

//pci.0/@d.f

How do we make progress on defining that TBD and all the ones under isa
buses?  Thanks,

Alex

Re: [Qemu-devel] Re: [PATCH 09/10] pci: set PCI multi-function bit appropriately.

2010-06-18 Thread Jamie Lokier

Isaku Yamahata wrote:
> On Fri, Jun 18, 2010 at 03:44:04PM +0300, Michael S. Tsirkin wrote:
> > If we really want the ability to put unrelated devices
> > as functions in a single one, let's just add
> > a 'multifunction' qdev property, and validate that
> > it is set appropriately.
> 
> I think "unrelated" is policy. There is no obvious way to determine
> which functions can be in a same device.
> For example, popular chipset contains isa bridge, ide controller,
> usb controller, sound and modem in a single device as functions.
> It's up to hardware designer policy which functions are grouped into
> a device.

In hardware terms, quad-port ethernet controllers often present
themselves as a PCI bridge with four independent PCI ethernet
controllers, so they work with standard drivers.  Even though all four
are in a single chip.  Some USB devices do the same, presenting a
small bulk storage device to ship windows drivers [:roll-eyes: ;-)]
alongside the actual device.

-- Jamie

Re: [Qemu-devel] [RFC PATCH 1/5] qdev: Create qdev_get_dev_path()

2010-06-18 Thread Jan Kiszka

Alex Williamson wrote:
> On Fri, 2010-06-18 at 11:16 +0200, Jan Kiszka wrote:
>> Alex Williamson wrote:
>>> On Wed, 2010-06-16 at 10:23 +0200, Markus Armbruster wrote:
 Alex Williamson  writes:

> On Tue, 2010-06-15 at 12:28 +0100, Paul Brook wrote:
 Alex proposed to disambiguate by adding "identified properties of the
 immediate parent bus and device" to the path component.  For PCI, these
 are dev.fn.  Likewise for any other bus where devices have unambigous
 bus address.  The driver name carries no information!
>>> From user POV, driver names are very handly to address a device
>>> intuitively - except for the case you have tones of devices on the same
>>> bus that are handled by the same driver. For that case we need to
>>> augment the device name with a useful per-bus ID, derived from the bus
>>> address where available, otherwise based on instance numbers.
>> This is where I think you're missing a trick. We don't need to augment 
>> the 
>> name, we just need to allow the bus id to be used instead.
> For the case of a hot remove, I agree.  If the user specifies "pci_del
> pci.0/03.0", that's completely sufficient because we don't care what's
> in that slot, just remove it.  However, I still see some use cases for
> device names in the path.  Take for example:
>
> (A): /i440FX-pcihost/pci.0/e1000.05.0
>
> vs
>
> (B): /pci.0/05.0
>
> (removing both the root bridge driver name and the device driver name)
 / is the main system bus.  System bus defines no bus address at the
 moment.  Therefore, you have to use the driver name i440FX-pcihost.
>>> So is the general rule "If a device's parent bus does not provide an
>>> address, print device name"?
>> What is the device name?
> 
> dev->info->name

That's for me the driver name. However, this one needs extension by an
instance number.

> 
 /i440FX-pcihost/pci.0 is the PCI bus.  PCI bus defines a bus address:
 dev.fn.  Therefore, you can either use the bus address @05.0, or the
 driver name e1000.

 We have "/i440FX-pcihost/pci.0/e1000" vs. "/i440FX-pcihost/pci.0/@05.0".
>>> I object to being able to use anything but an address under a bus that
>>> supports hotplug, but that aside, I think the paths for my example
>>> system look like below.  Note that anything behind and including the $
>>> is not the canonical path, that begins the free form, usage specific
>>> string, here filled by missing device names (open to suggestions other
>>> than $ here).
>> What about ":" instead of "$"? Visually more appealing IMO.
> 
> Sure, that works.
> 
>>>  Note that were isa devices do not have a bus identifier,
>>> I'm using the device name,
>> Don't understand. Why don't they have their I/O base as prefix? This is
>> inconsistent, and if we consider anything behind "$" (or ":")
>> informative, the bus address is mandatory for any device (on a bus with
>> an addressing scheme).
> 
> Not all isa devices expose an iobase property.  isa doesn't have an
> addressing scheme, it's basically a free for all of grabbing addresses
> and interrupts and hoping they don't conflict with no means to uniquely
> address a specific device.  That's why isa cards are loaded with jumpers
> so you can try to move them around.

The question is if there are ISA devices that do not use io, thus have
no such property. Paul pointed out that this might be unrealistic, and
in fact we have no such devices in QEMU so far.

> 
>>> which I think follows the same rule as
>>> i440FX-pcihost.  Can we agree this is the goal?  Thanks,
>> It looks almost acceptable from my POV. We just need to define how to
>> get hold of devices on buses without an addressing scheme (e.g. the
>> system bus). Using the driver name is insufficient once you have > 1
>> devices of the same type. Introducing device instance numbers to those
>> buses would be straightforward IMO.
> 
> Yep, as I mentioned, I can live with instance numbers for devices that
> do not live on a hotpluggable bus.  Where will the instance number go?
> A property on the parent bus, the device, field in the DeviceState?

So far I'm generating them on the fly. We could store them in the
DeviceState if aliases based on . shall be
forbidden on hotplug buses, i.e. the instance numbers will not change
during VM lifetime. but the question is if such a restriction of alias
(even if the are unstable) wouldn't make things too much asymmetric.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

[Qemu-devel] [Bug 595906] Re: [ARM] All variants of ADDSUBX, SUBADDX give incorrect results

2010-06-18 Thread Chris Winter

I have been able to reproduce this issue using the attached C file on a
custom arm1136-based qemu device, with latest git HEAD as of about 15
minutes ago. Running my test program on real hardware -- an
arm1176-based board, and a cortex-a8-based board -- produces the desired
result (0x78001800).

I'll try to investigate further, but it may take me a while to find a
fix because I haven't spent much time looking at qemu's lower level
instruction translation code. If someone with experience in this area
can devise a fix more quickly, then I can help test.

** Attachment added: "Test code to reproduce the bug"
   http://launchpadlibrarian.net/50548546/test.c

-- 
[ARM] All variants of ADDSUBX,SUBADDX give incorrect results
https://bugs.launchpad.net/bugs/595906
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
All variants of the ADDSUBX/SUBADDX instructions seem to be implemented 
incorrectly, i.e.

MOV r12, #0
LDR r0, =0x18004800
LDR r1, =0x30006000
QADDSUBX r12, r0, r1; Should give 0x78001800 - gives 0x4800e800

This happens with latest git HEAD.

[Qemu-devel] Re: [PATCH 2/2] qemu-io: check registered fds in command_loop()

2010-06-18 Thread Kevin Wolf

Am 15.06.2010 19:53, schrieb MORITA Kazutaka:
> Some block drivers use an aio handler and do I/O completion routines
> in it.  However, the handler is not invoked if we only do
> aio_read/write, because registered fds are not checked at all.
> 
> This patch registers a command processing function as a fd handler to
> STDIO, and calls qemu_aio_wait() in command_loop().  Any other
> handlers can be invoked when user input is idle.
> 
> Signed-off-by: MORITA Kazutaka 

This breaks qemu-iotests. The reason is that synchronous requests get
out of order. Previously, do_aio_readv/writev waited until the request
was completed, and only afterwards the next command was read from stdio.
Now the next command can start during the qemu_aio_wait() that
do_aio_readv/writev uses internally to wait.

So we either need to deregister the fd handler while a command is
running, or (more cleanly) have an async_context_push/pop for any
command except aio_*.

Kevin

[Qemu-devel] [PATCH][RESEND] qdev-properties: Fix (u)intXX parsers

2010-06-18 Thread Kevin Wolf

scanf calls must not use PRI constants, they have probably the wrong size and
corrupt memory. We could replace them by SCN ones, but strtol is simpler than
scanf here anyway. While at it, also fix the parsers to reject garbage after
the number ("4096xyz" was accepted before).

Signed-off-by: Kevin Wolf 
---
 hw/qdev-properties.c |   50 +++---
 1 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index 5a8739d..5b7fd77 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -67,12 +67,14 @@ PropertyInfo qdev_prop_bit = {
 static int parse_uint8(DeviceState *dev, Property *prop, const char *str)
 {
 uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
-const char *fmt;
+char *end;
 
 /* accept both hex and decimal */
-fmt = strncasecmp(str, "0x",2) == 0 ? "%" PRIx8 : "%" PRIu8;
-if (sscanf(str, fmt, ptr) != 1)
+*ptr = strtoul(str, &end, 0);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
@@ -95,12 +97,14 @@ PropertyInfo qdev_prop_uint8 = {
 static int parse_uint16(DeviceState *dev, Property *prop, const char *str)
 {
 uint16_t *ptr = qdev_get_prop_ptr(dev, prop);
-const char *fmt;
+char *end;
 
 /* accept both hex and decimal */
-fmt = strncasecmp(str, "0x",2) == 0 ? "%" PRIx16 : "%" PRIu16;
-if (sscanf(str, fmt, ptr) != 1)
+*ptr = strtoul(str, &end, 0);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
@@ -123,12 +127,14 @@ PropertyInfo qdev_prop_uint16 = {
 static int parse_uint32(DeviceState *dev, Property *prop, const char *str)
 {
 uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
-const char *fmt;
+char *end;
 
 /* accept both hex and decimal */
-fmt = strncasecmp(str, "0x",2) == 0 ? "%" PRIx32 : "%" PRIu32;
-if (sscanf(str, fmt, ptr) != 1)
+*ptr = strtoul(str, &end, 0);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
@@ -149,9 +155,13 @@ PropertyInfo qdev_prop_uint32 = {
 static int parse_int32(DeviceState *dev, Property *prop, const char *str)
 {
 int32_t *ptr = qdev_get_prop_ptr(dev, prop);
+char *end;
 
-if (sscanf(str, "%" PRId32, ptr) != 1)
+*ptr = strtol(str, &end, 10);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
@@ -174,9 +184,13 @@ PropertyInfo qdev_prop_int32 = {
 static int parse_hex32(DeviceState *dev, Property *prop, const char *str)
 {
 uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
+char *end;
 
-if (sscanf(str, "%" PRIx32, ptr) != 1)
+*ptr = strtoul(str, &end, 16);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
@@ -199,12 +213,14 @@ PropertyInfo qdev_prop_hex32 = {
 static int parse_uint64(DeviceState *dev, Property *prop, const char *str)
 {
 uint64_t *ptr = qdev_get_prop_ptr(dev, prop);
-const char *fmt;
+char *end;
 
 /* accept both hex and decimal */
-fmt = strncasecmp(str, "0x",2) == 0 ? "%" PRIx64 : "%" PRIu64;
-if (sscanf(str, fmt, ptr) != 1)
+*ptr = strtoull(str, &end, 0);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
@@ -227,9 +243,13 @@ PropertyInfo qdev_prop_uint64 = {
 static int parse_hex64(DeviceState *dev, Property *prop, const char *str)
 {
 uint64_t *ptr = qdev_get_prop_ptr(dev, prop);
+char *end;
 
-if (sscanf(str, "%" PRIx64, ptr) != 1)
+*ptr = strtoull(str, &end, 16);
+if ((*end != '\0') || (end == str)) {
 return -EINVAL;
+}
+
 return 0;
 }
 
-- 
1.6.6.1

[Qemu-devel] [PATCH v2] QMP: Introduce the documentation for query-netdev and info netdev

2010-06-18 Thread Miguel Di Ciurcio Filho

These commands show the information about active backend network devices.

Signed-off-by: Miguel Di Ciurcio Filho 
---
 qemu-monitor.hx |  105 +++
 1 files changed, 105 insertions(+), 0 deletions(-)

diff --git a/qemu-monitor.hx b/qemu-monitor.hx
index 9f62b94..8fc5ed6 100644
--- a/qemu-monitor.hx
+++ b/qemu-monitor.hx
@@ -1674,6 +1674,111 @@ show the various VLANs and the associated devices
 ETEXI
 
 STEXI
+...@item info netdev
+show information about the current backend network devices
+ETEXI
+SQMP
+query-netdev
+
+
+Each device is represented by a json-object. The returned value is a json-array
+of all devices.
+
+Each json-object contains the following:
+
+- "id": the device's ID, must be unique (json-string)
+- "type": device type (json-string)
+- Possible values: "tap", "user", "vde", "socket"
+- "vlan": QEMU's internal vlan identification. Only present if the device is
+  attached to a VLAN (json-int, optional)
+- "peer": ID of the frontend device when on a 1:1 relationship (json-string,
+  optional)
+- "info": json-object containing the configuration information about the 
device.
+- When "type" is "tap", the following values might be available:
+- "fd": available if connected to an already opened TAP interface
+  (json-int, optional)
+- "script": path to an script used to configure the interface, if
+  the value is "no" then no script is used. (json-string, only
+  present if "fd" is not present)
+- "downscript": path to an script used to deconfigure the 
interface,
+  if the value is "no" then no script is used. (json-string, only
+  present if "fd" is not present)
+- "ifname": name of the attached host interface (json-string, only
+  present if "fd" is not present)
+- "vhost": vhost acceleration status, true if enabled, false
+  otherwise (json-boolean)
+- "vnet_hdr": true if the IFF_VNET_HDR flag must be set, false 
+  otherwise (json-boolean)
+- "vhostfd": positive number if the device is connected to an
+  already opened vhost net device, -1 otherwise (json-int)
+- "sndbuf": send buffer size, in bytes (json-int)
+- When "type" is "vde", the following values might be available:
+- "sock": path to the VDE listening socket (json-string)
+- "port": port number connected to virtual switch (json-int)
+- "group": group name (json-string)
+- "mode": permission mode, in octal (json-int)
+- When "type" is "user", the following values might be available:
+- "hostname": client hostname reported by the builtin DHCP server,
+  (json-string, optional)
+- "restrict": true if guest is isolated from the host,
+  false otherwise (json-boolean)
+- "net": network address (json-string)
+- "netmask": netmask (json-string)
+- "host": guest-visible address of the host (json-string)
+- "tftp": root directory of the built-in TFTP server (json-string,
+  optional)
+- "bootfile": BOOTP filename (json-string, optional)
+- "dhcpstart": the first of the 16 IPs the built-in DHCP server can
+  assign (json-string)
+- "dns": guest-visible address of the virtual nameserver
+  (json-string)
+- "smb": root directory of the built-in SMB server (json-string,
+  optional)
+- "smbserver": IP address of the built-in SMB server (json-string,
+  optional)
+- "hostfwd": guest port number to forward incoming TCP or UDP
+  connections (json-int, optional)
+- "guestfwd": IP address and port to forward guest TCP connections
+  (json-int, optional)
+- When "type" is "socket", the following values might be available:
+- "host": IP address (json-string)
+- "service": port number (json-int)
+- "family": IP version, possible values "ipv4" or "ipv6"
+  (json-string)
+- "server": true if the socket is receiving incoming connections,
+  false otherwise (json-boolean)
+
+Example:
+
+-> { "execute": "query-netdev" }
+<- {
+  "return": [
+ {
+"id": "tap.0",
+"type": "tap",
+"vlan": 0,
+"info": {
+   "script": "/etc/qemu-ifup",
+   "downscript": "/etc/qemu-ifdown",
+   "ifname": "tap0",
+   "vhost": true
+},
+ },
+ {
+"id": "user.0",
+"type": "user",
+"peer": "e1000.0",
+"info": {
+   "net": "10.0.2.0",
+   "netmask": "255.255.255.0"
+},
+ },
+  ]
+

[Qemu-devel] smp broken in tcg mode with --enable-io-thread?

2010-06-18 Thread Jan Kiszka

Hi,

looks like SMP in emulation mode is broken once --enable-io-thread is
turned on. Linux SMP guests lock up early during boot, often the whole
QEMU process becomes uncontrollable after a while. That's at least the
case here with x86 targets. The problem disappears when using
-enable-kvm or building without --enable-io-thread.

It's not on my critical path (ie. no time to go into details), I just
stumbled over it and wanted to drop a note.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] ARM/system mode/stdin

2010-06-18 Thread Paul Brook

> $ qemu-system-arm -semihosting -cpu cortex-a9 -nographic -kernel
> ./input.exe

-nographic implies -monitor stdio -serial stdio. Don't do that if you want to 
access stdio via semihosting.

Paul

[Qemu-devel] [Bug 595906] Re: [ARM] All variants of ADDSUBX, SUBADDX give incorrect results

2010-06-18 Thread cmchao

The addsub/subadd implemetation is incorrect.  The patch can fixed it
and is verified by Chris's test case


** Patch added: "target-arm: fix addsub/subadd implementation"
   
http://launchpadlibrarian.net/50552359/0001-target-arm-fix-addsub-subadd-implementation.patch

-- 
[ARM] All variants of ADDSUBX,SUBADDX give incorrect results
https://bugs.launchpad.net/bugs/595906
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
All variants of the ADDSUBX/SUBADDX instructions seem to be implemented 
incorrectly, i.e.

MOV r12, #0
LDR r0, =0x18004800
LDR r1, =0x30006000
QADDSUBX r12, r0, r1; Should give 0x78001800 - gives 0x4800e800

This happens with latest git HEAD.

[Qemu-devel] [Bug 595906] Re: [ARM] All variants of ADDSUBX, SUBADDX give incorrect results

2010-06-18 Thread Chris Winter

I tested Chih-Min's patch with my local git clone, and I can verify that
it appears to fix the issue.

-- 
[ARM] All variants of ADDSUBX,SUBADDX give incorrect results
https://bugs.launchpad.net/bugs/595906
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
All variants of the ADDSUBX/SUBADDX instructions seem to be implemented 
incorrectly, i.e.

MOV r12, #0
LDR r0, =0x18004800
LDR r1, =0x30006000
QADDSUBX r12, r0, r1; Should give 0x78001800 - gives 0x4800e800

This happens with latest git HEAD.

[Qemu-devel] [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-18 Thread Ryan Harper

Create a new attribute for virtio-blk devices that will fetch the serial number
of the block device.  This attribute can be used by udev to create disk/by-id
symlinks for devices that don't have a UUID (filesystem) associated with them.

ATA_IDENTIFY strings are special in that they can be up to 20 chars long
and aren't required to be NULL-terminated.  The buffer is also zero-padded
meaning that if the serial is 19 chars or less that we get a NULL terminated
string.  When copying this value into a string buffer, we must be careful to
copy up to the NULL (if it present) and only 20 if it is longer and not to
attempt to NULL terminate; this isn't needed.

Signed-off-by: Ryan Harper 
Signed-off-by: john cooper 
---
 drivers/block/virtio_blk.c |   32 
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 258bc2a..f1ef26f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -281,6 +281,31 @@ static int index_to_minor(int index)
return index << PART_BITS;
 }
 
+/* Copy serial number from *s to *d.  Copy operation terminates on either
+ * encountering a nul in *s or after n bytes have been copied, whichever
+ * occurs first.  *d is not forcibly nul terminated.  Return # of bytes copied.
+ */
+static inline int serial_sysfs(char *d, char *s, int n)
+{
+   char *di = d;
+
+   while (*s && n--)
+   *d++ = *s++;
+   return d - di;
+}
+
+static ssize_t virtblk_serial_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct gendisk *disk = dev_to_disk(dev);
+   char id_str[VIRTIO_BLK_ID_BYTES];
+
+   if (IS_ERR(virtblk_get_id(disk, id_str)))
+   return 0;
+   return serial_sysfs(buf, id_str, min(VIRTIO_BLK_ID_BYTES, PAGE_SIZE));
+}
+DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
struct virtio_blk *vblk;
@@ -445,8 +470,15 @@ static int __devinit virtblk_probe(struct virtio_device 
*vdev)
 
 
add_disk(vblk->disk);
+   err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
+   if (err)
+   goto out_del_disk;
+
return 0;
 
+out_del_disk:
+   del_gendisk(vblk->disk);
+   blk_cleanup_queue(vblk->disk->queue);
 out_put_disk:
put_disk(vblk->disk);
 out_mempool:
-- 
1.6.3.3

[Qemu-devel] [PATCH 2/2] Remove virtio_blk VBID ioctl

2010-06-18 Thread Ryan Harper

With the availablility of a sysfs device attribute for examining disk serial
numbers the ioctl is no longer needed.  The user-space changes for this aren't
upstream yet so we don't have any users to worry about.

Signed-off-by: Ryan Harper 
---
 drivers/block/virtio_blk.c |   10 --
 1 files changed, 0 insertions(+), 10 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f1ef26f..9e382dd 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -225,16 +225,6 @@ static int virtblk_ioctl(struct block_device *bdev, 
fmode_t mode,
struct gendisk *disk = bdev->bd_disk;
struct virtio_blk *vblk = disk->private_data;
 
-   if (cmd == 0x56424944) { /* 'VBID' */
-   void __user *usr_data = (void __user *)data;
-   char id_str[VIRTIO_BLK_ID_BYTES];
-   int err;
-
-   err = virtblk_get_id(disk, id_str);
-   if (!err && copy_to_user(usr_data, id_str, VIRTIO_BLK_ID_BYTES))
-   err = -EFAULT;
-   return err;
-   }
/*
 * Only allow the generic SCSI ioctls if the host can support it.
 */
-- 
1.6.3.3

[Qemu-devel] emulating SMP/manycore processor

2010-06-18 Thread Sheng Li

Hi All,

I am a newbie to QEMU. Could anybody tell me how to run an emulation with
255 CPUs? I am talking about the PC target with linux. Any advise will be
greatly appreciated.

Thanks!
-Sheng Li

[Qemu-devel] Re: [PATCH v4 0/4] XSAVE enabling in QEmu

2010-06-18 Thread Marcelo Tosatti

On Thu, Jun 17, 2010 at 03:18:12PM +0800, Sheng Yang wrote:
> Notice the first three patches applied to uq/master branch of qemu-kvm, the 
> last one
> applied to qemu-kvm master branch. And the last one would only apply after the
> first three merged in master branch.

Applied, thanks.

Re: [Qemu-devel] smp broken in tcg mode with --enable-io-thread?

2010-06-18 Thread Anthony Liguori


On 06/18/2010 11:37 AM, Jan Kiszka wrote:

Hi,

looks like SMP in emulation mode is broken once --enable-io-thread is
turned on. Linux SMP guests lock up early during boot, often the whole
QEMU process becomes uncontrollable after a while. That's at least the
case here with x86 targets. The problem disappears when using
-enable-kvm or building without --enable-io-thread.
   


That's unfortunate.  I've been thinking we should enable io thread by 
default but this sort of regression would certainly prevent that.


Glauber/Marcelo, any guesses about what's happening?

Regards,

Anthony Liguori


It's not on my critical path (ie. no time to go into details), I just
stumbled over it and wanted to drop a note.

Jan

Re: [Qemu-devel] [PATCH v2] QMP: Introduce the documentation for query-netdev and info netdev

2010-06-18 Thread Luiz Capitulino

On Fri, 18 Jun 2010 15:28:42 -0500
Anthony Liguori  wrote:

> On 06/18/2010 11:26 AM, Miguel Di Ciurcio Filho wrote:
> > These commands show the information about active backend network devices.
> >
> > Signed-off-by: Miguel Di Ciurcio Filho
> > ---
> >   qemu-monitor.hx |  105 
> > +++
> >   1 files changed, 105 insertions(+), 0 deletions(-)
> >
> > diff --git a/qemu-monitor.hx b/qemu-monitor.hx
> > index 9f62b94..8fc5ed6 100644
> > --- a/qemu-monitor.hx
> > +++ b/qemu-monitor.hx
> > @@ -1674,6 +1674,111 @@ show the various VLANs and the associated devices
> >   ETEXI
> >
> >   STEXI
> > +...@item info netdev
> > +show information about the current backend network devices
> > +ETEXI
> > +SQMP
> > +query-netdev
> > +
> > +
> > +Each device is represented by a json-object. The returned value is a 
> > json-array
> > +of all devices.
> > +
> > +Each json-object contains the following:
> > +
> > +- "id": the device's ID, must be unique (json-string)
> > +- "type": device type (json-string)
> > +- Possible values: "tap", "user", "vde", "socket"
> > +- "vlan": QEMU's internal vlan identification. Only present if the device 
> > is
> > +  attached to a VLAN (json-int, optional)
> > +- "peer": ID of the frontend device when on a 1:1 relationship 
> > (json-string,
> > +  optional)
> >
> 
> I think we should only return items with a valid peer property and drop 
> anything attached to vlans.

 The reason is that we're going to completely drop the vlan stuff, right?

> The current info network already provides vlan information.

 What did you mean by that? info network is not going to be converted to QMP,
that's why we're doing query-netdev.

 If I got it right:

 - query-netdev: backend info
 - query-qdm: device info

[Qemu-devel] Handling the O-type

2010-06-18 Thread Luiz Capitulino

On Wed, 02 Jun 2010 09:31:24 +0200
Markus Armbruster  wrote:

> Luiz Capitulino  writes:

[...]

> >  static void check_mandatory_args(const char *cmd_arg_name,
> > @@ -4344,6 +4413,9 @@ out:
> >   * Client argument checking rules:
> >   *
> >   * 1. Client must provide all mandatory arguments
> > + * 2. Each argument provided by the client must be valid
> > + * 3. Each argument provided by the client must have the type expected
> > + *by the command
> >   */
> >  static int qmp_check_client_args(const mon_cmd_t *cmd, QDict *client_args)
> >  {
> > @@ -4355,7 +4427,10 @@ static int qmp_check_client_args(const mon_cmd_t 
> > *cmd, QDict *client_args)
> >  res.qdict = client_args;
> >  qdict_iter(cmd_args, check_mandatory_args, &res);
> >  
> > -/* TODO: Check client args type */
> > +if (!res.result && !res.skip) {
> > +res.qdict = cmd_args;
> > +qdict_iter(client_args, check_client_args_type, &res);
> > +}
> 
> What if we have both an O-type argument and other arguments?  Then the
> 'O' makes check_client_args_type() set res.skip, and we duly skip
> checking the other arguments here.

I was working on this and it seems a bad idea to allow mixing O-type and
other monitor types*.

The reason is that you can't easily tell if an argument passed by the client
is part of the O-type or the monitor type. We could workaround this by trying to
ensure that an argument exists only in one of them, but I really feel this will
get messy.

I think we should disallow mixing O-type with other argument types and maintain
the skip trick, ie. skip any checking in the top level if the argument is an
O-type one.

 * Does this work with the current argument checker?

Re: [Qemu-devel] [PATCH v2] QMP: Introduce the documentation for query-netdev and info netdev

2010-06-18 Thread Anthony Liguori


On 06/18/2010 11:26 AM, Miguel Di Ciurcio Filho wrote:

These commands show the information about active backend network devices.

Signed-off-by: Miguel Di Ciurcio Filho
---
  qemu-monitor.hx |  105 +++
  1 files changed, 105 insertions(+), 0 deletions(-)

diff --git a/qemu-monitor.hx b/qemu-monitor.hx
index 9f62b94..8fc5ed6 100644
--- a/qemu-monitor.hx
+++ b/qemu-monitor.hx
@@ -1674,6 +1674,111 @@ show the various VLANs and the associated devices
  ETEXI

  STEXI
+...@item info netdev
+show information about the current backend network devices
+ETEXI
+SQMP
+query-netdev
+
+
+Each device is represented by a json-object. The returned value is a json-array
+of all devices.
+
+Each json-object contains the following:
+
+- "id": the device's ID, must be unique (json-string)
+- "type": device type (json-string)
+- Possible values: "tap", "user", "vde", "socket"
+- "vlan": QEMU's internal vlan identification. Only present if the device is
+  attached to a VLAN (json-int, optional)
+- "peer": ID of the frontend device when on a 1:1 relationship (json-string,
+  optional)
   


I think we should only return items with a valid peer property and drop 
anything attached to vlans.  The current info network already provides 
vlan information.


Regards,

Anthony Liguori

Re: [Qemu-devel] [PATCH v2] QMP: Introduce the documentation for query-netdev and info netdev

2010-06-18 Thread Miguel Di Ciurcio Filho

On Fri, Jun 18, 2010 at 5:28 PM, Anthony Liguori  wrote:
> On 06/18/2010 11:26 AM, Miguel Di Ciurcio Filho wrote:
>>
>> These commands show the information about active backend network devices.
>>
>> Signed-off-by: Miguel Di Ciurcio Filho
>> ---
>>  qemu-monitor.hx |  105
>> +++
>>  1 files changed, 105 insertions(+), 0 deletions(-)
>>
>> diff --git a/qemu-monitor.hx b/qemu-monitor.hx
>> index 9f62b94..8fc5ed6 100644
>> --- a/qemu-monitor.hx
>> +++ b/qemu-monitor.hx
>> @@ -1674,6 +1674,111 @@ show the various VLANs and the associated devices
>>  ETEXI
>>
>>  STEXI
>> +...@item info netdev
>> +show information about the current backend network devices
>> +ETEXI
>> +SQMP
>> +query-netdev
>> +
>> +
>> +Each device is represented by a json-object. The returned value is a
>> json-array
>> +of all devices.
>> +
>> +Each json-object contains the following:
>> +
>> +- "id": the device's ID, must be unique (json-string)
>> +- "type": device type (json-string)
>> +    - Possible values: "tap", "user", "vde", "socket"
>> +- "vlan": QEMU's internal vlan identification. Only present if the device
>> is
>> +  attached to a VLAN (json-int, optional)
>> +- "peer": ID of the frontend device when on a 1:1 relationship
>> (json-string,
>> +  optional)
>>
>
> I think we should only return items with a valid peer property and drop
> anything attached to vlans.  The current info network already provides vlan
> information.
>

We need a strong compromise that sometime in the future, the qemu
"vlan" concept will be removed, deprecated, or something like that.
Otherwise we will end up with QMP not exporting networking
configuration that can be setup by a user or management software.

Previous discussions about the "vlan" removal:
http://lists.gnu.org/archive/html/qemu-devel/2010-06/msg00947.html

Will it break for some people? Yes, but the current status quo is not
good. Interconnecting virtual network devices should be done by
software like VDE or Open vSwitch on the host.

I think even devices without a peer must be listed. After a netdev_add
command, there is going to be no way to verify that the netdev is
there, only after adding a guest NIC associated with the netdev if we
put a restriction like that.

Regards,

Miguel

Re: [Qemu-devel] [PATCH v2] QMP: Introduce the documentation for query-netdev and info netdev

2010-06-18 Thread Anthony Liguori


On 06/18/2010 04:15 PM, Miguel Di Ciurcio Filho wrote:

On Fri, Jun 18, 2010 at 5:28 PM, Anthony Liguori  wrote:
   

On 06/18/2010 11:26 AM, Miguel Di Ciurcio Filho wrote:
 

These commands show the information about active backend network devices.

Signed-off-by: Miguel Di Ciurcio Filho
---
  qemu-monitor.hx |  105
+++
  1 files changed, 105 insertions(+), 0 deletions(-)

diff --git a/qemu-monitor.hx b/qemu-monitor.hx
index 9f62b94..8fc5ed6 100644
--- a/qemu-monitor.hx
+++ b/qemu-monitor.hx
@@ -1674,6 +1674,111 @@ show the various VLANs and the associated devices
  ETEXI

  STEXI
+...@item info netdev
+show information about the current backend network devices
+ETEXI
+SQMP
+query-netdev
+
+
+Each device is represented by a json-object. The returned value is a
json-array
+of all devices.
+
+Each json-object contains the following:
+
+- "id": the device's ID, must be unique (json-string)
+- "type": device type (json-string)
+- Possible values: "tap", "user", "vde", "socket"
+- "vlan": QEMU's internal vlan identification. Only present if the device
is
+  attached to a VLAN (json-int, optional)
+- "peer": ID of the frontend device when on a 1:1 relationship
(json-string,
+  optional)

   

I think we should only return items with a valid peer property and drop
anything attached to vlans.  The current info network already provides vlan
information.

 

We need a strong compromise that sometime in the future, the qemu
"vlan" concept will be removed, deprecated, or something like that.
Otherwise we will end up with QMP not exporting networking
configuration that can be setup by a user or management software.
   


One way or another, vlan will go away in the form it exists today.  I'm 
not sure how we're going to replace it yet but the notion that every 
device is always connected to a hub is not something that we can 
reasonably live with.


netdev has already killed it.  TBH, I don't think netdev should have a 
vlan tag.  In fact, I think a patch to remove that from the user 
interface would be a very good thing.


Regards,

Anthony Liguori

[Qemu-devel] [Bug 596106] [NEW] kvm to emulate 64 bit cpu on 32 bit host

2010-06-18 Thread dennyhalim.com

Public bug reported:

i wish kvm can run 64bit guest os on 32bit host just like qemu does.

** Affects: qemu
 Importance: Undecided
 Status: New


** Tags: kvm

** Tags added: kvm

-- 
kvm to emulate 64 bit cpu on 32 bit host
https://bugs.launchpad.net/bugs/596106
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
i wish kvm can run 64bit guest os on 32bit host just like qemu does.

[Qemu-devel] [Bug 596106] Re: kvm to emulate 64 bit cpu on 32 bit host

2010-06-18 Thread Iggy

KVM isn't an emulator. It virtualizes the CPU. A 32bit CPU can't be
forced to run as a 64bit cpu. This is a wontfix.

-- 
kvm to emulate 64 bit cpu on 32 bit host
https://bugs.launchpad.net/bugs/596106
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
i wish kvm can run 64bit guest os on 32bit host just like qemu does.

[Qemu-devel] [Bug 596106] Re: kvm to emulate 64 bit cpu on 32 bit host

2010-06-18 Thread Anthony Liguori

Thanks for the request.  It's certainly possible with most modern
distributions to just install a 64-bit kernel with a 32-bit userspace.
Technically speaking, 64-bit guests in a 32-bit kernel is prohibitively
difficult.

** Changed in: qemu
   Status: New => Won't Fix

-- 
kvm to emulate 64 bit cpu on 32 bit host
https://bugs.launchpad.net/bugs/596106
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: Won't Fix

Bug description:
i wish kvm can run 64bit guest os on 32bit host just like qemu does.

71 matches

Mail list logo