[Qemu-devel] [PATCH 06/13] ide/ahci: Use universal DMA helper functions

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

The AHCI device can provide both PCI and SysBus AHCI device
emulations.  For this reason, it wasn't previously converted to use
the pci_dma_*() helper functions.  Now that we have universal DMA
helper functions, this converts AHCI to use them.

The DMAContext is obtained from pci_dma_context() in the PCI case and
set to NULL in the SysBus case (i.e. we assume for now that a SysBus
AHCI has no IOMMU translation).

Cc: Kevin Wolf 
Cc: Michael S. Tsirkin 

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 hw/ide/ahci.c |   34 ++++++++++++++++++++--------------
 hw/ide/ahci.h |    3 ++-
 hw/ide/ich.c  |    2 +-
 3 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 6c4226d..efea93f 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -588,7 +588,7 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t 
*cmd_fis)
 AHCIPortRegs *pr = &ad->port_regs;
 uint8_t *d2h_fis;
 int i;
-target_phys_addr_t cmd_len = 0x80;
+dma_addr_t cmd_len = 0x80;
 int cmd_mapped = 0;
 
 if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
@@ -598,7 +598,8 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t 
*cmd_fis)
 if (!cmd_fis) {
 /* map cmd_fis */
 uint64_t tbl_addr = le64_to_cpu(ad->cur_cmd->tbl_addr);
-cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 0);
+cmd_fis = dma_memory_map(ad->hba->dma, tbl_addr, &cmd_len,
+ DMA_DIRECTION_TO_DEVICE);
 cmd_mapped = 1;
 }
 
@@ -630,7 +631,8 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t 
*cmd_fis)
 ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
 
 if (cmd_mapped) {
-cpu_physical_memory_unmap(cmd_fis, cmd_len, 0, cmd_len);
+dma_memory_unmap(ad->hba->dma, cmd_fis, cmd_len,
+ DMA_DIRECTION_TO_DEVICE, cmd_len);
 }
 }
 
@@ -640,8 +642,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList 
*sglist)
 uint32_t opts = le32_to_cpu(cmd->opts);
 uint64_t prdt_addr = le64_to_cpu(cmd->tbl_addr) + 0x80;
 int sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
-target_phys_addr_t prdt_len = (sglist_alloc_hint * sizeof(AHCI_SG));
-target_phys_addr_t real_prdt_len = prdt_len;
+dma_addr_t prdt_len = (sglist_alloc_hint * sizeof(AHCI_SG));
+dma_addr_t real_prdt_len = prdt_len;
 uint8_t *prdt;
 int i;
 int r = 0;
@@ -652,7 +654,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList 
*sglist)
 }
 
 /* map PRDT */
-if (!(prdt = cpu_physical_memory_map(prdt_addr, &prdt_len, 0))){
+if (!(prdt = dma_memory_map(ad->hba->dma, prdt_addr, &prdt_len,
+DMA_DIRECTION_TO_DEVICE))){
 DPRINTF(ad->port_no, "map failed\n");
 return -1;
 }
@@ -667,8 +670,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList 
*sglist)
 if (sglist_alloc_hint > 0) {
 AHCI_SG *tbl = (AHCI_SG *)prdt;
 
-/* FIXME: pass the correct DMAContext */
-qemu_sglist_init(sglist, sglist_alloc_hint, NULL);
+qemu_sglist_init(sglist, sglist_alloc_hint, ad->hba->dma);
 for (i = 0; i < sglist_alloc_hint; i++) {
 /* flags_size is zero-based */
 qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
@@ -677,7 +679,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList 
*sglist)
 }
 
 out:
-cpu_physical_memory_unmap(prdt, prdt_len, 0, prdt_len);
+dma_memory_unmap(ad->hba->dma, prdt, prdt_len,
+ DMA_DIRECTION_TO_DEVICE, prdt_len);
 return r;
 }
 
@@ -787,7 +790,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
 uint64_t tbl_addr;
 AHCICmdHdr *cmd;
 uint8_t *cmd_fis;
-target_phys_addr_t cmd_len;
+dma_addr_t cmd_len;
 
 if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
 /* Engine currently busy, try again later */
@@ -809,7 +812,8 @@ static int handle_cmd(AHCIState *s, int port, int slot)
 tbl_addr = le64_to_cpu(cmd->tbl_addr);
 
 cmd_len = 0x80;
-cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1);
+cmd_fis = dma_memory_map(s->dma, tbl_addr, &cmd_len,
+ DMA_DIRECTION_FROM_DEVICE);
 
 if (!cmd_fis) {
 DPRINTF(port, "error: guest passed us an invalid cmd fis\n");
@@ -935,7 +939,8 @@ static int handle_cmd(AHCIState *s, int port, int slot)
 }
 
 out:
-cpu_physical_memory_unmap(cmd_fis, cmd_len, 1, cmd_len);
+dma_memory_unmap(s->dma, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
+ cmd_len);
 
 if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
 /* async command, complete later */
@@ -1115,11 +1120,12 @@ static const IDEDMAOps ahci_dma_ops = {
 .reset = ahci_dma_reset,
 };
 
-void ahci_init(AHCIState *s, DeviceState *qdev, int ports)
+void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dm

[Qemu-devel] [PATCH 02/13] Implement cpu_physical_memory_set()

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

This patch adds cpu_physical_memory_set() function.  This is equivalent to
calling cpu_physical_memory_write() with a buffer filled with a character,
ie, a memset of target memory.

It uses a small temporary buffer on the stack.

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 cpu-common.h |    1 +
 exec.c       |   15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/cpu-common.h b/cpu-common.h
index 1fe3280..8d3596a 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -53,6 +53,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, 
DeviceState *dev);
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
 int len, int is_write);
+void cpu_physical_memory_set(target_phys_addr_t addr, uint8_t c, int len);
 static inline void cpu_physical_memory_read(target_phys_addr_t addr,
 void *buf, int len)
 {
diff --git a/exec.c b/exec.c
index b5d6885..cfd7008 100644
--- a/exec.c
+++ b/exec.c
@@ -3601,6 +3601,21 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, 
uint8_t *buf,
 }
 }
 
+/* Fill len bytes of guest physical memory at addr with byte c, in chunks. */
+void cpu_physical_memory_set(target_phys_addr_t addr, uint8_t c, int len)
+{
+#define FILLBUF_SIZE 512
+    uint8_t fillbuf[FILLBUF_SIZE];
+
+    memset(fillbuf, c, FILLBUF_SIZE);
+    while (len > 0) {
+        int l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
+        cpu_physical_memory_rw(addr, fillbuf, l, true);
+        len -= l; /* advance by the chunk just written, not by len */
+        addr += l;
+    }
+}
+
 /* used for ROM loading : can write in RAM and ROM */
 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
const uint8_t *buf, int len)
-- 
1.7.9.5




[Qemu-devel] [PATCH 01/13] Better support for dma_addr_t variables

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

A while back, we introduced the dma_addr_t type, which is supposed to
be used for bus visible memory addresses.  At present, this is an
alias for target_phys_addr_t, but this will change when we eventually
add support for guest visible IOMMUs.

There are some instances of target_phys_addr_t in the code now which
should really be dma_addr_t, but can't be trivially converted due to
missing features which this patch corrects.

 * We add DMA_ADDR_BITS analogous to TARGET_PHYS_ADDR_BITS.  This is
   important where we need to make a compile-time (#if) decision based
   on the size of dma_addr_t.

 * We add a new helper macro to create device properties which take a
   dma_addr_t, currently an alias to DEFINE_PROP_TADDR().

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 dma.h         |    1 +
 hw/qdev-dma.h |   12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 hw/qdev-dma.h

diff --git a/dma.h b/dma.h
index 8c1ec8f..fe08b72 100644
--- a/dma.h
+++ b/dma.h
@@ -31,6 +31,7 @@ struct QEMUSGList {
 #if defined(TARGET_PHYS_ADDR_BITS)
 typedef target_phys_addr_t dma_addr_t;
 
+#define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
 
 struct ScatterGatherEntry {
diff --git a/hw/qdev-dma.h b/hw/qdev-dma.h
new file mode 100644
index 0000000..f0ff558
--- /dev/null
+++ b/hw/qdev-dma.h
@@ -0,0 +1,12 @@
+/*
+ * Support for dma_addr_t typed properties
+ *
+ * Copyright (C) 2012 David Gibson, IBM Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qdev-addr.h"
+
+#define DEFINE_PROP_DMAADDR(_n, _s, _f, _d)   \
+DEFINE_PROP_TADDR(_n, _s, _f, _d)
-- 
1.7.9.5




[Qemu-devel] [Bug 1014681] Re: BSOD with newer host kernels (x64) and W2k8S guest (x64)

2012-06-19 Thread Arndt Kritzner
With e1000 and ide I also get a BSOD (tried this already), but I don't
have a matching dump at hand at the moment. I will "produce" and provide
a dump by tomorrow morning (Germany).

Arndt

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1014681

Title:
  BSOD with newer host kernels (x64) and W2k8S guest (x64)

Status in QEMU:
  New

Bug description:
  Hallo, I attempted to move virtual machines from one host to another
  but got stuck with Windows-BSODs on the target host. The host-side
  console message is "virtio_ioport_write: unexpected address 0x13 value
  0x1". Eventually there are overlaps to bug #990364, but I'm not sure.

  Host machine: 2x Opteron 4238 a 6 cores, 32GB RAM, Linux x86_64
  Guest machine(s): Windows 2008 Server R2 x64

  I tried different combinations of component versions, but only kernel
  2.6.34 could run the guests (but has other difficulties). See tested
  variants in comment.

  Run arguments are attached. Minidump follows immediately.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1014681/+subscriptions



[Qemu-devel] [PATCH 07/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

The USB UHCI and EHCI drivers were converted some time ago to use the
pci_dma_*() helper functions.  However, this conversion was not complete
because in some places both these drivers do DMA via the usb_packet_map()
function in usb-libhw.c.  That function directly used
cpu_physical_memory_map().

Now that the sglist code uses DMA wrappers properly, we can convert the
functions in usb-libhw.c, thus completing the conversion of UHCI and EHCI
to use the DMA wrappers.

Note that usb_packet_map() invokes dma_memory_map() with a NULL invalidate
callback function.  When IOMMU support is added, this will mean that
usb_packet_map() and the corresponding usb_packet_unmap() must be called in
close proximity without dropping the qemu device lock - otherwise the guest
might invalidate IOMMU mappings while they are still in use by the device
code.

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 hw/usb.h          |    2 +-
 hw/usb/hcd-ehci.c |    4 ++--
 hw/usb/hcd-uhci.c |    2 +-
 hw/usb/libhw.c    |   21 +++++++++++----------
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/hw/usb.h b/hw/usb.h
index 2a56fe5..a5623d3 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -345,7 +345,7 @@ void usb_packet_check_state(USBPacket *p, USBPacketState 
expected);
 void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep);
 void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len);
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl);
-void usb_packet_unmap(USBPacket *p);
+void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl);
 void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes);
 void usb_packet_skip(USBPacket *p, size_t bytes);
 void usb_packet_cleanup(USBPacket *p);
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 5298204..81bbc54 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1422,8 +1422,8 @@ static void ehci_execute_complete(EHCIQueue *q)
 set_field(&q->qh.token, p->tbytes, QTD_TOKEN_TBYTES);
 }
 ehci_finish_transfer(q, p->usb_status);
+usb_packet_unmap(&p->packet, &p->sgl);
 qemu_sglist_destroy(&p->sgl);
-usb_packet_unmap(&p->packet);
 
 q->qh.token ^= QTD_TOKEN_DTOGGLE;
 q->qh.token &= ~QTD_TOKEN_ACTIVE;
@@ -1547,7 +1547,7 @@ static int ehci_process_itd(EHCIState *ehci,
 usb_packet_map(&ehci->ipacket, &ehci->isgl);
 ret = usb_handle_packet(dev, &ehci->ipacket);
 assert(ret != USB_RET_ASYNC);
-usb_packet_unmap(&ehci->ipacket);
+usb_packet_unmap(&ehci->ipacket, &ehci->isgl);
 } else {
 DPRINTF("ISOCH: attempt to addess non-iso endpoint\n");
 ret = USB_RET_NAK;
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 9871e24..86888ce 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -871,7 +871,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, 
UHCI_TD *td,
 
 done:
 len = uhci_complete_td(s, td, async, int_mask);
-usb_packet_unmap(&async->packet);
+usb_packet_unmap(&async->packet, &async->sgl);
 uhci_async_free(async);
 return len;
 }
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index 2462351..c0de30e 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -26,15 +26,15 @@
 
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 {
-int is_write = (p->pid == USB_TOKEN_IN);
-target_phys_addr_t len;
+DMADirection dir = (p->pid == USB_TOKEN_IN) ?
+DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
+dma_addr_t len;
 void *mem;
 int i;
 
 for (i = 0; i < sgl->nsg; i++) {
 len = sgl->sg[i].len;
-mem = cpu_physical_memory_map(sgl->sg[i].base, &len,
-  is_write);
+mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &len, dir);
 if (!mem) {
 goto err;
 }
@@ -46,18 +46,19 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 return 0;
 
 err:
-usb_packet_unmap(p);
+usb_packet_unmap(p, sgl);
 return -1;
 }
 
-void usb_packet_unmap(USBPacket *p)
+void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl)
 {
-int is_write = (p->pid == USB_TOKEN_IN);
+DMADirection dir = (p->pid == USB_TOKEN_IN) ?
+DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
 int i;
 
 for (i = 0; i < p->iov.niov; i++) {
-cpu_physical_memory_unmap(p->iov.iov[i].iov_base,
-  p->iov.iov[i].iov_len, is_write,
-  p->iov.iov[i].iov_len);
+dma_memory_unmap(sgl->dma, p->iov.iov[i].iov_base,
+ p->iov.iov[i].iov_len, dir,
+ p->iov.iov[i].iov_len);
 }
 }
-- 
1.7.9.5




[Qemu-devel] [PATCH 11/13] iommu: Allow PCI to use IOMMU infrastructure

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

This patch adds some hooks to let PCI devices and busses use the new IOMMU
infrastructure.  When IOMMU support is enabled, each PCI device now
contains a DMAContext * which is used by the pci_dma_*() wrapper functions.

By default, the contexts are initialized to NULL, assuming no IOMMU.
However the platform or host bridge code which sets up the PCI bus can use
pci_setup_iommu() to set a function which will determine the correct
DMAContext for a given PCI device.

Cc: Michael S. Tsirkin 
Cc: Richard Henderson 

Signed-off-by: David Gibson 
Signed-off-by: Eduard - Gabriel Munteanu 
Signed-off-by: Benjamin Herrenschmidt 
---
 hw/pci.c           |    9 +++++++++
 hw/pci.h           |    9 +++++++--
 hw/pci_internals.h |    2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index bdfb3d6..c8d16a4 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -775,6 +775,9 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
 return NULL;
 }
 pci_dev->bus = bus;
+if (bus->dma_context_fn) {
+pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, 
devfn);
+}
 pci_dev->devfn = devfn;
 pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
 pci_dev->irq_state = 0;
@@ -2021,6 +2024,12 @@ static void pci_device_class_init(ObjectClass *klass, 
void *data)
 k->props = pci_props;
 }
 
+void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque)
+{
+bus->dma_context_fn = fn; /* only recorded here; nothing is called */
+bus->dma_context_opaque = opaque; /* stored alongside fn on the bus */
+}
+
 static TypeInfo pci_device_type_info = {
 .name = TYPE_PCI_DEVICE,
 .parent = TYPE_DEVICE,
diff --git a/hw/pci.h b/hw/pci.h
index 99b7e61..c099766 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -179,6 +179,7 @@ typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, 
unsigned int vector);
 
 struct PCIDevice {
 DeviceState qdev;
+
 /* PCI config space */
 uint8_t *config;
 
@@ -200,6 +201,7 @@ struct PCIDevice {
 int32_t devfn;
 char name[64];
 PCIIORegion io_regions[PCI_NUM_REGIONS];
+DMAContext *dma;
 
 /* do not access the following fields */
 PCIConfigReadFunc *config_read;
@@ -324,6 +326,10 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int 
*domp, int *busp,
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
+typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int);
+
+void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque);
+
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
 {
@@ -560,8 +566,7 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
 /* DMA access functions */
 static inline DMAContext *pci_dma_context(PCIDevice *dev)
 {
-/* Stub for when we have no PCI iommu support */
-return NULL;
+return dev->dma;
 }
 
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index 399c6d4..e8bc9f6 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -17,6 +17,8 @@
 
 struct PCIBus {
 BusState qbus;
+PCIDMAContextFunc dma_context_fn;
+void *dma_context_opaque;
 uint8_t devfn_min;
 pci_set_irq_fn set_irq;
 pci_map_irq_fn map_irq;
-- 
1.7.9.5




[Qemu-devel] [PATCH 10/13] pseries: Convert sPAPR TCEs to use generic IOMMU infrastructure

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

The pseries platform already contains an IOMMU implementation, since it is
essential for the platform's paravirtualized VIO devices.  This IOMMU
support is currently built into the implementation of the VIO "bus" and
the various VIO devices.

This patch converts this code to make use of the new common IOMMU
infrastructure.

We don't yet handle synchronization of map/unmap callbacks vs. invalidations,
this will require some complex interaction with the kernel and is not a
major concern at this stage.

Cc: Alex Graf 

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 hw/ppc/Makefile.objs |2 +-
 hw/spapr.c   |3 +
 hw/spapr.h   |   16 +++
 hw/spapr_iommu.c |  242 +++
 hw/spapr_llan.c  |   63 +--
 hw/spapr_vio.c   |  281 --
 hw/spapr_vio.h   |   73 ++---
 hw/spapr_vscsi.c |   26 ++---
 hw/spapr_vty.c   |2 +-
 target-ppc/kvm.c |4 +-
 10 files changed, 369 insertions(+), 343 deletions(-)
 create mode 100644 hw/spapr_iommu.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 44a1e8c..f573a95 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -10,7 +10,7 @@ obj-y += ppc_newworld.o
 # IBM pSeries (sPAPR)
 obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
 obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
-obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o
+obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o
 # PowerPC 4xx boards
 obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-y += ppc440_bamboo.o
diff --git a/hw/spapr.c b/hw/spapr.c
index d0bddbc..8bdf0d1 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -628,6 +628,9 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 spapr->icp = xics_system_init(XICS_IRQS);
 spapr->next_irq = 16;
 
+/* Set up IOMMU */
+spapr_iommu_init();
+
 /* Set up VIO bus */
 spapr->vio_bus = spapr_vio_bus_init();
 
diff --git a/hw/spapr.h b/hw/spapr.h
index 654a7a8..df3e8b1 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -319,4 +319,20 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
 int spapr_rtas_device_tree_setup(void *fdt, target_phys_addr_t rtas_addr,
  target_phys_addr_t rtas_size);
 
+#define SPAPR_TCE_PAGE_SHIFT   12
+#define SPAPR_TCE_PAGE_SIZE    (1ULL << SPAPR_TCE_PAGE_SHIFT)
+#define SPAPR_TCE_PAGE_MASK    (SPAPR_TCE_PAGE_SIZE - 1)
+
+typedef struct sPAPRTCE {
+uint64_t tce;
+} sPAPRTCE;
+
+#define SPAPR_VIO_BASE_LIOBN    0x00000000
+
+void spapr_iommu_init(void);
+DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
+void spapr_tce_free(DMAContext *dma);
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+ DMAContext *dma);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
new file mode 100644
index 0000000..5a769b9
--- /dev/null
+++ b/hw/spapr_iommu.c
@@ -0,0 +1,242 @@
+/*
+ * QEMU sPAPR IOMMU (TCE) code
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw.h"
+#include "kvm.h"
+#include "qdev.h"
+#include "kvm_ppc.h"
+#include "dma.h"
+
+#include "hw/spapr.h"
+
+#include <libfdt.h>
+
+/* #define DEBUG_TCE */
+
+enum sPAPRTCEAccess {
+SPAPR_TCE_FAULT = 0,
+SPAPR_TCE_RO = 1,
+SPAPR_TCE_WO = 2,
+SPAPR_TCE_RW = 3,
+};
+
+typedef struct sPAPRTCETable sPAPRTCETable;
+
+struct sPAPRTCETable {
+DMAContext dma;
+uint32_t liobn;
+uint32_t window_size;
+sPAPRTCE *table;
+int fd;
+QLIST_ENTRY(sPAPRTCETable) list;
+};
+
+
+QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables;
+
+static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
+{
+sPAPRTCETable *tcet;
+
+QLIST_FOREACH(tcet, &spapr_tce_tables, list) {
+if (tcet->liobn == liobn) {
+return tcet;
+}
+}
+
+return NULL;
+}
+
+static int spapr_tce_translate(DMAContext *dma,
+   dma_addr_t addr,
+   target_phys_addr_t *paddr,
+   target_phys_addr_t *len,
+   DMADirection dir)
+{
+sPAPRTCETable 

[Qemu-devel] [PATCH 13/13] Add a memory barrier to DMA functions

2012-06-19 Thread Benjamin Herrenschmidt
The emulated devices can run simultaneously with the guest, so
we need to be careful with ordering of load and stores done by
them to the guest system memory, which need to be observed in
the right order by the guest operating system.

This adds a barrier call to the basic DMA read/write ops which
is currently implemented as a smp_mb(), but could be later
improved for more fine grained control of barriers.

Additionally, a _relaxed() variant of the accessors is provided
to easily convert devices that would be performance sensitive
and negatively impacted by the change.

Signed-off-by: Benjamin Herrenschmidt 
---
 dma.h |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/dma.h b/dma.h
index f1fcb71..0d57e50 100644
--- a/dma.h
+++ b/dma.h
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include "hw/hw.h"
 #include "block.h"
+#include "kvm.h"
 
 typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
@@ -70,6 +71,30 @@ typedef struct DMAContext {
 DMAUnmapFunc *unmap;
 } DMAContext;
 
+static inline void dma_barrier(DMAContext *dma, DMADirection dir)
+{
+    /*
+     * This is called before DMA read and write operations
+     * unless the _relaxed form is used and is responsible
+     * for providing some sane ordering of accesses vs
+     * concurrently running VCPUs.
+     *
+     * Users of map(), unmap() or lower level st/ld_*
+     * operations are responsible for providing their own
+     * ordering via barriers.
+     *
+     * This primitive implementation does a simple smp_mb()
+     * before each operation which provides pretty much full
+     * ordering.
+     * A smarter implementation can be devised if needed to
+     * use lighter barriers based on the direction of the
+     * transfer, the DMA context, etc...
+     */
+    if (kvm_enabled()) {
+        smp_mb();
+    }
+}
+
 static inline bool dma_has_iommu(DMAContext *dma)
 {
 return !!dma;
@@ -93,8 +118,9 @@ static inline bool dma_memory_valid(DMAContext *dma,
 
 int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
 void *buf, dma_addr_t len, DMADirection dir);
-static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
-void *buf, dma_addr_t len, DMADirection dir)
+static inline int dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
+void *buf, dma_addr_t len,
+DMADirection dir)
 {
 if (!dma_has_iommu(dma)) {
 /* Fast-path for no IOMMU */
@@ -106,6 +132,28 @@ static inline int dma_memory_rw(DMAContext *dma, 
dma_addr_t addr,
 }
 }
 
+static inline int dma_memory_read_relaxed(DMAContext *dma, dma_addr_t addr,
+  void *buf, dma_addr_t len)
+{
+return dma_memory_rw_relaxed(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+}
+
+static inline int dma_memory_write_relaxed(DMAContext *dma, dma_addr_t addr,
+   const void *buf, dma_addr_t len)
+{
+return dma_memory_rw_relaxed(dma, addr, (void *)buf, len,
+ DMA_DIRECTION_FROM_DEVICE);
+}
+
+static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+void *buf, dma_addr_t len,
+DMADirection dir)
+{
+dma_barrier(dma, dir); /* the only difference from the _relaxed form */
+
+return dma_memory_rw_relaxed(dma, addr, buf, len, dir);
+}
+
 static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
   void *buf, dma_addr_t len)
 {
@@ -124,6 +172,8 @@ int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, 
uint8_t c,
 static inline int dma_memory_set(DMAContext *dma, dma_addr_t addr,
  uint8_t c, dma_addr_t len)
 {
+dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
+
 if (!dma_has_iommu(dma)) {
 /* Fast-path for no IOMMU */
 cpu_physical_memory_set(addr, c, len);
-- 
1.7.9.5




[Qemu-devel] [PATCH 09/13] iommu: Add facility to cancel in-use dma memory maps

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

One new complication raised by IOMMU support over only handling DMA
directly to physical addresses is handling the dma_memory_map() case
(replacing cpu_physical_memory_map()) when the IOMMU translations for
the IOVAs covered by such a map are invalidated or changed while the
map is active.  This should never happen with correct guest software, but
we do need to handle buggy guests.  This case might also occur during
handovers between different guest software stages if the handover
protocols aren't fully seamless.

The iommu implementation will have to wait for maps to be removed
before it can "complete" an invalidation of a translation, which
can take a long time. In order to make it possible to speed that
process up, we add a "Cancel" callback to the map function which
the clients can optionally provide.

The core makes no use of that, but the iommu backend implementation
may choose to keep track of maps and call the respective cancel
callback whenever a translation within a map is removed, allowing
the driver to do things like cancel async IOs etc.

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 dma-helpers.c |   49 -
 dma.h |   23 +++
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index b4ee827..6e6c7b3 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -107,6 +107,28 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
 }
 }
 
+static void dma_aio_cancel(BlockDriverAIOCB *acb)
+{
+DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
+
+trace_dma_aio_cancel(dbs);
+
+if (dbs->acb) {
+BlockDriverAIOCB *acb = dbs->acb;
+dbs->acb = NULL;
+dbs->in_cancel = true;
+bdrv_aio_cancel(acb);
+dbs->in_cancel = false;
+}
+dbs->common.cb = NULL;
+dma_complete(dbs, 0);
+}
+
+static void dma_bdrv_cancel_cb(void *opaque)
+{
+dma_aio_cancel(&((DMAAIOCB *)opaque)->common); /* opaque is a DMAAIOCB */
+}
+
 static void dma_bdrv_cb(void *opaque, int ret)
 {
 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
@@ -127,7 +149,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
 while (dbs->sg_cur_index < dbs->sg->nsg) {
 cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
 cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
+mem = dma_memory_map_with_cancel(dbs->sg->dma, dma_bdrv_cancel_cb, dbs,
+ cur_addr, &cur_len, dbs->dir);
 if (!mem)
 break;
 qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -149,23 +172,6 @@ static void dma_bdrv_cb(void *opaque, int ret)
 assert(dbs->acb);
 }
 
-static void dma_aio_cancel(BlockDriverAIOCB *acb)
-{
-DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
-
-trace_dma_aio_cancel(dbs);
-
-if (dbs->acb) {
-BlockDriverAIOCB *acb = dbs->acb;
-dbs->acb = NULL;
-dbs->in_cancel = true;
-bdrv_aio_cancel(acb);
-dbs->in_cancel = false;
-}
-dbs->common.cb = NULL;
-dma_complete(dbs, 0);
-}
-
 static AIOPool dma_aio_pool = {
 .aiocb_size = sizeof(DMAAIOCB),
 .cancel = dma_aio_cancel,
@@ -353,7 +359,9 @@ void dma_context_init(DMAContext *dma, DMATranslateFunc 
translate,
 dma->unmap = unmap;
 }
 
-void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
+void *iommu_dma_memory_map(DMAContext *dma,
+   DMACancelMapFunc cb, void *cb_opaque,
+   dma_addr_t addr, dma_addr_t *len,
DMADirection dir)
 {
 int err;
@@ -361,7 +369,7 @@ void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t 
addr, dma_addr_t *len,
 void *buf;
 
 if (dma->map) {
-return dma->map(dma, addr, len, dir);
+return dma->map(dma, cb, cb_opaque, addr, len, dir);
 }
 
 plen = *len;
@@ -397,5 +405,4 @@ void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, 
dma_addr_t len,
 cpu_physical_memory_unmap(buffer, len,
   dir == DMA_DIRECTION_FROM_DEVICE,
   access_len);
-
 }
diff --git a/dma.h b/dma.h
index 14fe17d..f1fcb71 100644
--- a/dma.h
+++ b/dma.h
@@ -49,10 +49,15 @@ typedef int DMATranslateFunc(DMAContext *dma,
  target_phys_addr_t *paddr,
  target_phys_addr_t *len,
  DMADirection dir);
+
+typedef void DMACancelMapFunc(void *);
 typedef void* DMAMapFunc(DMAContext *dma,
+ DMACancelMapFunc cb,
+ void *cb_opaque,   
  dma_addr_t addr,
  dma_addr_t *len,
  DMADirection dir);
+
 typedef void DMAUnmapFunc(DMAContext *dma,
   void *buffer,

[Qemu-devel] [PATCH 05/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

dma-helpers.c contains a number of helper functions for doing
scatter/gather DMA, and various block device related DMA.  Currently,
these directly access guest memory using cpu_physical_memory_*(),
assuming no IOMMU translation.

This patch updates this code to use the new universal DMA helper
functions.  qemu_sglist_init() now takes a DMAContext * to describe
the DMA address space in which the scatter/gather will take place.

We minimally update the callers of qemu_sglist_init() to pass NULL
(i.e. no translation, same as current behaviour).  Some of those
callers should pass something else in some cases to allow proper IOMMU
translation in future, but that will be fixed in later patches.

Cc: Kevin Wolf 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 dma-helpers.c  |   24 
 dma.h  |3 ++-
 hw/ide/ahci.c  |3 ++-
 hw/ide/macio.c |4 ++--
 hw/pci.h   |2 +-
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 7971a89..2dc4691 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -10,12 +10,13 @@
 #include "dma.h"
 #include "trace.h"
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
 qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
 qsg->nsg = 0;
 qsg->nalloc = alloc_hint;
 qsg->size = 0;
+qsg->dma = dma;
 }
 
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
@@ -74,10 +75,9 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
 int i;
 
 for (i = 0; i < dbs->iov.niov; ++i) {
-cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
-  dbs->iov.iov[i].iov_len,
-  dbs->dir != DMA_DIRECTION_TO_DEVICE,
-  dbs->iov.iov[i].iov_len);
+dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
+ dbs->iov.iov[i].iov_len, dbs->dir,
+ dbs->iov.iov[i].iov_len);
 }
 qemu_iovec_reset(&dbs->iov);
 }
@@ -106,7 +106,7 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
 static void dma_bdrv_cb(void *opaque, int ret)
 {
 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
-target_phys_addr_t cur_addr, cur_len;
+dma_addr_t cur_addr, cur_len;
 void *mem;
 
 trace_dma_bdrv_cb(dbs, ret);
@@ -123,8 +123,7 @@ static void dma_bdrv_cb(void *opaque, int ret)
 while (dbs->sg_cur_index < dbs->sg->nsg) {
 cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
 cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-mem = cpu_physical_memory_map(cur_addr, &cur_len,
-  dbs->dir != DMA_DIRECTION_TO_DEVICE);
+mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
 if (!mem)
 break;
 qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -209,7 +208,8 @@ BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
 }
 
 
-static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg, bool to_dev)
+static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
+   DMADirection dir)
 {
 uint64_t resid;
 int sg_cur_index;
@@ -220,7 +220,7 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg, bool to_de
 while (len > 0) {
 ScatterGatherEntry entry = sg->sg[sg_cur_index++];
 int32_t xfer = MIN(len, entry.len);
-cpu_physical_memory_rw(entry.base, ptr, xfer, !to_dev);
+dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
 ptr += xfer;
 len -= xfer;
 resid -= xfer;
@@ -231,12 +231,12 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg, bool to_de
 
 uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg)
 {
-return dma_buf_rw(ptr, len, sg, 0);
+return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE);
 }
 
 uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
 {
-return dma_buf_rw(ptr, len, sg, 1);
+return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);
 }
 
 void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
diff --git a/dma.h b/dma.h
index 4449a0c..cd002c7 100644
--- a/dma.h
+++ b/dma.h
@@ -26,6 +26,7 @@ struct QEMUSGList {
 int nsg;
 int nalloc;
 size_t size;
+DMAContext *dma;
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
@@ -139,7 +140,7 @@ struct ScatterGatherEntry {
 dma_addr_t len;
 };
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma);
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index e275e68..6c4226d 100644
--- a/hw/ide/ahci.c
+++ b/h

Re: [Qemu-devel] Adding errno to QMP errors

2012-06-19 Thread Kevin Wolf
Am 18.06.2012 20:31, schrieb Anthony Liguori:
> Irrespective of anything else, I think it's safe to say the experiment of 
> "rich 
> errors" has been a failure.  We still have way too many places using 
> error_report.
> 
> As I mentioned in another thread, I think we should:
> 
> 1) Introduce a GENERIC_ERROR QError type.  It could have a 'domain' and a 
> 'msg' 
> field.
> 
> 2) Focus on converting users of error_report over to use propagated Error 
> objects.
> 
> We shouldn't/can't change existing QError users.  We also shouldn't consider 
> changing the wire protocol.  But for new error users, we should/can relax the 
> reported errors.
> 
> We need a clear support policy on whether the contents of 'msg' are stable or 
> not too.

Another point that you used to bring up in earlier discussions is
translated error messages. If we start returning error messages that are
meant to be displayed to the user, should we get your gettext patches
applied which you did for the GTK backend? libvirt would then have to
pay attention to start qemu with the same locale as the client has.

Kevin



[Qemu-devel] [Bug 1014681] Re: BSOD with newer host kernels (x64) and W2k8S guest (x64)

2012-06-19 Thread vrozenfe
"virtio_ioport_write: unexpected address 0x13 value 0x1" indicates that
you got a BSOD.

Could you try switching from virtio to e1000 and IDE, and check whether you are
still getting the DRIVER_CORRUPTED_EXPOOL (c5) bug check error?

Vadim.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1014681

Title:
  BSOD with newer host kernels (x64) and W2k8S guest (x64)

Status in QEMU:
  New

Bug description:
  Hello, I attempted to move virtual machines from one host to another
  but got stuck with Windows BSODs on the target host. The host-side
  console message is "virtio_ioport_write: unexpected address 0x13 value
  0x1". There may be some overlap with bug #990364, but I'm not sure.

  Host machine: 2x Opteron 4238 a 6 cores, 32GB RAM, Linux x86_64
  Guest machine(s): Windows 2008 Server R2 x64

  I tried different combinations of component versions, but only kernel
  2.6.34 could run the guests (but has other difficulties). See the tested
  variants in the comment.

  Run arguments are attached. Minidump follows immediately.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1014681/+subscriptions



Re: [Qemu-devel] [PATCH 2/2] fdc: Move floppy geometry guessing back from block.c

2012-06-19 Thread Markus Armbruster
Blue Swirl  writes:

> On Mon, Jun 18, 2012 at 9:10 AM, Markus Armbruster  wrote:
>> Commit 5bbdbb46 moved it to block.c because "other geometry guessing
>> functions already reside in block.c".  Device-specific functionality
>> should be kept in device code, not the block layer.  Move it back.
>
> As discussed earlier, this is media specific, not device specific
> (except FDriveType). How about media.c?

It's floppy-(media-)specific, isn't it?

We discussed separating floppy drive emulation (fdd) from floppy
controller emulation.  Right now, they're mixed up in qdevs isa-fdc,
sysbus-fdc and SUNW,fdtwo.  Separating fdd involves splitting up those
qdevs.  I tried, but ran into QOM infrastructure difficulties.  Since
that part of QOM is being improved, I decided to postpone the splitting
work for a bit.

I don't remember discussing a separation of floppy drive and floppy
media emulation.

Related project: moving hard disk geometry out of the block layer.
Can't move into a device model, because we have three of them sporting
geometry: IDE, SCSI and virtio disks.  I guess I'll move them into a new
file in hw/.  media.c doesn't sound right for hard disks.  disk-geo.c?

I could move floppy geometry to the same file.  But there's zero overlap
between hard disk and floppy disk geometry, and the only consumer of
floppy geometry is the floppy disk device.  I don't expect that to
change, and that's why I'd prefer to make floppy geometry an
implementation detail of the floppy disk device, and hide it in fdc.c
now, fdd.c after the split.

But if you insist, I can unhide it.

Comments?



[Qemu-devel] [Bug 1014823] Re: qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

2012-06-19 Thread Serge Hallyn
The bug is actually in libc (see bug 1010069).  You can work around it
using the same patch we are using in the ubuntu package,
define_AT_EMPTY_PATH.patch.  It probably won't apply cleanly upstream,
but just make sure to add

#ifndef AT_REMOVEDIR
#define AT_REMOVEDIR    0x200
#endif
#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH   0x1000  /* Allow empty relative pathname */
#endif
#ifndef O_PATH
#define O_PATH    01000
#endif

near the top of hw/9pfs/virtio-9p-handle.c

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1014823

Title:
  qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

Status in QEMU:
  Confirmed

Bug description:
  CC    libhw64/9pfs/virtio-9p-handle.o
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_update_file_cred’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: note: each 
undeclared identifier is reported only once for each function it appears in
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_lstat’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:87:34: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_symlink’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:314:62: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_link’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:337:45: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_chown’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:373:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  make[1]: *** [9pfs/virtio-9p-handle.o] Error 1
  make: *** [subdir-libhw64] Error 2

  It compiled okay on 11.04.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1014823/+subscriptions



[Qemu-devel] [Bug 712416] Re: kvm_intel kernel module crash with via nano vmx

2012-06-19 Thread khetzal
Hello, yes, I still have this bug on Ubuntu 12.04 (kernel 3.2).

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/712416

Title:
  kvm_intel kernel module crash with via nano vmx

Status in QEMU:
  New
Status in “linux” package in Ubuntu:
  Incomplete
Status in “kvm” package in Debian:
  New

Bug description:
  The kvm module for hardware virtualisation does not work properly on VIA
  Nano processors.

  Tested with processor: VIA Nano processor U2250.
  Processors flags (visible in /proc/cpuinfo): fpu vme de pse tsc msr pae mce 
cx8 apic sep mtrr pge mca cmov pat clflush acpi mmx fxsr sse sse2 ss tm syscall 
nx lm constant_tsc up rep_good pni monitor vmx est tm2 ssse3 cx16 xtpr rng 
rng_en ace ace_en ace2 phe phe_en lahf_lm

  With kernel 2.6.32: kvm does not work and dmesg contains a lot of:
  handle_exception: unexpected, vectoring info 0x800d intr info 0x8b0d

  With kernel 2.6.35: the whole system crashes. Nothing is visible in the logs.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/712416/+subscriptions



Re: [Qemu-devel] How to measure guest memory access (qemu_ld/qemu_st) time?

2012-06-19 Thread Wei-Ren Chen
> But if QEMU/TCG is doing a GVA->GPA translation as Wei-Ren said, I don't see 
> how
> KVM can help.

  Just to clarify: QEMU maintains a TLB (env->tlb_table) which stores GVA ->
HVA mappings; it is used to speed up address translation. On a TLB miss, QEMU
calls cpu_arm_handle_mmu_fault (taking ARM as an example) to do the GVA -> GPA
translation.
 
> I could understand having multiple 32bit regions in QEMU's virtual space (no
> need for KVM), one per guest page table, and then simply adding an offset to
> every memory access to redirect it to the appropriate 32-bit region (1 region
> per guest page table).
> 
> This could translate a single guest ld/st into a host ld+add+ld/st (the first
> load is to get the "region" offset for the currently executing guest context).

  How does that differ from what QEMU is doing? Each time we fill the TLB, we
add an offset to the GPA to get the HVA, then store the GVA -> HVA mapping in
the TLB (IIUC). I don't see much difference here.
 
Regards,
chenwj

-- 
Wei-Ren Chen (陳韋任)
Computer Systems Lab, Institute of Information Science,
Academia Sinica, Taiwan (R.O.C.)
Tel:886-2-2788-3799 #1667
Homepage: http://people.cs.nctu.edu.tw/~chenwj



[Qemu-devel] [PATCH 03/13] iommu: Add universal DMA helper functions

2012-06-19 Thread Benjamin Herrenschmidt
From: David Gibson 

Not that long ago, every device implementation using DMA directly
accessed guest memory using cpu_physical_memory_*().  This meant that
adding support for a guest visible IOMMU would require changing every
one of these devices to go through IOMMU translation.

Shortly before qemu 1.0, I made a start on fixing this by providing
helper functions for PCI DMA.  These are currently just stubs which
call the direct access functions, but mean that an IOMMU can be
implemented in one place, rather than for every PCI device.

Clearly, this doesn't help for non-PCI devices, which could also be
IOMMU translated on some platforms.  It is also problematic for the
devices which have both PCI and non-PCI versions (e.g. OHCI, AHCI) - we
cannot use the pci_dma_*() functions, because they assume the
presence of a PCIDevice, but we don't want to have to check between
pci_dma_*() and cpu_physical_memory_*() every time we do a DMA in the
device code.

This patch makes the first step on addressing both these problems, by
introducing new (stub) dma helper functions which can be used for any
DMA capable device.

These dma functions take a DMAContext *, a new (currently empty)
variable describing the DMA address space in which the operation is to
take place.  NULL indicates untranslated DMA directly into guest
physical address space.  The intention is that in future non-NULL
values will give information about any necessary IOMMU translation.

DMA-using devices must obtain a DMAContext (or, potentially, contexts)
from their bus or platform.  For now this patch just converts the PCI
wrappers to be implemented in terms of the universal wrappers;
converting other drivers can take place over time.

Cc: Michael S. Tsirkin 
Cc: Eduard - Gabriel Munteanu 
Cc: Richard Henderson 

Signed-off-by: David Gibson 
Signed-off-by: Benjamin Herrenschmidt 
---
 dma.h |  100 +
 hw/pci.h  |   21 ++--
 qemu-common.h |1 +
 3 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/dma.h b/dma.h
index fe08b72..4449a0c 100644
--- a/dma.h
+++ b/dma.h
@@ -34,6 +34,106 @@ typedef target_phys_addr_t dma_addr_t;
 #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
 
+/* Checks that the given range of addresses is valid for DMA.  This is
+ * useful for certain cases, but usually you should just use
+ * dma_memory_{read,write}() and check for errors */
+static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
+dma_addr_t len, DMADirection dir)
+{
+/* Stub version, with no iommu we assume all bus addresses are valid */
+return true;
+}
+
+static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+void *buf, dma_addr_t len, DMADirection dir)
+{
+/* Stub version when we have no iommu support */
+cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
+   dir == DMA_DIRECTION_FROM_DEVICE);
+return 0;
+}
+
+static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
+  void *buf, dma_addr_t len)
+{
+return dma_memory_rw(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+}
+
+static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
+   const void *buf, dma_addr_t len)
+{
+return dma_memory_rw(dma, addr, (void *)buf, len,
+ DMA_DIRECTION_FROM_DEVICE);
+}
+
+static inline int dma_memory_set(DMAContext *dma, dma_addr_t addr,
+ uint8_t c, dma_addr_t len)
+{
+/* Stub version when we have no iommu support */
+cpu_physical_memory_set(addr, c, len);
+return 0;
+}
+
+static inline void *dma_memory_map(DMAContext *dma,
+   dma_addr_t addr, dma_addr_t *len,
+   DMADirection dir)
+{
+target_phys_addr_t xlen = *len;
+void *p;
+
+p = cpu_physical_memory_map(addr, &xlen,
+dir == DMA_DIRECTION_FROM_DEVICE);
+*len = xlen;
+return p;
+}
+
+static inline void dma_memory_unmap(DMAContext *dma,
+void *buffer, dma_addr_t len,
+DMADirection dir, dma_addr_t access_len)
+{
+return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
+ dir == DMA_DIRECTION_FROM_DEVICE,
+ access_len);
+}
+
+#define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
+static inline uint##_bits##_t ld##_lname##_##_end##_dma(DMAContext *dma, \
+dma_addr_t addr) \
+{   \
+uint##_bits##_t val;\
+dma_memory_read(dma, addr, &val, (_bits) / 8);   

[Qemu-devel] [Bug 1014823] [NEW] qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

2012-06-19 Thread Whit Blauvelt
Public bug reported:

  CC    libhw64/9pfs/virtio-9p-handle.o
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_update_file_cred’:
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: note: each undeclared 
identifier is reported only once for each function it appears in
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_lstat’:
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:87:34: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_symlink’:
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:314:62: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_link’:
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:337:45: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_chown’:
/usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:373:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
make[1]: *** [9pfs/virtio-9p-handle.o] Error 1
make: *** [subdir-libhw64] Error 2

It compiled okay on 11.04.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1014823

Title:
  qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

Status in QEMU:
  New

Bug description:
  CC    libhw64/9pfs/virtio-9p-handle.o
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_update_file_cred’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: note: each 
undeclared identifier is reported only once for each function it appears in
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_lstat’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:87:34: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_symlink’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:314:62: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_link’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:337:45: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_chown’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:373:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  make[1]: *** [9pfs/virtio-9p-handle.o] Error 1
  make: *** [subdir-libhw64] Error 2

  It compiled okay on 11.04.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1014823/+subscriptions



Re: [Qemu-devel] Windows Vista 64 bit on QEMU

2012-06-19 Thread Edward Tisdale
Anup Gangwar  gmail.com> writes:

> 
> Hello All,Has anyone had success installing (and runnning) Vista 64 bit on
QEMU. I tried it and landed into a variety of windows blue screen errors. The
EFI BIOS also does not seem to be working with the QEMU version in CVS.
> Thanks for the help.Regards,Anup


Yes. I was getting an error saying that it couldn't install because it was not
set up for 64-bit; then I changed some of the settings in qemu:

Machine Name: Vista
Emulator Type: KVM
Emulator Version: Default
Computer Type: KVM (Intel VT/AMD SV...)
Machine Type: Standard PC
Boot Priority: CD-ROM
CPU Type: Default
Number of CPU: 1
Video Card: Cirrus CLGD 5446 (D...)
Keyboard Layout: en-us


I hope this helps.


http://www.progave.com   (my website)




Re: [Qemu-devel] [PATCH v5 12/16] target-or32: Add system instructions

2012-06-19 Thread Jia Liu
Hi Blue,

Thank you for review.
Is this code OK?

void HELPER(mtspr)(CPUOpenRISCState * env,
   target_ulong ra, target_ulong rb, target_ulong offset)
{
#ifndef CONFIG_USER_ONLY
int spr = (ra | offset);
int idx;

switch (spr) {
case TO_SPR(0, 0): /* VR */
env->vr = rb;
break;

case TO_SPR(0, 16): /* NPC */
env->npc = rb;
break;

case TO_SPR(0, 17): /* SR */
if ((env->sr & (SR_IME | SR_DME | SR_SM)) ^
(rb & (SR_IME | SR_DME | SR_SM))) {
tlb_flush(env, 1);
}
env->sr = rb;
env->sr |= SR_FO;  /* FO is const equal to 1 */
if (env->sr & SR_DME) {
env->tlb->map_address_data = &get_phys_data;
} else {
env->tlb->map_address_data = &get_phys_nommu;
}

if (env->sr & SR_IME) {
env->tlb->map_address_code = &get_phys_code;
} else {
env->tlb->map_address_code = &get_phys_nommu;
}
break;

case TO_SPR(0, 18): /* PPC */
env->ppc = rb;
break;

case TO_SPR(0, 32): /* EPCR */
env->epcr = rb;
break;

case TO_SPR(0, 48): /* EEAR */
env->eear = rb;
break;

case TO_SPR(0, 64): /* ESR */
env->esr = rb;
break;
case TO_SPR(1, 512) ... TO_SPR(1, 639): /* DTLBW0MR 0-127 */
idx = spr - TO_SPR(1, 512);
if (!(rb & 1)) {
tlb_flush_page(env, env->tlb->dtlb[0][idx].mr & TARGET_PAGE_MASK);
}
env->tlb->dtlb[0][idx].mr = rb;
break;

case TO_SPR(1, 640) ... TO_SPR(1, 767): /* DTLBW0TR 0-127 */
idx = spr - TO_SPR(1, 640);
env->tlb->dtlb[0][idx].tr = rb;
break;
case TO_SPR(1, 768) ... TO_SPR(1, 895):   /* DTLBW1MR 0-127 */
case TO_SPR(1, 896) ... TO_SPR(1, 1023):  /* DTLBW1TR 0-127 */
case TO_SPR(1, 1024) ... TO_SPR(1, 1151): /* DTLBW2MR 0-127 */
case TO_SPR(1, 1152) ... TO_SPR(1, 1279): /* DTLBW2TR 0-127 */
case TO_SPR(1, 1280) ... TO_SPR(1, 1407): /* DTLBW3MR 0-127 */
case TO_SPR(1, 1408) ... TO_SPR(1, 1535): /* DTLBW3TR 0-127 */
break;
case TO_SPR(2, 512) ... TO_SPR(2, 639):   /* ITLBW0MR 0-127 */
idx = spr - TO_SPR(2, 512);
if (!(rb & 1)) {
tlb_flush_page(env, env->tlb->itlb[0][idx].mr & TARGET_PAGE_MASK);
}
env->tlb->itlb[0][idx].mr = rb;
break;

case TO_SPR(2, 640) ... TO_SPR(2, 767): /* ITLBW0TR 0-127 */
idx = spr - TO_SPR(2, 640);
env->tlb->itlb[0][idx].tr = rb;
break;
case TO_SPR(2, 768) ... TO_SPR(2, 895):   /* ITLBW1MR 0-127 */
case TO_SPR(2, 896) ... TO_SPR(2, 1023):  /* ITLBW1TR 0-127 */
case TO_SPR(2, 1024) ... TO_SPR(2, 1151): /* ITLBW2MR 0-127 */
case TO_SPR(2, 1152) ... TO_SPR(2, 1279): /* ITLBW2TR 0-127 */
case TO_SPR(2, 1280) ... TO_SPR(2, 1407): /* ITLBW3MR 0-127 */
case TO_SPR(2, 1408) ... TO_SPR(2, 1535): /* ITLBW3TR 0-127 */
break;
case TO_SPR(9, 0):  /* PICMR */
cpu_openrisc_store_picmr(env, rb);
break;
case TO_SPR(9, 2):  /* PICSR */
cpu_openrisc_store_picsr(env, rb);
break;
case TO_SPR(10, 0): /* TTMR */
cpu_openrisc_store_compare(env, rb);
break;
case TO_SPR(10, 1): /* TTCR */
cpu_openrisc_store_count(env, rb);
break;
default:
break;
}
#endif
}

target_ulong HELPER(mfspr)(CPUOpenRISCState * env,
   target_ulong rd, target_ulong ra, uint32_t offset)
{
#ifndef CONFIG_USER_ONLY
int spr = (ra | offset);
int idx;

switch (spr) {
case TO_SPR(0, 0): /* VR */
return (env->vr & SPR_VR);

case TO_SPR(0, 1): /* UPR */
return (env->upr);/* TT, DM, IM, UP present */

case TO_SPR(0, 2): /* CPUCFGR */
return (env->cpucfgr);

case TO_SPR(0, 3): /* DMMUCFGR */
return (env->dmmucfgr);/* 1Way, 64 entries */

case TO_SPR(0, 4): /* IMMUCFGR */
return (env->immucfgr);

case TO_SPR(0, 16): /* NPC */
return (env->npc);

case TO_SPR(0, 17): /* SR */
return (env->sr);

case TO_SPR(0, 18): /* PPC */
return (env->ppc);

case TO_SPR(0, 32): /* EPCR */
return (env->epcr);

case TO_SPR(0, 48): /* EEAR */
return (env->eear);

case TO_SPR(0, 64): /* ESR */
return (env->esr);

case TO_SPR(1, 512) ... TO_SPR(1, 639): /* DTLBW0MR 0-127 */
idx = spr - TO_SPR(1, 512);
return (env->tlb->dtlb[0][idx].mr);

case TO_SPR(1, 640) ... TO_SPR(1, 767): /* DTLBW0TR 0-127 */
idx = spr - TO_SPR(1, 640);
return (env->tlb->dtlb[0][idx].tr);

case TO_SPR(1, 768) ... TO_SPR(1, 895):   /* DTLBW1MR 0-127 */
case TO_SPR(1, 896) ... TO_SPR(1, 1023):  /* DTLBW1TR 0-127 */
case TO_SPR(1, 1024) ... TO_SPR(1, 1151): /* DTLBW2MR 0-127 */
case TO_SPR(1, 1152) ... TO_SPR(1, 1279): /* DTLBW2TR 0-127 */
case TO_SPR(1, 1280)

[Qemu-devel] [Bug 1014823] Re: qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

2012-06-19 Thread Serge Hallyn
(Note, the patch has Subject:

Subject: [Qemu-devel] [PATCH] configure: Fix build for some versions of
glibc (9pfs)
)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1014823

Title:
  qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

Status in QEMU:
  Confirmed

Bug description:
  CC    libhw64/9pfs/virtio-9p-handle.o
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_update_file_cred’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: note: each 
undeclared identifier is reported only once for each function it appears in
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_lstat’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:87:34: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_symlink’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:314:62: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_link’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:337:45: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_chown’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:373:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  make[1]: *** [9pfs/virtio-9p-handle.o] Error 1
  make: *** [subdir-libhw64] Error 2

  It compiled okay on 11.04.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1014823/+subscriptions



[Qemu-devel] [Bug 1014823] Re: qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

2012-06-19 Thread Serge Hallyn
Oh, actually I was about to mark this invalid, but in fact it is a valid
bug that it is trying to compile this bit of code when the AT* were not
defined.  There is a patch sent and acked upstream to fix this, so I'll
mark it confirmed for now.

** Changed in: qemu
   Status: New => Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1014823

Title:
  qemu-kvm-1.0.1 compilation error on Ubuntu 12.04

Status in QEMU:
  Confirmed

Bug description:
  CC    libhw64/9pfs/virtio-9p-handle.o
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_update_file_cred’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:70:58: note: each 
undeclared identifier is reported only once for each function it appears in
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_lstat’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:87:34: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_symlink’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:314:62: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function ‘handle_link’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:337:45: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c: In function 
‘handle_chown’:
  /usr/src/qemu-kvm-1.0.1/hw/9pfs/virtio-9p-handle.c:373:58: error: 
‘AT_EMPTY_PATH’ undeclared (first use in this function)
  make[1]: *** [9pfs/virtio-9p-handle.o] Error 1
  make: *** [subdir-libhw64] Error 2

  It compiled okay on 11.04.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1014823/+subscriptions



[Qemu-devel] [PATCH] Add missing check for host_from_stream_offset return value for RAM_SAVE_FLAG_PAGE

2012-06-19 Thread Orit Wasserman
Signed-off-by: Orit Wasserman 
---
 arch_init.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index a9e8b74..81c2e54 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -483,6 +483,9 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
 void *host;
 
 host = host_from_stream_offset(f, addr, flags);
+if (!host) {
+return -EINVAL;
+}
 
 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
 }
-- 
1.7.7.6




Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Wei-Ren Chen
Hi Andreas,

> Poisoned is the "env" variable. You cannot just #include "dyngen-exec.h"
> and expect it to be usable since AREG0 targets don't guarantee it's set
> properly (may be NULL even with traditional targets at times).

  Oops, I missed that point.
 
> CPUArchState should currently be usable in vl.c, you just need explicit
> access to it (e.g., a function argument).
> Question is, what are you trying to do? In particular, of which CPU
> (think SMP) are you trying to print ->some_field? :)

  Currently we only consider a single-CPU ARM guest, so there should be only one
env we need to take care of. We added some fields to CPUState and want to print
their values when the VM is terminated. For example,

---
static void main_loop(void)
{
do {
nonblocking = !kvm_enabled() && last_io > 0;
last_io = main_loop_wait(nonblocking);
} while (!main_loop_should_exit());

// print env->some_field1
// print env->some_field2
}
---

If we can access env in vl.c directly, it would make the task easier.

Regards,
chenwj

-- 
Wei-Ren Chen (陳韋任)
Computer Systems Lab, Institute of Information Science,
Academia Sinica, Taiwan (R.O.C.)
Tel:886-2-2788-3799 #1667
Homepage: http://people.cs.nctu.edu.tw/~chenwj



Re: [Qemu-devel] How to measure guest memory access (qemu_ld/qemu_st) time?

2012-06-19 Thread Orit Wasserman
On 06/19/2012 11:49 AM, 陳韋任 (Wei-Ren Chen) wrote:
>   Mind me CC this to ML? :)
sure I will read the threads to understand more.

Orit
> 
>> Well it was a while back (2008-9) ,the company was acquired by IBM a year 
>> later :
>> http://www.linux-kvm.org/wiki/images/9/98/KvmForum2008%24kdf2008_2.pdf
>> I think Stefan Hajnoczi worked there ...
>> The company used the technology for cross platform guest support but claim 
>> to get speedup too
>> (for ppc) don't think the speedup was related to mmu but more to the 
>> instruction stream.
>> I hope this is helpful.
> 
>   Thanks.
>  
>> Do you have performance result for the cost of the address translation ?
>> If I understand you are concentrating on ARM ?
> 
>   The whole discussion thread is on [1], and you can get some feel about
> the cost of address translation here [2]. Yes, ARM is our target right now,
> but I think we are not limited to it.
> 
> Regards,
> chenwj
> 
> [1] http://www.mail-archive.com/qemu-devel@nongnu.org/msg116159.html
> [2] http://www.mail-archive.com/qemu-devel@nongnu.org/msg116404.html
> 



Re: [Qemu-devel] [RFC PATCH 0/3] GlusterFS support in QEMU

2012-06-19 Thread Bharata B Rao
On Mon, Jun 18, 2012 at 04:36:04PM +0100, Stefan Hajnoczi wrote:
> On Mon, Jun 11, 2012 at 3:18 PM, Bharata B Rao
>  wrote:
> > 4. Creating VM image
> >
> > # qemu-img create -f gluster gluster:c-qemu.vol:/F16 5G
> 
> Do you really need "-f gluster"?

I realized that we don't. I had picked it up from the semantics of rbd.

> The format should be "raw" (default)
> and the protocol should be "gluster".  Specifying "gluster:..." as the
> filename takes care of hooking up the GlusterFS protocol.

You are right. The current patches work w/o explicit specification of gluster
as format, but I will update the documentation in the next version.

Regards,
Bharata.




Re: [Qemu-devel] Adding errno to QMP errors

2012-06-19 Thread Daniel P. Berrange
On Tue, Jun 19, 2012 at 09:39:34AM +0200, Kevin Wolf wrote:
> Am 18.06.2012 20:31, schrieb Anthony Liguori:
> > Irrespective of anything else, I think it's safe to say the experiment of 
> > "rich 
> > errors" has been a failure.  We still have way too many places using 
> > error_report.
> > 
> > As I mentioned in another thread, I think we should:
> > 
> > 1) Introduce a GENERIC_ERROR QError type.  It could have a 'domain' and a 
> > 'msg' 
> > field.
> > 
> > 2) Focus on converting users of error_report over to use propagated Error 
> > objects.
> > 
> > We shouldn't/can't change existing QError users.  We also shouldn't 
> > consider 
> > changing the wire protocol.  But for new error users, we should/can relax 
> > the 
> > reported errors.
> > 
> > We need a clear support policy on whether the contents of 'msg' are stable 
> > or 
> > not too.
> 
> Another point that you used to bring up in earlier discussions is
> translated error messages. If we start returning error messages that are
> meant to be displayed to the user, should we get your gettext patches
> applied which you did for the GTK backend? libvirt would then have to
> pay attention to start qemu with the same locale as the client has.

You can't really start the VM in the same locale as the client app,
because there's no persistent 1:N relationship between libvirt clients
and VMs - it is M:N, so you can't choose a single VM. In addition there
is a bunch of work that libvirt does against VMs in contexts that have
no associated client. You just have to have 1 system wide locale for
all QEMU VMs on a host and libvirt.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-devel] [RFC] [PATCHv2 2/2] Adding basic calls to libseccomp in vl.c

2012-06-19 Thread Daniel P. Berrange
On Mon, Jun 18, 2012 at 08:15:37PM +, Blue Swirl wrote:
> On Mon, Jun 18, 2012 at 8:31 AM, Daniel P. Berrange  
> wrote:
> > On Fri, Jun 15, 2012 at 05:02:19PM -0400, Paul Moore wrote:
> >> On Friday, June 15, 2012 07:06:10 PM Blue Swirl wrote:
> >> > I think allowing execve() would render seccomp pretty much useless.
> >>
> >> Not necessarily.
> >>
> >> I'll agree that it does seem a bit odd to allow execve(), but there is 
> >> still
> >> value in enabling seccomp to disable potentially buggy/exploitable 
> >> syscalls.
> >> Let's not forget that we have over 300 syscalls on x86_64, not including 
> >> the
> >> 32 bit versions, and even if we add all of the new syscalls suggested in 
> >> this
> >> thread we are still talking about a small subset of syscalls.  As far as
> >> security goes, the old adage of "less is more" applies.
> >
> > I can sort of see this argument, but *only* if the QEMU process is being
> > run under a dedicated, fully unprivileged (from a DAC pov) user, completely
> > separate from anything else on the system.
> >
> > If QEMU were being run as root, then even with seccomp, it could trivially
> > just overwrite some binary in /bin, update /proc/core-pattern to point to
> 
> Not without 'open'. When run as root, it would be nice to chroot()
> also to some empty directory and then drop chroot() privileges.

That's just another example of my point, that adding seccomp alone
does nothing for QEMU security. It is only valuable when combined
with another security technique, be it per-user DAC separation,
SELinux MAC, or chroot, or splitting QEMU into multiple separate
processes, or using Linux containers to confine it, etc


Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-devel] [RFC PATCH 3/3] block: gluster as block backend

2012-06-19 Thread Avi Kivity
On 06/18/2012 08:35 PM, Stefan Hajnoczi wrote:
> On Mon, Jun 11, 2012 at 3:21 PM, Bharata B Rao
>  wrote:
>> +#include "block_int.h"
>> +#include "gluster-helpers.h"
>> +
>> +typedef void *gluster_file_t;
> 
> This typedef is already in gluster-helpers.h.  It's ugly BTW, "typedef
> struct gluster_file gluster_file_t" is nicer since it won't cast to
> other pointer types automatically.

gluster_file_t can only be cast to a NACK since names ending with _t are
reserved by the C runtime.


-- 
error compiling committee.c: too many arguments to function





Re: [Qemu-devel] [RFC PATCH 3/3] block: gluster as block backend

2012-06-19 Thread Bharata B Rao
On Mon, Jun 18, 2012 at 06:35:28PM +0100, Stefan Hajnoczi wrote:
> On Mon, Jun 11, 2012 at 3:21 PM, Bharata B Rao
>  wrote:
> > +#include "block_int.h"
> > +#include "gluster-helpers.h"
> > +
> > +typedef void *gluster_file_t;
> 
> This typedef is already in gluster-helpers.h.

Yes, will fix that.

> It's ugly BTW, "typedef
> struct gluster_file gluster_file_t" is nicer since it won't cast to
> other pointer types automatically.

Gluster routines in libglusterfsclient operate on gluster specific descriptor
called fd_t.

glusterfs_open returns a pointer to fd_t and rest of the read/write routines
take that pointer as input. libglusterfsclient hides this pointer by doing

typedef void *glusterfs_file_t.

I wanted to return an integer fd from open and then use them with read and
write. But that would need some code in gluster backend to convert integer
fd to fd_t and vice versa. Since libglusterfsclient doesn't deal with integer
fd's, I retained this ugly typedef.

> 
> > +
> > +typedef struct glusterConf {
> > +    char volfile[PATH_MAX];
> > +    char image[PATH_MAX];
> > +} glusterConf;
> 
> QEMU coding style always uses UpperCase for struct names.

Ok, will fix.

> 
> > +static void qemu_gluster_aio_event_reader(void *opaque)
> > +{
> > +    BDRVGlusterState *s = opaque;
> > +    ssize_t ret;
> > +
> > +    do {
> > +        char *p = (char *)&s->event_gaiocb;
> 
> Why make this a BDRVGlusterState field?  It could be a local, I think.

I could I guess, I was just following what rbd does.

> 
> > +    /* Use O_DSYNC for write-through caching, no flags for write-back 
> > caching,
> > +     * and O_DIRECT for no caching. */
> > +    if ((bdrv_flags & BDRV_O_NOCACHE))
> > +        s->open_flags |= O_DIRECT;
> > +    if (!(bdrv_flags & BDRV_O_CACHE_WB))
> > +        s->open_flags |= O_DSYNC;
> 
> Paolo has changed this recently, you might need to use
> bs->enable_write_cache instead.

I picked up this logic from block/raw-posix.c:raw_open_common(). Don't see
anything related to bs->enable_write_cache there. Will find out more about
bs->enable_write_cache.

> 
> > +out:
> > +    if (c) {
> > +        g_free(c);
> > +    }
> 
> g_free(NULL) is a nop, you never need to test that the pointer is non-NULL.

Ok.

> 
> > +static void gluster_finish_aiocb(void *arg)
> > +{
> > +    int ret;
> > +    gluster_aiocb_t *gaiocb = (gluster_aiocb_t *)arg;
> > +    BDRVGlusterState *s = ((glusterAIOCB *)gaiocb->opaque)->s;
> > +
> > +    ret = qemu_gluster_send_pipe(s, gaiocb);
> > +    if (ret < 0) {
> > +        g_free(gaiocb);
> 
> What about the glusterAIOCB?  You need to invoke the callback with an
> error value.
> 
> What about decrementing the in-flight I/O request count?

Again, this comes from rbd. gluster_finish_aiocb() is the callback
that we have registered with gluster. I am not doing any error handling when
we even fail to write to the pipe. An event reader would be waiting to read
from the other end of the pipe. Typically error handling and decrementing
the in-flight IO request count is done by that event reader. But in this
case, we even failed to kick (via pipe write) the event reader.

> 
> > +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
> > +        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> > +        BlockDriverCompletionFunc *cb, void *opaque, int write)
> > +{
> > +    int ret;
> > +    glusterAIOCB *acb;
> > +    gluster_aiocb_t *gaiocb;
> > +    BDRVGlusterState *s = bs->opaque;
> > +    char *buf;
> > +    size_t size;
> > +    off_t offset;
> > +
> > +    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
> > +    acb->write = write;
> > +    acb->qiov = qiov;
> > +    acb->bounce = qemu_blockalign(bs, qiov->size);
> > +    acb->ret = 0;
> > +    acb->bh = NULL;
> > +    acb->s = s;
> > +
> > +    if (write) {
> > +        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
> > +    }
> > +
> > +    buf = acb->bounce;
> > +    offset = sector_num * BDRV_SECTOR_SIZE;
> > +    size = nb_sectors * BDRV_SECTOR_SIZE;
> > +    s->qemu_aio_count++;
> > +
> > +    gaiocb = g_malloc(sizeof(gluster_aiocb_t));
> 
> Can you make this a field of glusterAIOCB?  Then you don't need to
> worry about freeing gaiocb later.

Hmm, I already have glusterAIOCB as part of gaiocb.

> 
> > +static int64_t qemu_gluster_getlength(BlockDriverState *bs)
> > +{
> > +    BDRVGlusterState *s = bs->opaque;
> > +    gluster_file_t fd = s->fd;
> > +    struct stat st;
> > +    int ret;
> > +
> > +    ret = gluster_fstat(fd, &st);
> > +    if (ret < 0) {
> > +        return -1;
> 
> Please return a negative errno instead of -1.

Ok. Maybe I could just return the value from gluster_fstat().

Thanks for your review.

Regards,
Bharata.




Re: [Qemu-devel] Adding errno to QMP errors

2012-06-19 Thread Kevin Wolf
Am 19.06.2012 11:20, schrieb Daniel P. Berrange:
> On Tue, Jun 19, 2012 at 09:39:34AM +0200, Kevin Wolf wrote:
>> Am 18.06.2012 20:31, schrieb Anthony Liguori:
>>> Irrespective of anything else, I think it's safe to say the experiment of 
>>> "rich 
>>> errors" has been a failure.  We still have way too many places using 
>>> error_report.
>>>
>>> As I mentioned in another thread, I think we should:
>>>
>>> 1) Introduce a GENERIC_ERROR QError type.  It could have a 'domain' and a 
>>> 'msg' 
>>> field.
>>>
>>> 2) Focus on converting users of error_report over to use propagated Error 
>>> objects.
>>>
>>> We shouldn't/can't change existing QError users.  We also shouldn't 
>>> consider 
>>> changing the wire protocol.  But for new error users, we should/can relax 
>>> the 
>>> reported errors.
>>>
>>> We need a clear support policy on whether the contents of 'msg' are stable 
>>> or 
>>> not too.
>>
>> Another point that you used to bring up in earlier discussions is
>> translated error messages. If we start returning error messages that are
>> meant to be displayed to the user, should we get your gettext patches
>> applied which you did for the GTK backend? libvirt would then have to
>> pay attention to start qemu with the same locale as the client has.
> 
> You can't really start the VM in the same locale as the client app,
> because there's no persistent 1:N relationship between libvirt clients
> and VMs - it is M:N, so you can't choose a single VM. In addition there
> is a bunch of work that libvirt does against VMs in contexts that have
> no associated client. You just have to have 1 system wide locale for
> all QEMU VMs on a host and libvirt.

Good point. So if we ever needed it, we would have to introduce a
monitor command to switch. But in most cases client and server locale
should be the same anyway, so I think we can ignore that part for the start.

Kevin



Re: [Qemu-devel] [RFC PATCH 2/3] block: GlusterFS helpers to interface with libglusterfs

2012-06-19 Thread Bharata B Rao
On Mon, Jun 18, 2012 at 06:35:52PM +0100, Stefan Hajnoczi wrote:
> On Mon, Jun 11, 2012 at 3:20 PM, Bharata B Rao
>  wrote:
> > +    ret = pthread_create(&thread, NULL, gluster_handle_poll,
> > +    (void *)gctx);
> 
> Please use qemu-thread.h.  QEMU uses signals so you almost certainly
> want to mask signals for this thread (qemu_thread_create() does that).

Ok. This is temporary since this entire patch (2/3) would be redundant
when we have libglusterfsclient working.

Regards,
Bharata.




Re: [Qemu-devel] [PATCH v2 1/6] qerror: add MAX_KEYCODES 16

2012-06-19 Thread Amos Kong

On 18/06/12 23:30, Amos Kong wrote:

On 06/15/2012 09:35 PM, Luiz Capitulino wrote:

On Fri, 15 Jun 2012 09:57:49 +0200
Gerd Hoffmann  wrote:


   Hi,


It seems we need to notify the user when more than 16 keys are input.


Hi Gerd,

When I use 'sendkey' command to send key-series to guest, some keyboard
events will be send. There is a limitation (16) that was introduced by this
old commit c8256f9d (without description). Do you know the reason?


Probably hardware limitation, ps/2 keyboards can buffer up to 16 keys IIRC.


Then the perfect thing to do would be to drop the MAX_KEYCODES check from
the sendkey command and move bounds checking down to the device emulation code.


However, this will require a bit of code churn if we do it for all devices,
and won't buy us much, as the most likely reason for the error is a client/user
trying to send too many keys in parallel to the guest, right?


Agreed, we can print a warning to stderr when the excess keys are ignored, as hid does.


#define QUEUE_LENGTH    16 /* should be enough for a triple-click */

static void hid_keyboard_event(void *opaque, int keycode)
{
 ...
 if (hs->n == QUEUE_LENGTH) {
 fprintf(stderr, "usb-kbd: warning: key event queue full\n");
 return;
 }



I dropped the limitation in sendkey command,
and didn't change current ps2.c, executed some
tests in different environments.

environment             max inputted key number
---
win7 notepad            100
rhel6 grub              15
rhel6 pxe               15
rhel6 login window      10
rhel6 vim               16
rhel6 terminal(init 3)  200


It seems original 256 queue limitation in ps2.c is fine.
I would only drop limitation(16) in old sendkey command,
it's secure.



If this is right, then I think that the best thing to do would be to drop the
MAX_KEYCODES check from the sendkey command and document that devices can drop
keys if too many of them are sent in parallel or too fast (we can mention ps/2
as an example of a 16 bytes limit).



Likewise the usb hid devices can buffer up to 16 events.  In that case
it is just a qemu implementation detail and not a property of the
hardware we are emulating, so it can be changed.  Not trivially though
as the buffer is part of the migration data, so it is more work that
just changing a #define.



--
Amos.



Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Andreas Färber
Hi,

Am 19.06.2012 11:02, schrieb 陳韋任 (Wei-Ren Chen):
>> Question is, what are you trying to do? In particular, of which CPU
>> (think SMP) are you trying to print ->some_field? :)
> 
>   Currently we only consider single CPU ARM guest, so there should be only one
> env we need to take care of. We add some fields into CPUState and want to 
> print
> their value when the VM is terminated. For example,
> 
> ---
> static void main_loop(void)
> {
> do {
> nonblocking = !kvm_enabled() && last_io > 0;
> last_io = main_loop_wait(nonblocking);
> } while (!main_loop_should_exit());
> 
> // print env->some_field1
> // print env->some_field2
> }
> ---
> 
> If we can access env in vl.c directly, it would make the task easier.

If you only have one CPU then using first_cpu->some_field1 should be
almost as easy. :)

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH v4 1/2] pl330: initial version

2012-06-19 Thread Andreas Färber
Am 19.06.2012 08:40, schrieb Peter Crosthwaite:
> On Tue, Jun 19, 2012 at 12:33 AM, Igor Mitsyanko
>  wrote:
>>
>> Hi Peter, sorry for not properly reviewing your patch for such a long time,
>> I'll try to do this as soon as possible. Right now I have a few small
>> comments
>>
>>
>>
>> On 06/18/2012 04:42 AM, Peter A. G. Crosthwaite wrote:
>>>
>>> Device model for Primecell PL330 dma controller.
>>>
>>> Signed-off-by: Peter A. G. Crosthwaite
>>> Signed-off-by: Kirill Batuzov
>>> ---
>>> [..snip..]
>>>
>>> +static void pl330_dmago(PL330Chan *ch, uint8_t opcode, uint8_t *args, int
>>> len)
>>> +{
>>> +uint8_t chan_id;
>>> +uint8_t ns;
>>> +uint32_t pc;
>>> +PL330Chan *s;
>>> +
>>> +DB_PRINT("\n");
>>> +
>>> +if (!ch->is_manager) {
>>> +pl330_fault(ch, PL330_FAULT_OPERAND_INVALID);
>>
>> According to the description it's more likely to cause UNDEF_INSTR here, not
>> OPERAND_INVALID
> 
> Ok
> 
>>>
>>> +return;
>>> +}
>>> +ns = !!(opcode&  2);
>>> [..snip..]
>>>
>>> +
>>> +static Property pl330_properties[] = {
>>> +DEFINE_PROP_UINT32("cfg0", PL330, cfg[0], 0),
>>> +DEFINE_PROP_UINT32("cfg1", PL330, cfg[1], 0),
>>> +DEFINE_PROP_UINT32("cfg2", PL330, cfg[2], 0),
>>> +DEFINE_PROP_UINT32("cfg3", PL330, cfg[3], 0),
>>> +DEFINE_PROP_UINT32("cfg4", PL330, cfg[4], 0),
>>> +DEFINE_PROP_UINT32("cfg5", PL330, cfg[5], 0),
>>> +DEFINE_PROP_END_OF_LIST(),
>>> +};
>>> +
>>> +static void pl330_class_init(ObjectClass *klass, void *data)
>>> +{
>>> +DeviceClass *dc = DEVICE_CLASS(klass);
>>> +SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
>>> +
>>> +k->init = pl330_init;
>>> +dc->reset = pl330_reset;
>>> +dc->props = pl330_properties;
>>> +}
>>> +
>>> +static TypeInfo pl330_info = {
>>> +.name   = "pl330",
>>> +.parent = TYPE_SYS_BUS_DEVICE,
>>> +.instance_size  = sizeof(PL330),
>>> +.class_init  = pl330_class_init,
>>> +};
>>> +
>>
>> I think Andreas requires all static TypeInfos to have const qualifier and
>> their names to comply with "_type_info" naming convention. I'm not
>> sure about this though.
>>
> 
> Ok

Yes, the lack of const in uses such as these has historic reasons and
keeps propagating. If you touch it anyway, ..._type_info would be more
self-describing but not a hard requirement.
Thanks for keeping eyes open, Igor. :)

>>> +static void pl330_register_types(void)
>>> +{
>>> +type_register_static(&pl330_info);
>>> +}
>>> +
>>> +type_init(pl330_register_types)
>>
>>
>> And it still has no save/load support, it is really mandatory for all new
>> devices. I can recall that one of the maintainers wrote a while ago that
>> every device at least needs to mark itself as non-migratable, if it doesn't
>> implement a proper vmstate.
>>
> Ok, ccing  Andreas

Not my requirement but Peter's (cc'ing). Usually it's really trivial
adding a handful of fields to the VMSD, so I can understand though.

Regards,
Andreas

>> We used this PL330 implementation to transfer sound data in our emulated
>> exynos-based system. It works, but very slow, because the way real hardware
>> performs data transfers is not optimal for emulation.
>>
> 
> That's another battle for another day,
> 
>> Tested by: Igor Mitsyanko 
>>
> 
> Sweet,
> 
> I'll roll a V5 soon, but I'm guessing PMM will do a review cycle here as
> well, so I'll give it a few days.
> 
> Regards,
> Peter

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



[Qemu-devel] [Bug 1013241] Re: qemu-system-ppc64 hanging occasionally in disk writes

2012-06-19 Thread Richard W.M. Jones
I switched to using virtio-scsi (instead of virtio-blk).  This appears to have 
solved
this problem, although it brings another problem.  I also tried vscsi, which 
fixes
both problems.

Therefore I will (not definitively) claim that the problem lies somewhere in 
virtio-blk,
but a workaround seems to be available.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1013241

Title:
  qemu-system-ppc64 hanging occasionally in disk writes

Status in QEMU:
  New

Bug description:
  I found last week that qemu-system-ppc64 (from git) hangs occasionally
  
  under load, and I have a reproducer for it now.  Unfortunately the
  
  reproducer really takes a long time to run -- usually I can get a hang
  
  in under 12 hours.
  

  
  Here is the reproducer case:  
  

  
https://lists.fedoraproject.org/pipermail/ppc/2012-June/001698.html 
  

  
  Notes:
  

  
  (1) Verified by one other person (other than me).  Happens on both
  
  ppc64 and x86-64 host.
  

  
  (2) Happens with both Fedora guest kernel 3.3.4-5.fc17.ppc64 and kernel   
  
  3.5.0 that I compiled myself.  The test case above contains 3.3.4-5.  
  

  
  (3) Seems to be a problem in qemu, not the guest.  The reason I think 
  
  this is because I tried to capture a backtrace of the hang using  
  
  remote gdb, but gdb just hung when trying to connect to qemu  
  
  (gdb connects fine before the bug happens).   
  

  
  (4) Judging by guest messages, appears to be happening when writing   
  
  to the disk.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1013241/+subscriptions



Re: [Qemu-devel] Status of query-netdev QMP command

2012-06-19 Thread Stefan Hajnoczi
On Mon, Jun 18, 2012 at 3:19 PM, Roger Pau Monne  wrote:
> I've read from the GSoC/2010 that some work was being done creating a
> query-netdev QMP command:
>
> http://wiki.qemu.org/Google_Summer_of_Code_2010/QMP#query-netdev
>
> The status says that "mentor has merged it into his tree", but I cannot see
> this command anywhere upstream, and it would come in really handy for what I'm
> trying to do. Does anyone know where this has gone?

I checked qemu.git/master and don't see it either.  The HMP "info net"
command lists the net devices but I'm not aware of a QMP equivalent.
What are you trying to do?

Stefan



Re: [Qemu-devel] [Bug 1013241] Re: qemu-system-ppc64 hanging occasionally in disk writes

2012-06-19 Thread Benjamin Herrenschmidt
On Tue, 2012-06-19 at 10:16 +, Richard W.M. Jones wrote:
> I switched to using virtio-scsi (instead of virtio-blk).  This appears to 
> have solved
> this problem, although it brings another problem.  I also tried vscsi, which 
> fixes
> both problems.
> 
> Therefore I will (not definitively) claim that the problem lies somewhere in 
> virtio-blk,
> but a workaround seems to be available.

What was the virtio-scsi problem ? (Other than SLOF doesn't know about
it yet :-) I haven't audited/tested it so it might have endian issues...

I have reproduced a similar hang with vscsi in full emulation, I haven't
observed your problem with virtio-blk, I plan to spend more time doing
some torture testing & debugging this week see if I can find out what's
going on.

BTW. What was your guest kernel version ?

Cheers,
Ben.





Re: [Qemu-devel] How to measure guest memory access (qemu_ld/qemu_st) time?

2012-06-19 Thread Lluís Vilanova
Michael Kang writes:

> On Tue, Jun 19, 2012 at 4:26 AM, Lluís Vilanova  wrote:
[...]
>> I could understand having multiple 32bit regions in QEMU's virtual space (no
>> need for KVM), one per guest page table, and then simply adding an offset to
>> every memory access to redirect it to the appropriate 32-bit region (1 region
>> per guest page table).
>> 
>> This could translate a single guest ld/st into a host ld+add+ld/st (the first
>> load is to get the "region" offset for the currently executing guest 
>> context).
>> 
>> With this, you can use 'mprotect' in QEMU to enforce the guest's page
>> permissions (as long as the host supports it), and 'mmap' to share the host
>> physical memory between the different 32-bit regions whenever the guest page
>> tables share guest physical memory (again, as long as the host supports it).
>> 
>> But I suppose having a guest with as many or more bits than the host is the
>> common case, which hinders its applicability.

> I once had a similar thought. Firstly, we only simulate a 32-bit
> guest on a 64-bit host in this case.
> Secondly, I once did some experiments, and I could not mmap more than
> about 8G of address space on a 64-bit Linux OS. Maybe there are some
> limits in the Linux kernel of the host.

You can see your resource limits with "ulimit -a", but without more info I
cannot tell what's actually going on.


Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



Re: [Qemu-devel] Status of query-netdev QMP command

2012-06-19 Thread Roger Pau Monne

Stefan Hajnoczi wrote:

On Mon, Jun 18, 2012 at 3:19 PM, Roger Pau Monne  wrote:

I've read from the GSoC/2010 that some work was being done creating a
query-netdev QMP command:

http://wiki.qemu.org/Google_Summer_of_Code_2010/QMP#query-netdev

The status says that "mentor has merged it into his tree", but I cannot see
this command anywhere upstream, and it would come in really handy for what I'm
trying to do. Does anyone know where this has gone?


I checked qemu.git/master and don't see it either.  The HMP "info net"
command lists the net devices but I'm not aware of a QMP equivalent.
What are you trying to do?


On Linux you can pass the name of the tap device you wish to create, and 
Qemu honors that, but on BSD systems you have no way of creating a tap 
device with a specific name, they are assigned based on the lowest free 
number (tap2 for example).


I need the query-netdev command in order to get the name of the device 
that Qemu creates, so I can use it in my scripts afterwards.


Roger.




Re: [Qemu-devel] [PATCH v4 1/2] pl330: initial version

2012-06-19 Thread Peter Maydell
On 19 June 2012 11:17, Andreas Färber  wrote:
> Am 19.06.2012 08:40, schrieb Peter Crosthwaite:
>> On Tue, Jun 19, 2012 at 12:33 AM, Igor Mitsyanko
>>> And it still has no save/load support, it is really mandatory for all new
>>> devices. I can recall that one of the maintainers wrote a while ago that
>>> every device at least needs to mark itself as non-migratable, if it doesn't
>>> implement a proper vmstate.
>>>
>> Ok, ccing  Andreas
>
> Not my requirement but Peter's (cc'ing). Usually it's really trivial
> adding a handful of fields to the VMSD, so I can understand though.

Yes, it's not difficult and there's no easy way to detect "attempted
to migrate a machine using a device that failed to implement save/load"
so I prefer to be strict about not letting new devices into the tree
that don't implement this. (Proper implementation, please, not just
marking the device as non-migratable.)

-- PMM



[Qemu-devel] [Bug 1013241] Re: qemu-system-ppc64 hanging occasionally in disk writes

2012-06-19 Thread Richard W.M. Jones
The problem with virtio-scsi is only a single disk shows up:

https://bugs.launchpad.net/qemu/+bug/1013691

I've been using guest kernels 3.3.4 and 3.5.0-rc2+ (ie. Linus git), and
both behave the same way.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1013241

Title:
  qemu-system-ppc64 hanging occasionally in disk writes

Status in QEMU:
  New

Bug description:
  I found last week that qemu-system-ppc64 (from git) hangs occasionally
  
  under load, and I have a reproducer for it now.  Unfortunately the
  
  reproducer really takes a long time to run -- usually I can get a hang
  
  in under 12 hours.
  

  
  Here is the reproducer case:  
  

  
https://lists.fedoraproject.org/pipermail/ppc/2012-June/001698.html 
  

  
  Notes:
  

  
  (1) Verified by one other person (other than me).  Happens on both
  
  ppc64 and x86-64 host.
  

  
  (2) Happens with both Fedora guest kernel 3.3.4-5.fc17.ppc64 and kernel   
  
  3.5.0 that I compiled myself.  The test case above contains 3.3.4-5.  
  

  
  (3) Seems to be a problem in qemu, not the guest.  The reason I think 
  
  this is because I tried to capture a backtrace of the hang using  
  
  remote gdb, but gdb just hung when trying to connect to qemu  
  
  (gdb connects fine before the bug happens).   
  

  
  (4) Judging by guest messages, appears to be happening when writing   
  
  to the disk.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1013241/+subscriptions



Re: [Qemu-devel] Status of query-netdev QMP command

2012-06-19 Thread Stefan Hajnoczi
On Tue, Jun 19, 2012 at 11:53 AM, Roger Pau Monne  wrote:
> Stefan Hajnoczi wrote:
>>
>> On Mon, Jun 18, 2012 at 3:19 PM, Roger Pau Monne
>>  wrote:
>>>
>>> I've read from the GSoC/2010 that some work was being done creating a
>>> query-netdev QMP command:
>>>
>>> http://wiki.qemu.org/Google_Summer_of_Code_2010/QMP#query-netdev
>>>
>>> The status says that "mentor has merged it into his tree", but I cannot
>>> see
>>> this command anywhere upstream, and it would come in really handy for what
>>> I'm
>>> trying to do. Does anyone know where this has gone?
>>
>>
>> I checked qemu.git/master and don't see it either.  The HMP "info net"
>> command lists the net devices but I'm not aware of a QMP equivalent.
>> What are you trying to do?
>
>
> On Linux you can pass the name of the tap device you wish to create, and
> Qemu honors that, but on BSD systems you have no way of creating a tap
> device with a specific name, they are assigned based on the lowest free
> number (tap2 for example).
>
> I need the query-netdev command in order to get the name of the device that
> Qemu creates, so I can use it in my scripts afterwards.

Can you use -netdev tap,script=mycallback.sh to do that?  It gets
passed the interface name as argv[1].

Stefan



Re: [Qemu-devel] [RFC] [PATCHv2 2/2] Adding basic calls to libseccomp in vl.c

2012-06-19 Thread Avi Kivity
On 06/16/2012 09:46 AM, Blue Swirl wrote:
> On Fri, Jun 15, 2012 at 9:36 PM, Paul Moore  wrote:
>> On Friday, June 15, 2012 09:23:46 PM Blue Swirl wrote:
>>> On Fri, Jun 15, 2012 at 9:02 PM, Paul Moore  wrote:
>>> > On Friday, June 15, 2012 07:06:10 PM Blue Swirl wrote:
>>> >> I think allowing execve() would render seccomp pretty much useless.
>>> >
>>> > Not necessarily.
>>> >
>>> > I'll agree that it does seem a bit odd to allow execve(), but there is
>>> > still value in enabling seccomp to disable potentially buggy/exploitable
>>> > syscalls. Let's not forget that we have over 300 syscalls on x86_64, not
>>> > including the 32 bit versions, and even if we add all of the new syscalls
>>> > suggested in this thread we are still talking about a small subset of
>>> > syscalls.  As far as security goes, the old adage of "less is more"
>>> > applies.
>>>
>>> The helper program being executed could need any of the 300 system
>>> calls, so we'd have to allow all.
>>
>> Don't we have some basic understanding of what the applications being exec'd
>> will need to do?  I sorta see your point, but allowing the entire set of
>> syscalls seems a bit dramatic.
> 
> At least qemu-ifup/down scripts, migration exec and smbd have been
> mentioned. Only the system calls made by smbd (for some version of it)
> can be known. The user could specify arbitrary commands for the
> others, those could be assumed to use some common (large) subset of
> system calls but I think the security value would be close to zero
> then.

We're not trying to protect against the user, but against the guest.  If
we assume the user wrote those scripts with care so they cannot be
exploited by the guest, then we are okay.

However I agree with you that it would be better to restrict those
syscalls.  The scripts are already unnecessary if using a management
system and migration supports passed file descriptors, so that leaves
only smbd, which can probably be pre-execed.

> 
>>
>>> > Protecting against the abuse and misuse of execve() is something that is
>>> > better done with the host's access controls (traditional DAC, MAC via the
>>> > LSM, etc.).
>>>
>>> How about seccomp mode selected by command line switch -seccomp, in
>>> which bind/connect/open/execve are forbidden? The functionality
>>> remaining would be somewhat limited (can't migrate or use SMB etc.
>>> until refactoring of QEMU), but that way seccomp jail would be much
>>> tighter.
>>
>> When I spoke to Anthony about this earlier (offline, sorry) he was opposed to
>> requiring any switches or user interaction to enable seccomp.  I'm not sure 
>> if
>> his stance on this has changed any over the past few months.
> 
> There could be two modes, strict mode (-seccomp) and default mode
> (only some syscalls blocked). With the future decomposed QEMU, strict
> seccomp mode would be default and the switch would be obsoleted. If
> the decomposition is planned to happen soonish, adding the switch
> would be just churn.

We have decomposed qemu to some extent, in that privileged operations
happen in libvirt.  So the modes make sense - qemu has no idea whether a
privileged management system is controlling it or not.

> 
>>
>> In my perfect world, we would have a decomposed QEMU that functions as a
>> series of processes connected via some sort of IPC; the exact divisions are a
>> bit TBD and beyond the scope of this discussion.  In this scenario we would 
>> be
>> able to restrict QEMU with sVirt and seccomp to a much higher degree than we
>> could with the current monolithic QEMU.
>>
>> I don't expect to see my perfect world any time soon, but in the meantime we
>> can still improve the security of QEMU on Linux with these seccomp patches 
>> and
>> for that reason I think it's a win.  Since these patches don't expose 
>> anything
>> at runtime (no knobs, switches, etc.) we leave ourselves plenty of 
>> flexibility
>> for changing things in the future.
> 
> Yes, I'm much in favor of adding seccomp support soon. But I just
> wonder if this is really the best level of security we can reach now,
> not assuming decomposed QEMU, but just minor tweaks?

We might disable mprotect(PROT_EXEC) if running with kvm.

-- 
error compiling committee.c: too many arguments to function





Re: [Qemu-devel] [RFC PATCH 3/3] block: gluster as block backend

2012-06-19 Thread Stefan Hajnoczi
On Tue, Jun 19, 2012 at 10:30 AM, Bharata B Rao
 wrote:
> On Mon, Jun 18, 2012 at 06:35:28PM +0100, Stefan Hajnoczi wrote:
>> On Mon, Jun 11, 2012 at 3:21 PM, Bharata B Rao
>>  wrote:
>> > +    /* Use O_DSYNC for write-through caching, no flags for write-back 
>> > caching,
>> > +     * and O_DIRECT for no caching. */
>> > +    if ((bdrv_flags & BDRV_O_NOCACHE))
>> > +        s->open_flags |= O_DIRECT;
>> > +    if (!(bdrv_flags & BDRV_O_CACHE_WB))
>> > +        s->open_flags |= O_DSYNC;
>>
>> Paolo has changed this recently, you might need to use
>> bs->enable_write_cache instead.
>
> I picked up this logic from block/raw-posix.c:raw_open_common(). Don't see
> anything related to bs->enable_write_cache there. Will find out more about
> bs->enable_write_cache.

If you fetch the latest qemu.git and check bdrv_open_common() there is
new code that stashes BDRV_O_CACHE_WB in bs->enable_write_cache and
then opens the actual block driver with BDRV_O_CACHE_WB set.  You can
use bdrv_enable_write_cache() to test the original flag.

>> > +static void gluster_finish_aiocb(void *arg)
>> > +{
>> > +    int ret;
>> > +    gluster_aiocb_t *gaiocb = (gluster_aiocb_t *)arg;
>> > +    BDRVGlusterState *s = ((glusterAIOCB *)gaiocb->opaque)->s;
>> > +
>> > +    ret = qemu_gluster_send_pipe(s, gaiocb);
>> > +    if (ret < 0) {
>> > +        g_free(gaiocb);
>>
>> What about the glusterAIOCB?  You need to invoke the callback with an
>> error value.
>>
>> What about decrementing the in-flight I/O request count?
>
> Again, this comes from rbd. gluster_finish_aiocb() is the callback
> that we have registered with gluster. I am not doing any error handling when
> we even fail to write to the pipe. An event reader would be waiting to read
> from the other end of the pipe. Typically error handling and decrementing
> the in-flight IO request count is done by that event reader. But in this
> case, we even failed to kick (via pipe write) the event reader.

It sounds like you're saying the request is not properly cleaned up
and completed on failure.  Please fix :).

>> > +static int64_t qemu_gluster_getlength(BlockDriverState *bs)
>> > +{
>> > +    BDRVGlusterState *s = bs->opaque;
>> > +    gluster_file_t fd = s->fd;
>> > +    struct stat st;
>> > +    int ret;
>> > +
>> > +    ret = gluster_fstat(fd, &st);
>> > +    if (ret < 0) {
>> > +        return -1;
>>
>> Please return a negative errno instead of -1.
>
> Ok. Maybe I could just return the value from gluster_fstat().

The gluster_fstat() code also does not return negative errnos (at
least in the first case I checked, when CALLOC() fails).

Stefan



Re: [Qemu-devel] [PATCH] spapr_vscsi: Error handling fixes

2012-06-19 Thread Andreas Färber
Am 19.06.2012 08:02, schrieb Benjamin Herrenschmidt:
> We were incorrectly g_free'ing an object that isn't allocated
> in one error path and failed to release it completely in another
> 
> This fixes qemu crashes with some cases of IO errors.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> ---
>  hw/spapr_vscsi.c |4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
> index d2fe3e5..6afc3b4 100644
> --- a/hw/spapr_vscsi.c
> +++ b/hw/spapr_vscsi.c
> @@ -801,6 +801,7 @@ static void vscsi_got_payload(VSCSIState *s, vscsi_crq 
> *crq)
>  if (crq->s.IU_length > sizeof(union viosrp_iu)) {
>  fprintf(stderr, "VSCSI: SRP IU too long (%d bytes) !\n",
>  crq->s.IU_length);
> +vscsi_put_req(req);
>  return;
>  }
>  
> @@ -808,7 +809,8 @@ static void vscsi_got_payload(VSCSIState *s, vscsi_crq 
> *crq)
>  if (spapr_vio_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu,
> crq->s.IU_length)) {
>  fprintf(stderr, "vscsi_got_payload: DMA read failure !\n");
> -g_free(req);
> +vscsi_put_req(req);

> + return;

Tab alert. :)

/-F

>  }
>  memcpy(&req->crq, crq, sizeof(vscsi_crq));
>  
> 
> 
> 


-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Wei-Ren Chen
> If you only have one CPU then using first_cpu->some_field1 should be
> almost as easy. :)

  I am afraid first_cpu (CPUArchState) is poisoned, too. :/
Even if I comment out CPUArchState from poison.h,

--- vl.c
extern CPUArchState *first_cpu;

static void main_loop(void)
{
... snip ...

printf("%d", first_cpu->created);
}
---

I still get compilation error below,

---
/tmp/chenwj/qemu/vl.c:1548:20: error: expected '=', ',', ';', 'asm' or 
'__attribute__' before '*' token
/tmp/chenwj/qemu/vl.c: In function 'main_loop':
/tmp/chenwj/qemu/vl.c:1568:18: error: 'first_cpu' undeclared (first use in this 
function)
---

  Any thought on what I am missing? Thanks.

Regards,
chenwj

-- 
Wei-Ren Chen (陳韋任)
Computer Systems Lab, Institute of Information Science,
Academia Sinica, Taiwan (R.O.C.)
Tel:886-2-2788-3799 #1667
Homepage: http://people.cs.nctu.edu.tw/~chenwj



Re: [Qemu-devel] [PATCHv3] Add PIIX4 properties to control PM system states.

2012-06-19 Thread Gleb Natapov
On Wed, Jun 13, 2012 at 04:23:28PM +0300, Gleb Natapov wrote:
> Ping?
> 
Ping 2?

> On Mon, Jun 04, 2012 at 02:31:55PM +0300, Gleb Natapov wrote:
> > This patch adds two things. First it allows QEMU to distinguish between
> > regular powerdown and S4 powerdown. Later separate QMP notification will
> > be added for S4 powerdown. Second it allows S3/S4 states to be disabled
> > from QEMU command line. Some guests are known to be broken with regard to
> > power management, but allow it to be used anyway. Using the new properties,
> > management will be able to disable S3/S4 for such guests.
> > 
> > Supported system states are passed to the firmware using a new fw_cfg file.
> > The file contains a 6-byte array. Each byte represents one system
> > state. If byte at offset X has its MSB set it means that system state
> > X is supported and to enter it guest should use the value from lowest 3
> > bits.
> > 
> > Signed-off-by: Gleb Natapov 
> > ---
> >  hw/acpi.c   |5 -
> >  hw/acpi.h   |2 +-
> >  hw/acpi_piix4.c |   20 ++--
> >  hw/mips_malta.c |2 +-
> >  hw/pc.c |3 ++-
> >  hw/pc.h |4 ++--
> >  hw/pc_piix.c|5 +++--
> >  hw/vt82c686.c   |2 +-
> >  8 files changed, 32 insertions(+), 11 deletions(-)
> > 
> > diff --git a/hw/acpi.c b/hw/acpi.c
> > index 5d521e5..effc7ec 100644
> > --- a/hw/acpi.c
> > +++ b/hw/acpi.c
> > @@ -370,7 +370,7 @@ void acpi_pm1_cnt_init(ACPIREGS *ar)
> >  qemu_register_wakeup_notifier(&ar->wakeup);
> >  }
> >  
> > -void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
> > +void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val, char s4)
> >  {
> >  ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE);
> >  
> > @@ -385,6 +385,9 @@ void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
> >  qemu_system_suspend_request();
> >  break;
> >  default:
> > +if (sus_typ == s4) { /* S4 request */
> > +qemu_system_shutdown_request();
> > +}
> >  break;
> >  }
> >  }
> > diff --git a/hw/acpi.h b/hw/acpi.h
> > index fe8cdb4..7337f41 100644
> > --- a/hw/acpi.h
> > +++ b/hw/acpi.h
> > @@ -139,7 +139,7 @@ void acpi_pm1_evt_reset(ACPIREGS *ar);
> >  
> >  /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
> >  void acpi_pm1_cnt_init(ACPIREGS *ar);
> > -void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val);
> > +void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val, char s4);
> >  void acpi_pm1_cnt_update(ACPIREGS *ar,
> >   bool sci_enable, bool sci_disable);
> >  void acpi_pm1_cnt_reset(ACPIREGS *ar);
> > diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
> > index 0345490..812e62f 100644
> > --- a/hw/acpi_piix4.c
> > +++ b/hw/acpi_piix4.c
> > @@ -27,6 +27,7 @@
> >  #include "sysemu.h"
> >  #include "range.h"
> >  #include "ioport.h"
> > +#include "fw_cfg.h"
> >  
> >  //#define DEBUG
> >  
> > @@ -71,6 +72,10 @@ typedef struct PIIX4PMState {
> >  struct pci_status pci0_status;
> >  uint32_t pci0_hotplug_enable;
> >  uint32_t pci0_slot_device_present;
> > +
> > +uint8_t disable_s3;
> > +uint8_t disable_s4;
> > +uint8_t s4_val;
> >  } PIIX4PMState;
> >  
> >  static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s);
> > @@ -123,7 +128,7 @@ static void pm_ioport_write(IORange *ioport, uint64_t 
> > addr, unsigned width,
> >  pm_update_sci(s);
> >  break;
> >  case 0x04:
> > -acpi_pm1_cnt_write(&s->ar, val);
> > +acpi_pm1_cnt_write(&s->ar, val, s->s4_val);
> >  break;
> >  default:
> >  break;
> > @@ -422,7 +427,7 @@ static int piix4_pm_initfn(PCIDevice *dev)
> >  
> >  i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
> > qemu_irq sci_irq, qemu_irq smi_irq,
> > -   int kvm_enabled)
> > +   int kvm_enabled, void *fw_cfg)
> >  {
> >  PCIDevice *dev;
> >  PIIX4PMState *s;
> > @@ -438,11 +443,22 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, 
> > uint32_t smb_io_base,
> >  
> >  qdev_init_nofail(&dev->qdev);
> >  
> > +if (fw_cfg) {
> > +uint8_t suspend[6] = {128, 0, 0, 129, 128, 128};
> > +suspend[3] = 1 | ((!s->disable_s3) << 7);
> > +suspend[4] = s->s4_val | ((!s->disable_s4) << 7);
> > +
> > +fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 
> > 6);
> > +}
> > +
> >  return s->smb.smbus;
> >  }
> >  
> >  static Property piix4_pm_properties[] = {
> >  DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0),
> > +DEFINE_PROP_UINT8("disable_s3", PIIX4PMState, disable_s3, 0),
> > +DEFINE_PROP_UINT8("disable_s4", PIIX4PMState, disable_s4, 0),
> > +DEFINE_PROP_UINT8("s4_val", PIIX4PMState, s4_val, 2),
> >  DEFINE_PROP_END_OF_LIST(),
> >  };
> >  
> > diff --git a/hw/mips_malta.c b/hw/mips_malta.c
> > index 4752bb2..205b4a5 100644
> > --- a/hw/mips_malta.c
> > ++

Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Peter Maydell
On 19 June 2012 12:54, 陳韋任 (Wei-Ren Chen)  wrote:
>> If you only have one CPU then using first_cpu->some_field1 should be
>> almost as easy. :)
>
>  I am afraid first_cpu (CPUArchState) is got poisoned, too. :/

Yes. You'll need to write a function which lives in a source file which
has access to the poisoned symbols, and then call that from vl.c.
hw_error() might be a useful example to follow.

-- PMM



Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Peter Crosthwaite
Andreas, will an __attribute__((destructor)) work? Because if it does, you can put
your printf pretty much anywhere rather than in vl.c
On Jun 19, 2012 9:54 PM, "陳韋任 (Wei-Ren Chen)" 
wrote:

> > If you only have one CPU then using first_cpu->some_field1 should be
> > almost as easy. :)
>
>  I am afraid first_cpu (CPUArchState) is got poisoned, too. :/
> Even I comment out CPUArchState from poison.h,
>
> --- vl.c
> extern CPUArchState *first_cpu;
>
> static void main_loop(void)
> {
>... snip ...
>
>printf("%d", first_cpu->created);
> }
> ---
>
> I still get compilation error below,
>
> ---
> /tmp/chenwj/qemu/vl.c:1548:20: error: expected '=', ',', ';', 'asm' or
> '__attribute__' before '*' token
> /tmp/chenwj/qemu/vl.c: In function 'main_loop':
> /tmp/chenwj/qemu/vl.c:1568:18: error: 'first_cpu' undeclared (first use in
> this function)
> ---
>
>  Any thought on what I am missing? Thanks.
>
> Regards,
> chenwj
>
> --
> Wei-Ren Chen (陳韋任)
> Computer Systems Lab, Institute of Information Science,
> Academia Sinica, Taiwan (R.O.C.)
> Tel:886-2-2788-3799 #1667
> Homepage: http://people.cs.nctu.edu.tw/~chenwj
>
>


Re: [Qemu-devel] How to measure guest memory access (qemu_ld/qemu_st) time?

2012-06-19 Thread Michael.Kang
On Tue, Jun 19, 2012 at 3:52 PM, 陳韋任 (Wei-Ren Chen)
 wrote:
>> But if QEMU/TCG is doing a GVA->GPA translation as Wei-Ren said, I don't see 
>> how
>> KVM can help.
>
>  Just want to clarify. QEMU maintain a TLB (env->tlb_table) which stores GVA 
> ->
> HVA mapping, it is used to speedup the address translation. If TLB miss, QEMU
> will call cpu_arm_handle_mmu_fault (take ARM as an example) doing GVA -> GPA
> translation.
>
>> I could understand having multiple 32bit regions in QEMU's virtual space (no
>> need for KVM), one per guest page table, and then simply adding an offset to
>> every memory access to redirect it to the appropriate 32-bit region (1 region
>> per guest page table).
>>
>> This could translate a single guest ld/st into a host ld+add+ld/st (the first
>> load is to get the "region" offset for the currently executing guest 
>> context).
>
>  It differs from what QEMU's doing? Each time we fill TLB, we add an offset to
> the GPA to get HVA, then store GVA -> HVA mapping into the TLB (IIUC). I don't
> see much differences here.
I think what QEMU is doing is mapping GPA to HVA. Lluís means we can
map GVA to HVA, so
we do not even need to look up the TLB and can just use one host memory
access instruction to simulate one guest
memory access instruction.

Thanks
MK

>
> Regards,
> chenwj
>
> --
> Wei-Ren Chen (陳韋任)
> Computer Systems Lab, Institute of Information Science,
> Academia Sinica, Taiwan (R.O.C.)
> Tel:886-2-2788-3799 #1667
> Homepage: http://people.cs.nctu.edu.tw/~chenwj
>



-- 
www.skyeye.org



Re: [Qemu-devel] How to measure guest memory access (qemu_ld/qemu_st) time?

2012-06-19 Thread Michael.Kang
On Tue, Jun 19, 2012 at 4:26 AM, Lluís Vilanova  wrote:
> Blue Swirl writes:
>
>> On Mon, Jun 18, 2012 at 8:28 AM, 陳韋任 (Wei-Ren Chen)
>>  wrote:
>>>>   The reason why we want to do the measuring is we want to use KVM (sounds
>>>> crazy idea) MMU virtualization to speedup the guest -> host memory address
>>>> translation. I talked to some people on LinuxCon Japan, included Paolo, about
>>>> this idea. The feedback I got is we can only use shadow page table rather than
>>>> EPT/NPT to do the address translation (if possible!) since different ISA (ARM
>>>> and x86, for example) have different page table format. Besides, QEMU has to
>>>> use ioctl to ask KVM to get the translation result, but it's an overkill as
>>>> the ARM page table is quite simple, which can be done in user mode very fast.
>>>
>>>  Anyone would like to give a comment on this? ;)
>>>
>>>  From the talk with Laurent on #qemu, he said the way he thought of is
>>> translating GVA -> GPA manually (through software), then try to insert
>>> GPA -> HPA into EPT, that's the only way HW can help.
>
>> For some 32 bit guests on some 64 bit hosts, maybe KVM could indeed
>> help. Just map the whole 4G guest virtual address space so that guest
>> memory accesses can be turned 1:1 into raw direct accesses. I/O pages
>> would be unmapped, accesses handled via fault path.
>
> But if QEMU/TCG is doing a GVA->GPA translation as Wei-Ren said, I don't see 
> how
> KVM can help.
>
> I could understand having multiple 32bit regions in QEMU's virtual space (no
> need for KVM), one per guest page table, and then simply adding an offset to
> every memory access to redirect it to the appropriate 32-bit region (1 region
> per guest page table).
>
> This could translate a single guest ld/st into a host ld+add+ld/st (the first
> load is to get the "region" offset for the currently executing guest context).
>
> With this, you can use 'mprotect' in QEMU to enforce the guest's page
> permissions (as long as the host supports it), and 'mmap' to share the host
> physical memory between the different 32-bit regions whenever the guest page
> tables share guest physical memory (again, as long as the host supports it).
>
> But I suppose having a guest with as many or more bits than the host is the
> common case, which hinders its applicability.

I once had a similar thought. Firstly, we only simulate a 32-bit
guest on a 64-bit host in this case.
Secondly, I did some experiments, and I could not mmap an address
space of more than
about 8G on a 64-bit Linux OS. Maybe there are some limits in the Linux
kernel of the host.

Thanks
MK

>
>
> Lluis
>
> --
>  "And it's much the same thing with knowledge, for whenever you learn
>  something new, the whole world becomes that much richer."
>  -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
>  Tollbooth
>



-- 
www.skyeye.org



Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Andreas Färber
Am 19.06.2012 13:54, schrieb 陳韋任 (Wei-Ren Chen):
>> If you only have one CPU then using first_cpu->some_field1 should be
>> almost as easy. :)
> 
>   I am afraid first_cpu (CPUArchState) is got poisoned, too. :/
> Even I comment out CPUArchState from poison.h,
> 
> --- vl.c
> extern CPUArchState *first_cpu;
> 
> static void main_loop(void)
> {
> ... snip ...
> 
> printf("%d", first_cpu->created);
> }
> ---
> 
> I still get compilation error below,
> 
> ---
> /tmp/chenwj/qemu/vl.c:1548:20: error: expected '=', ',', ';', 'asm' or 
> '__attribute__' before '*' token
> /tmp/chenwj/qemu/vl.c: In function 'main_loop':
> /tmp/chenwj/qemu/vl.c:1568:18: error: 'first_cpu' undeclared (first use in 
> this function)
> ---
> 
>   Any thought on what I am missing? Thanks.

Sorry, my mistake: vl.c is not compiled per-target like I thought but
per target_phys_addr_t in libhwX, thus it cannot access cpu.h or
CPUArchState (only CPUState). That means evaluations of fields in
CPUARMState need to be done in target-arm/ and you might want to check
the Notifiers or in the worst case _atexit() to hook some callback
function up. With QOM CPUState there's finalizers in theory but I don't
think they get called yet for anything except linux-user thread exit.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



[Qemu-devel] [Bug 712416] Re: kvm_intel kernel module crash with via nano vmx

2012-06-19 Thread Serge Hallyn
** Changed in: linux (Ubuntu)
   Status: Incomplete => New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/712416

Title:
  kvm_intel kernel module crash with via nano vmx

Status in QEMU:
  New
Status in “linux” package in Ubuntu:
  New
Status in “kvm” package in Debian:
  New

Bug description:
  The kvm module for hardware virtualisation does not work properly on VIA Nano
  processors.

  Tested with processor: VIA Nano processor U2250.
  Processors flags (visible in /proc/cpuinfo): fpu vme de pse tsc msr pae mce 
cx8 apic sep mtrr pge mca cmov pat clflush acpi mmx fxsr sse sse2 ss tm syscall 
nx lm constant_tsc up rep_good pni monitor vmx est tm2 ssse3 cx16 xtpr rng 
rng_en ace ace_en ace2 phe phe_en lahf_lm

  With kernel 2.6.32: kvm does not work and dmesg contains a lot of:
  handle_exception: unexpected, vectoring info 0x800d intr info 0x8b0d

  With kernel 2.6.35: the whole system crashes. Nothing is visible in the logs.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/712416/+subscriptions



Re: [Qemu-devel] [PATCHv3] Add PIIX4 properties to control PM system states.

2012-06-19 Thread Igor Mammedov

On 06/04/2012 01:31 PM, Gleb Natapov wrote:

This patch adds two things. First it allows QEMU to distinguish between
regular powerdown and S4 powerdown. Later separate QMP notification will
be added for S4 powerdown. Second it allows S3/S4 states to be disabled
from QEMU command line. Some guests are known to be broken with regard to
power management, but allow it to be used anyway. Using the new properties,
management will be able to disable S3/S4 for such guests.

Supported system states are passed to the firmware using a new fw_cfg file.
The file contains a 6-byte array. Each byte represents one system
state. If byte at offset X has its MSB set it means that system state
X is supported and to enter it guest should use the value from lowest 3
bits.

Why use a 6-byte array for 1 or 2 features/values?

PS:
Have you posted seabios counterpart?



Signed-off-by: Gleb Natapov 
---
  hw/acpi.c   |5 -
  hw/acpi.h   |2 +-
  hw/acpi_piix4.c |   20 ++--
  hw/mips_malta.c |2 +-
  hw/pc.c |3 ++-
  hw/pc.h |4 ++--
  hw/pc_piix.c|5 +++--
  hw/vt82c686.c   |2 +-
  8 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/hw/acpi.c b/hw/acpi.c
index 5d521e5..effc7ec 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -370,7 +370,7 @@ void acpi_pm1_cnt_init(ACPIREGS *ar)
  qemu_register_wakeup_notifier(&ar->wakeup);
  }

-void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
+void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val, char s4)
  {
  ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE);

@@ -385,6 +385,9 @@ void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
  qemu_system_suspend_request();
  break;
  default:
+if (sus_typ == s4) { /* S4 request */
+qemu_system_shutdown_request();
+}
  break;
  }
  }
diff --git a/hw/acpi.h b/hw/acpi.h
index fe8cdb4..7337f41 100644
--- a/hw/acpi.h
+++ b/hw/acpi.h
@@ -139,7 +139,7 @@ void acpi_pm1_evt_reset(ACPIREGS *ar);

  /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
  void acpi_pm1_cnt_init(ACPIREGS *ar);
-void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val);
+void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val, char s4);
  void acpi_pm1_cnt_update(ACPIREGS *ar,
   bool sci_enable, bool sci_disable);
  void acpi_pm1_cnt_reset(ACPIREGS *ar);
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 0345490..812e62f 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -27,6 +27,7 @@
  #include "sysemu.h"
  #include "range.h"
  #include "ioport.h"
+#include "fw_cfg.h"

  //#define DEBUG

@@ -71,6 +72,10 @@ typedef struct PIIX4PMState {
  struct pci_status pci0_status;
  uint32_t pci0_hotplug_enable;
  uint32_t pci0_slot_device_present;
+
+uint8_t disable_s3;
+uint8_t disable_s4;
+uint8_t s4_val;
  } PIIX4PMState;

  static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s);
@@ -123,7 +128,7 @@ static void pm_ioport_write(IORange *ioport, uint64_t addr, 
unsigned width,
  pm_update_sci(s);
  break;
  case 0x04:
-acpi_pm1_cnt_write(&s->ar, val);
+acpi_pm1_cnt_write(&s->ar, val, s->s4_val);
  break;
  default:
  break;
@@ -422,7 +427,7 @@ static int piix4_pm_initfn(PCIDevice *dev)

  i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
 qemu_irq sci_irq, qemu_irq smi_irq,
-   int kvm_enabled)
+   int kvm_enabled, void *fw_cfg)
  {
  PCIDevice *dev;
  PIIX4PMState *s;
@@ -438,11 +443,22 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t 
smb_io_base,

  qdev_init_nofail(&dev->qdev);

+if (fw_cfg) {
+uint8_t suspend[6] = {128, 0, 0, 129, 128, 128};

 
Wouldn't it be better to use symbolic names here?


+suspend[3] = 1 | ((!s->disable_s3) << 7);
+suspend[4] = s->s4_val | ((!s->disable_s4) << 7);
+
+fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6);
+}
+
  return s->smb.smbus;
  }

  static Property piix4_pm_properties[] = {
  DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0),
+DEFINE_PROP_UINT8("disable_s3", PIIX4PMState, disable_s3, 0),
+DEFINE_PROP_UINT8("disable_s4", PIIX4PMState, disable_s4, 0),
+DEFINE_PROP_UINT8("s4_val", PIIX4PMState, s4_val, 2),
  DEFINE_PROP_END_OF_LIST(),
  };

diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index 4752bb2..205b4a5 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -954,7 +954,7 @@ void mips_malta_init (ram_addr_t ram_size,
  pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1);
  pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci");
  smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100,
-  isa_get_irq(NULL, 9), NULL, 0);
+  isa_get_irq(NULL, 9), NULL, 

Re: [Qemu-devel] [PATCH 03/39] block: implement is_allocated for raw

2012-06-19 Thread Alexander Graf

On 15.06.2012, at 15:33, Kevin Wolf wrote:

> From: Paolo Bonzini 
> 
> Either FIEMAP, or SEEK_DATA+SEEK_HOLE can be used to implement the
> is_allocated callback for raw files.  On Linux ext4, btrfs and XFS
> all support it.
> 
> Signed-off-by: Paolo Bonzini 
> Signed-off-by: Kevin Wolf 
> ---
> block/raw-posix.c |   98 +
> block/raw.c   |8 
> 2 files changed, 106 insertions(+), 0 deletions(-)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 03fcfcc..bf7700a 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -52,6 +52,10 @@
> #include 
> #include 
> #include 
> +#include 
> +#endif
> +#ifdef CONFIG_FIEMAP
> +#include 
> #endif
> #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
> #include 
> @@ -583,6 +587,99 @@ static int raw_create(const char *filename, 
> QEMUOptionParameter *options)
> return result;
> }
> 
> +/*
> + * Returns true iff the specified sector is present in the disk image. 
> Drivers
> + * not implementing the functionality are assumed to not support backing 
> files,
> + * hence all their sectors are reported as allocated.
> + *
> + * If 'sector_num' is beyond the end of the disk image the return value is 0
> + * and 'pnum' is set to 0.
> + *
> + * 'pnum' is set to the number of sectors (including and immediately 
> following
> + * the specified sector) that are known to be in the same
> + * allocated/unallocated state.
> + *
> + * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
> + * beyond the end of the disk image it will be clamped.
> + */
> +static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
> +int64_t sector_num,
> +int nb_sectors, int *pnum)
> +{
> +BDRVRawState *s = bs->opaque;

cc1: warnings being treated as errors
block/raw-posix.c: In function ‘raw_co_is_allocated’:
block/raw-posix.c:609: error: unused variable ‘s’


Alex




Re: [Qemu-devel] [PATCHv3] Add PIIX4 properties to control PM system states.

2012-06-19 Thread Gleb Natapov
On Tue, Jun 19, 2012 at 02:30:12PM +0200, Igor Mammedov wrote:
> On 06/04/2012 01:31 PM, Gleb Natapov wrote:
> >This patch adds two things. First it allows QEMU to distinguish between
> >regular powerdown and S4 powerdown. Later separate QMP notification will
> >be added for S4 powerdown. Second it allows S3/S4 states to be disabled
> >from QEMU command line. Some guests known to be broken with regards to
> >power management, but allow to use it anyway. Using new properties
> >management will be able to disable S3/S4 for such guests.
> >
> >Supported system state are passed to a firmware using new fw_cfg file.
> >The file contains  6 byte array. Each byte represents one system
> >state. If byte at offset X has its MSB set it means that system state
> >X is supported and to enter it guest should use the value from lowest 3
> >bits.
> Why to use 6 byte array for 1 or 2 features/values?
> 
There are 6 states 0-5. Not all of them make sense to enable/disable,
but I'd rather make the interface more flexible than needed than the other
way around.

> PS:
> Have you posted seabios counterpart?
Long time ago. Kevin waits for QEMU side to go in before he applies.

> 
> >
> >Signed-off-by: Gleb Natapov 
> >---
> >  hw/acpi.c   |5 -
> >  hw/acpi.h   |2 +-
> >  hw/acpi_piix4.c |   20 ++--
> >  hw/mips_malta.c |2 +-
> >  hw/pc.c |3 ++-
> >  hw/pc.h |4 ++--
> >  hw/pc_piix.c|5 +++--
> >  hw/vt82c686.c   |2 +-
> >  8 files changed, 32 insertions(+), 11 deletions(-)
> >
> >diff --git a/hw/acpi.c b/hw/acpi.c
> >index 5d521e5..effc7ec 100644
> >--- a/hw/acpi.c
> >+++ b/hw/acpi.c
> >@@ -370,7 +370,7 @@ void acpi_pm1_cnt_init(ACPIREGS *ar)
> >  qemu_register_wakeup_notifier(&ar->wakeup);
> >  }
> >
> >-void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
> >+void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val, char s4)
> >  {
> >  ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE);
> >
> >@@ -385,6 +385,9 @@ void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
> >  qemu_system_suspend_request();
> >  break;
> >  default:
> >+if (sus_typ == s4) { /* S4 request */
> >+qemu_system_shutdown_request();
> >+}
> >  break;
> >  }
> >  }
> >diff --git a/hw/acpi.h b/hw/acpi.h
> >index fe8cdb4..7337f41 100644
> >--- a/hw/acpi.h
> >+++ b/hw/acpi.h
> >@@ -139,7 +139,7 @@ void acpi_pm1_evt_reset(ACPIREGS *ar);
> >
> >  /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
> >  void acpi_pm1_cnt_init(ACPIREGS *ar);
> >-void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val);
> >+void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val, char s4);
> >  void acpi_pm1_cnt_update(ACPIREGS *ar,
> >   bool sci_enable, bool sci_disable);
> >  void acpi_pm1_cnt_reset(ACPIREGS *ar);
> >diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
> >index 0345490..812e62f 100644
> >--- a/hw/acpi_piix4.c
> >+++ b/hw/acpi_piix4.c
> >@@ -27,6 +27,7 @@
> >  #include "sysemu.h"
> >  #include "range.h"
> >  #include "ioport.h"
> >+#include "fw_cfg.h"
> >
> >  //#define DEBUG
> >
> >@@ -71,6 +72,10 @@ typedef struct PIIX4PMState {
> >  struct pci_status pci0_status;
> >  uint32_t pci0_hotplug_enable;
> >  uint32_t pci0_slot_device_present;
> >+
> >+uint8_t disable_s3;
> >+uint8_t disable_s4;
> >+uint8_t s4_val;
> >  } PIIX4PMState;
> >
> >  static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s);
> >@@ -123,7 +128,7 @@ static void pm_ioport_write(IORange *ioport, uint64_t 
> >addr, unsigned width,
> >  pm_update_sci(s);
> >  break;
> >  case 0x04:
> >-acpi_pm1_cnt_write(&s->ar, val);
> >+acpi_pm1_cnt_write(&s->ar, val, s->s4_val);
> >  break;
> >  default:
> >  break;
> >@@ -422,7 +427,7 @@ static int piix4_pm_initfn(PCIDevice *dev)
> >
> >  i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
> > qemu_irq sci_irq, qemu_irq smi_irq,
> >-   int kvm_enabled)
> >+   int kvm_enabled, void *fw_cfg)
> >  {
> >  PCIDevice *dev;
> >  PIIX4PMState *s;
> >@@ -438,11 +443,22 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, 
> >uint32_t smb_io_base,
> >
> >  qdev_init_nofail(&dev->qdev);
> >
> >+if (fw_cfg) {
> >+uint8_t suspend[6] = {128, 0, 0, 129, 128, 128};
>  
> Wouldn't it be better to use symbolic names here?
> 
> >+suspend[3] = 1 | ((!s->disable_s3) << 7);
> >+suspend[4] = s->s4_val | ((!s->disable_s4) << 7);
> >+
> >+fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 
> >6);
> >+}
> >+
> >  return s->smb.smbus;
> >  }
> >
> >  static Property piix4_pm_properties[] = {
> >  DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0),
> >+DEFINE_PROP_UINT8("disable_s3", PIIX4PMState, d

[Qemu-devel] [Bug 712416] Re: kvm_intel kernel module crash with via nano vmx

2012-06-19 Thread khetzal
It's impossible to run any command because the system is frozen

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/712416

Title:
  kvm_intel kernel module crash with via nano vmx

Status in QEMU:
  New
Status in “linux” package in Ubuntu:
  Incomplete
Status in “kvm” package in Debian:
  New

Bug description:
  kvm module for hardware virtualisation not work properly on via nano
  processors.

  Tested with processor: VIA Nano processor U2250.
  Processors flags (visible in /proc/cpuinfo): fpu vme de pse tsc msr pae mce 
cx8 apic sep mtrr pge mca cmov pat clflush acpi mmx fxsr sse sse2 ss tm syscall 
nx lm constant_tsc up rep_good pni monitor vmx est tm2 ssse3 cx16 xtpr rng 
rng_en ace ace_en ace2 phe phe_en lahf_lm

  With kernel 2.6.32: kvm not work and dmesg contains a lot of:
  handle_exception: unexpected, vectoring info 0x800d intr info 0x8b0d

  With kernel 2.6.35: all the system crash. Nothing visible in logs

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/712416/+subscriptions



Re: [Qemu-devel] [PATCH 00/31] PPC: mpc8544ds: Create device tree dynamically

2012-06-19 Thread Alexander Graf

On 07.06.2012, at 23:09, Blue Swirl wrote:

> On Tue, Jun 5, 2012 at 11:52 PM, Alexander Graf  wrote:
>> Today we have two separate places where we keep information which device
>> is where:
>> 
>>  - hw/ppce500_mpc8544ds.c to instantiate all devices
>>  - pc-bios/mpc8544ds.dtb as device tree to tell the guest about devices
>> 
>> Every time we split crucial information, things can go terribly wrong. If
>> you update one file, but not the other, you can screw things up without
>> realizing it quickly.
>> 
>> The redundancy is also unnecessary, because QEMU already knows all the
>> information at which addresses its devices live. So we can generate the
>> device tree from the same variables - and even have the device tree adjust
>> if something changes in there.
>> 
>> The one functionality we lose with this approach is the ability to manually
>> patch the device tree to contain additional devices. To still be able to do
>> so easily, we introduce a new option -machine dumpdtb= that creates a
>> dtb output file which can be used with -machine dtb= later. In between
>> these 2 executions of QEMU, the dtb can be modified however much you like.
>> 
>> A lot of bits in this patch set are still hardcoded. We also don't accommodate
>> for dynamic creation of device tree nodes when -device is used. This requires
>> a bit more QOM'ification for us to be able to loop through all devices, so we
>> can dynamically create the device tree nodes for them. The basic concept 
>> should
>> still hold as is though.
>> 
>> 
>> Alex
> 
> Please use snprintf() instead of sprintf().

Oh how much I would love to be able to just call asprintf() and call it a day 
:).


Alex




Re: [Qemu-devel] [PATCH v2 1/2] arm_boot: Assume Linux boot flow when -dtb given

2012-06-19 Thread Peter Maydell
On 18 June 2012 02:35, Peter A. G. Crosthwaite
 wrote:
> If the user boots with a -dtb assume the Linux boot flow, even when handling 
> an
> elf.

We don't do this for -initrd, why should we do it for -dtb ?

-- PMM



Re: [Qemu-devel] [PATCH v2 2/2] arm_boot: Conditionalised DTB command line update

2012-06-19 Thread Peter Maydell
On 18 June 2012 02:35, Peter A. G. Crosthwaite
 wrote:
> The DTB command line should only be overwritten if the user provides a command
> line with -append. Otherwise whatever command line was in the DTB should stay
> unchanged.
>
> Signed-off-by: Peter A. G. Crosthwaite 
> ---
> changed since v1:
> checked cmd line string in binfo rather than machine opt

Yep, this looks nicer and matches how we handle it in the ATAGS code
path.

Reviewed-by: Peter Maydell 

-- PMM



Re: [Qemu-devel] Adding errno to QMP errors

2012-06-19 Thread Luiz Capitulino
On Mon, 18 Jun 2012 13:31:52 -0500
Anthony Liguori  wrote:

> >> Are any users of QMP actually asking for this kind of advanced
> >> error reporting ?  From libvirt's POV we're perfectly content
> >> with just an error class&  string.
> >
> > Real users, please, not theoretical ones.
> 
> Irrespective of anything else, I think it's safe to say the experiment of 
> "rich 
> errors" has been a failure.  

Yes, I fully agree.

> We still have way too many places using error_report.
> 
> As I mentioned in another thread, I think we should:
> 
> 1) Introduce a GENERIC_ERROR QError type.  It could have a 'domain' and a 
> 'msg' 
> field.
> 
> 2) Focus on converting users of error_report over to use propagated Error 
> objects.

I agree with this too and the conversion itself can mostly be automated
I think. However, I think this is a related, but different problem (more below).

> We shouldn't/can't change existing QError users.  We also shouldn't consider 
> changing the wire protocol.  But for new error users, we should/can relax the 
> reported errors.

Can we agree on what 'relax' actually means?

In the very beginning of QMP, Markus had an idea of making errors absurdly
simple iirc it was just three general classes and a message (am I right,
Markus)?

Daniel seems to suggest something along these lines too. However, in my
understanding we're going to have two kinds of errors:

 1. OS errors: system calls or library functions errors. They will look
like this:

 { "error": "OpenFileFailed", "filename": "/tmp/foo",
   "os-error": "nospace" }

This means that, for every system call we're going to have a FOOFailed.
Not sure this is reasonable.

 2. Anything else that doesn't fall under item 1, i.e. command-specific errors,
like InvalidBlockFormat.

Is this what we really want to have? This is an honest question.

Btw, I think we first have to decide what we really want and afterwards we
discuss compatibility. I'm not saying we'll break it, but we might be able
to move forward and still maintain compatibility depending on what we want.

> We need a clear support policy on whether the contents of 'msg' are stable or 
> not too.

It's already declared on the qmp spec as not stable:

- The "desc" member is a human-readable error message. Clients should
  not attempt to parse this message.

Also, the qmp-commands.txt is very strong on error compatibility:

3. Errors, in special, are not documented. Applications should NOT check
   for specific errors classes or data (it's strongly recommended to only
   check for the "error" key)

This is a bit unrealistic today though, as this was written when we were
still unsure about QMP's future and errors are getting documented in the
schema anyway.



Re: [Qemu-devel] Any better way to access CPUArchState in vl.c?

2012-06-19 Thread Andreas Färber
Am 19.06.2012 14:09, schrieb Peter Crosthwaite:
> Andreas, will an attribute((destructor)) work ? Cos if it does you can
> put your printf pretty much anywhere rather than vl.c

Yes, it might. main() only seems to call cpus.c:pause_all_vcpus(), so
neither first_cpu nor the CPU(Arch)State would get cleaned up. But then
again all my comments are based on qemu.git master whereas Wei-Ren is
working on 0.14.x IIRC.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH] Add missing check for host_from_stream_offset return value for RAM_SAVE_FLAG_PAGE

2012-06-19 Thread Juan Quintela
Orit Wasserman  wrote:
> Signed-off-by: Orit Wasserman 

Reviewed-by: Juan Quintela  



[Qemu-devel] [PATCH 16/16] arm_boot: Conditionalised DTB command line update

2012-06-19 Thread Peter Maydell
From: Peter A. G. Crosthwaite 

The DTB command line should only be overwritten if the user provides a command
line with -append. Otherwise whatever command line was in the DTB should stay
unchanged.

Signed-off-by: Peter A. G. Crosthwaite 
Signed-off-by: Peter Maydell 
---
 hw/arm_boot.c |   10 ++
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/arm_boot.c b/hw/arm_boot.c
index d0e643b..a1e6ddb 100644
--- a/hw/arm_boot.c
+++ b/hw/arm_boot.c
@@ -242,10 +242,12 @@ static int load_dtb(target_phys_addr_t addr, const struct 
arm_boot_info *binfo)
 fprintf(stderr, "couldn't set /memory/reg\n");
 }
 
-rc = qemu_devtree_setprop_string(fdt, "/chosen", "bootargs",
-  binfo->kernel_cmdline);
-if (rc < 0) {
-fprintf(stderr, "couldn't set /chosen/bootargs\n");
+if (binfo->kernel_cmdline && *binfo->kernel_cmdline) {
+rc = qemu_devtree_setprop_string(fdt, "/chosen", "bootargs",
+  binfo->kernel_cmdline);
+if (rc < 0) {
+fprintf(stderr, "couldn't set /chosen/bootargs\n");
+}
 }
 
 if (binfo->initrd_size) {
-- 
1.7.1




[Qemu-devel] [Bug 712416] Missing required logs.

2012-06-19 Thread Brad Figg
This bug is missing log files that will aid in diagnosing the problem.
From a terminal window please run:

apport-collect 712416

and then change the status of the bug to 'Confirmed'.

If, due to the nature of the issue you have encountered, you are unable
to run this command, please add a comment stating that fact and change
the bug status to 'Confirmed'.

This change has been made by an automated script, maintained by the
Ubuntu Kernel Team.

** Changed in: linux (Ubuntu)
   Status: New => Incomplete

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/712416

Title:
  kvm_intel kernel module crash with via nano vmx

Status in QEMU:
  New
Status in “linux” package in Ubuntu:
  Incomplete
Status in “kvm” package in Debian:
  New

Bug description:
  kvm module for hardware virtualisation not work properly on via nano
  processors.

  Tested with processor: VIA Nano processor U2250.
  Processors flags (visible in /proc/cpuinfo): fpu vme de pse tsc msr pae mce 
cx8 apic sep mtrr pge mca cmov pat clflush acpi mmx fxsr sse sse2 ss tm syscall 
nx lm constant_tsc up rep_good pni monitor vmx est tm2 ssse3 cx16 xtpr rng 
rng_en ace ace_en ace2 phe phe_en lahf_lm

  With kernel 2.6.32: kvm not work and dmesg contains a lot of:
  handle_exception: unexpected, vectoring info 0x800d intr info 0x8b0d

  With kernel 2.6.35: all the system crash. Nothing visible in logs

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/712416/+subscriptions



Re: [Qemu-devel] [PATCH 07/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers

2012-06-19 Thread Gerd Hoffmann
  Hi,

> Note that usb_packet_map() invokes dma_memory_map() with a NULL invalidate
> callback function.  When IOMMU support is added, this will mean that
> usb_packet_map() and the corresponding usb_packet_unmap() must be called in
> close proximity without dropping the qemu device lock

Well, that isn't guaranteed ...

> - otherwise the guest
> might invalidate IOMMU mappings while they are still in use by the device
> code.

Guest tearing down mapping while usb packets using them are still in
flight would be a guest bug.  Still not impossible to happen though. How
is this case supposed to be handled?

cheers,
  Gerd



Re: [Qemu-devel] KVM call agenda for Tuesday, June 19th

2012-06-19 Thread Juan Quintela
Juan Quintela  wrote:
> Hi
>
> Please send in any agenda items you are interested in covering.
>
> Anthony suggested for last week:
> - multithreading vhost (and general vhost improvements)
>
> I suggest:
> - status of migration: post-copy, IDL, XBRLE, huge memory, ...
>   Will send an email with an status before tomorrow call.

XBRLE: v12 is coming today or so.


These three patches should be a no-brainer (just refactoring code).
The first one is shared with postcopy.

[PATCH v11 1/9] Add MigrationParams structure
[PATCH v11 5/9] Add uleb encoding/decoding functions
[PATCH v11 6/9] Add save_block_hdr function

These are the ones that we can discuss.

[PATCH v11 2/9] Add migration capabilites
[PATCH v11 3/9] Add XBZRLE documentation
[PATCH v11 4/9] Add cache handling functions
[PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live
[PATCH v11 8/9] Add set_cachesize command

Postcopy:  This is just refactoring that can be integrated.

[PATCH v2 01/41] arch_init: export sort_ram_list() and ram_save_block()
[PATCH v2 02/41] arch_init: export RAM_SAVE_xxx flags for postcopy
[PATCH v2 03/41] arch_init/ram_save: introduce constant for ram save version = 4
[PATCH v2 04/41] arch_init: refactor host_from_stream_offset()
[PATCH v2 05/41] arch_init/ram_save_live: factor out RAM_SAVE_FLAG_MEM_SIZE case
[PATCH v2 06/41] arch_init: refactor ram_save_block()
[PATCH v2 07/41] arch_init/ram_save_live: factor out ram_save_limit
[PATCH v2 08/41] arch_init/ram_load: refactor ram_load
[PATCH v2 09/41] arch_init: introduce helper function to find ram block with id 
string
[PATCH v2 10/41] arch_init: simplify a bit by ram_find_block()
[PATCH v2 11/41] arch_init: factor out counting transferred bytes
[PATCH v2 12/41] arch_init: factor out setting last_block, last_offset
[PATCH v2 13/41] exec.c: factor out qemu_get_ram_ptr()
[PATCH v2 14/41] exec.c: export last_ram_offset()
[PATCH v2 15/41] savevm: export qemu_peek_buffer, qemu_peek_byte, qemu_file_skip
[PATCH v2 16/41] savevm: qemu_pending_size() to return pending buffered size
[PATCH v2 17/41] savevm, buffered_file: introduce method to drain buffer of 
buffered file
[PATCH v2 18/41] QEMUFile: add qemu_file_fd() for later use
[PATCH v2 19/41] savevm/QEMUFile: drop qemu_stdio_fd
[PATCH v2 20/41] savevm/QEMUFileSocket: drop duplicated member fd
[PATCH v2 21/41] savevm: rename QEMUFileSocket to QEMUFileFD, socket_close to 
fd_close
[PATCH v2 22/41] savevm/QEMUFile: introduce qemu_fopen_fd
[PATCH v2 23/41] migration.c: remove redundant line in migrate_init()
[PATCH v2 24/41] migration: export migrate_fd_completed() and 
migrate_fd_cleanup()
[PATCH v2 25/41] migration: factor out parameters into MigrationParams
[PATCH v2 26/41] buffered_file: factor out buffer management logic
[PATCH v2 27/41] buffered_file: Introduce QEMUFileNonblock for nonblock write
[PATCH v2 28/41] buffered_file: add qemu_file to read/write to buffer in memory

This is postcopy proper.  For this part, postcopy first needs the issues
raised in the previous review to be addressed, and after that it will
probably need (at least) another review.  One thing to take into account is
that the umem (or whatever you want to call it) should be able to work over
RDMA.  Can anyone who knows about RDMA comment on this?

[PATCH v2 29/41] umem.h: import Linux umem.h
[PATCH v2 30/41] update-linux-headers.sh: teach umem.h to 
update-linux-headers.sh
[PATCH v2 31/41] configure: add CONFIG_POSTCOPY option
[PATCH v2 32/41] savevm: add new section that is used by postcopy
[PATCH v2 33/41] postcopy: introduce -postcopy and -postcopy-flags option
[PATCH v2 34/41] postcopy outgoing: add -p and -n option to migrate command
[PATCH v2 35/41] postcopy: introduce helper functions for postcopy
[PATCH v2 36/41] postcopy: implement incoming part of postcopy live migration
[PATCH v2 37/41] postcopy: implement outgoing part of postcopy live migration
[PATCH v2 38/41] postcopy/outgoing: add forward, backward option to specify the 
size of prefault
[PATCH v2 39/41] postcopy/outgoing: implement prefault
[PATCH v2 40/41] migrate: add -m (movebg) option to migrate command
[PATCH v2 41/41] migration/postcopy: add movebg mode

Huge memory migration.
These ones should be trivial, and should be integrated.

[PATCH 1/7] Add spent time for migration
[PATCH 2/7] Add tracepoints for savevm section start/end
[PATCH 3/7] No need to iterate if we already are over the limit
[PATCH 4/7] Only TCG needs TLB handling
[PATCH 5/7] Only calculate expected_time for stage 2

This one is also trivial, but Anthony on previous reviews wanted to have
migration-thread before we integrated this one.

[PATCH 6/7] Exit loop if we have been there too long

This one, Anthony wanted a different approach improving bitmap
handling.  Not done yet.

[PATCH 7/7] Maintaing number of dirty pages

IDL patchset.  I am not against generating the VMState information, but
I am trying to understand how the patch works.  Note that I don't grok
Python; this is one of the reasons it is taking so long.

This was one of the 

[Qemu-devel] [PATCH 14/16] cadence_gem: avoid stack-writing buffer-overrun

2012-06-19 Thread Peter Maydell
From: Jim Meyering 

Use sizeof(rxbuf)-size (not sizeof(rxbuf-size)) as the number
of bytes to clear.  The latter would always clear 4 or 8
bytes, possibly writing beyond the end of that stack buffer.
Alternatively, depending on the value of the "size" parameter,
it could fail to initialize the end of "rxbuf".
Spotted by coverity.

Signed-off-by: Jim Meyering 
Reviewed-by: Peter A.G. Crosthwaite 
Signed-off-by: Peter Maydell 
---
 hw/cadence_gem.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index e2140ae..dbde392 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -664,7 +664,7 @@ static ssize_t gem_receive(VLANClientState *nc, const 
uint8_t *buf, size_t size)
  */
 
 memcpy(rxbuf, buf, size);
-memset(rxbuf + size, 0, sizeof(rxbuf - size));
+memset(rxbuf + size, 0, sizeof(rxbuf) - size);
 rxbuf_ptr = rxbuf;
 crc_val = cpu_to_le32(crc32(0, rxbuf, MAX(size, 60)));
 if (size < 60) {
-- 
1.7.1




[Qemu-devel] [PATCH 07/16] hw/arm_gic: Add qdev property for GIC revision

2012-06-19 Thread Peter Maydell
GIC behaviour can be different between revision 1 and
2 of the architectural GIC specification; we also have
to handle the legacy 11MPCore GIC, which is different
again in some places. Introduce a qdev property so we
can behave appropriately.

Signed-off-by: Peter Maydell 
---
 hw/a15mpcore.c   |1 +
 hw/arm11mpcore.c |2 ++
 hw/arm_gic.c |   10 ++
 hw/armv7m_nvic.c |2 ++
 4 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/hw/a15mpcore.c b/hw/a15mpcore.c
index 5a7b365..fc0a02a 100644
--- a/hw/a15mpcore.c
+++ b/hw/a15mpcore.c
@@ -44,6 +44,7 @@ static int a15mp_priv_init(SysBusDevice *dev)
 s->gic = qdev_create(NULL, "arm_gic");
 qdev_prop_set_uint32(s->gic, "num-cpu", s->num_cpu);
 qdev_prop_set_uint32(s->gic, "num-irq", s->num_irq);
+qdev_prop_set_uint32(s->gic, "revision", 2);
 qdev_init_nofail(s->gic);
 busdev = sysbus_from_qdev(s->gic);
 
diff --git a/hw/arm11mpcore.c b/hw/arm11mpcore.c
index c528d7a..1bff3d3 100644
--- a/hw/arm11mpcore.c
+++ b/hw/arm11mpcore.c
@@ -123,6 +123,8 @@ static int mpcore_priv_init(SysBusDevice *dev)
 s->gic = qdev_create(NULL, "arm_gic");
 qdev_prop_set_uint32(s->gic, "num-cpu", s->num_cpu);
 qdev_prop_set_uint32(s->gic, "num-irq", s->num_irq);
+/* Request the legacy 11MPCore GIC behaviour: */
+qdev_prop_set_uint32(s->gic, "revision", 0);
 qdev_init_nofail(s->gic);
 
 /* Pass through outbound IRQ lines from the GIC */
diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 2ec10ce..ad72ac6 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -119,8 +119,13 @@ typedef struct gic_state
 struct gic_state *backref[NCPU];
 MemoryRegion cpuiomem[NCPU+1]; /* CPU interfaces */
 uint32_t num_irq;
+uint32_t revision;
 } gic_state;
 
+/* The special cases for the revision property: */
+#define REV_11MPCORE 0
+#define REV_NVIC 0xffffffff
+
 static inline int gic_get_current_cpu(gic_state *s)
 {
 if (s->num_cpu > 1) {
@@ -880,6 +885,11 @@ static int arm_gic_init(SysBusDevice *dev)
 static Property arm_gic_properties[] = {
 DEFINE_PROP_UINT32("num-cpu", gic_state, num_cpu, 1),
 DEFINE_PROP_UINT32("num-irq", gic_state, num_irq, 32),
+/* Revision can be 1 or 2 for GIC architecture specification
+ * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC.
+ * (Internally, 0xffffffff also indicates "not a GIC but an NVIC".)
+ */
+DEFINE_PROP_UINT32("revision", gic_state, revision, 1),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 747e245..4c130f1 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -447,6 +447,8 @@ static int armv7m_nvic_init(SysBusDevice *dev)
 
 /* The NVIC always has only one CPU */
 s->gic.num_cpu = 1;
+/* Tell the common code we're an NVIC */
+s->gic.revision = 0xffffffff;
 gic_init(&s->gic, s->num_irq);
 /* The NVIC and system controller register area looks like this:
  *  0..0xff : system control registers, including systick
-- 
1.7.1




[Qemu-devel] [PATCH 01/16] ARM: Exynos4210 IRQ: Introduce new IRQ gate functionality.

2012-06-19 Thread Peter Maydell
From: Evgeny Voevodin 

New IRQ gate consists of n_in input qdev gpio lines and one
output sysbus IRQ line. The output IRQ level is formed as OR
between all gpio inputs.

Signed-off-by: Evgeny Voevodin 
Signed-off-by: Peter Maydell 
---
 hw/exynos4210.c |   32 +++--
 hw/exynos4210.h |2 +-
 hw/exynos4210_gic.c |   78 +-
 3 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/hw/exynos4210.c b/hw/exynos4210.c
index dd14d01..9c20b3f 100644
--- a/hw/exynos4210.c
+++ b/hw/exynos4210.c
@@ -97,11 +97,11 @@ void exynos4210_write_secondary(ARMCPU *cpu,
 Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
 unsigned long ram_size)
 {
-qemu_irq cpu_irq[4];
-int n;
+qemu_irq cpu_irq[EXYNOS4210_NCPUS];
+int i, n;
 Exynos4210State *s = g_new(Exynos4210State, 1);
 qemu_irq *irqp;
-qemu_irq gate_irq[EXYNOS4210_IRQ_GATE_NINPUTS];
+qemu_irq gate_irq[EXYNOS4210_NCPUS][EXYNOS4210_IRQ_GATE_NINPUTS];
 unsigned long mem_size;
 DeviceState *dev;
 SysBusDevice *busdev;
@@ -128,16 +128,18 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
 s->irq_table = exynos4210_init_irq(&s->irqs);
 
 /* IRQ Gate */
-dev = qdev_create(NULL, "exynos4210.irq_gate");
-qdev_init_nofail(dev);
-/* Get IRQ Gate input in gate_irq */
-for (n = 0; n < EXYNOS4210_IRQ_GATE_NINPUTS; n++) {
-gate_irq[n] = qdev_get_gpio_in(dev, n);
-}
-busdev = sysbus_from_qdev(dev);
-/* Connect IRQ Gate output to cpu_irq */
-for (n = 0; n < EXYNOS4210_NCPUS; n++) {
-sysbus_connect_irq(busdev, n, cpu_irq[n]);
+for (i = 0; i < EXYNOS4210_NCPUS; i++) {
+dev = qdev_create(NULL, "exynos4210.irq_gate");
+qdev_prop_set_uint32(dev, "n_in", EXYNOS4210_IRQ_GATE_NINPUTS);
+qdev_init_nofail(dev);
+/* Get IRQ Gate input in gate_irq */
+for (n = 0; n < EXYNOS4210_IRQ_GATE_NINPUTS; n++) {
+gate_irq[i][n] = qdev_get_gpio_in(dev, n);
+}
+busdev = sysbus_from_qdev(dev);
+
+/* Connect IRQ Gate output to cpu_irq */
+sysbus_connect_irq(busdev, 0, cpu_irq[i]);
 }
 
 /* Private memory region and Internal GIC */
@@ -147,7 +149,7 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
 busdev = sysbus_from_qdev(dev);
 sysbus_mmio_map(busdev, 0, EXYNOS4210_SMP_PRIVATE_BASE_ADDR);
 for (n = 0; n < EXYNOS4210_NCPUS; n++) {
-sysbus_connect_irq(busdev, n, gate_irq[n * 2]);
+sysbus_connect_irq(busdev, n, gate_irq[n][0]);
 }
 for (n = 0; n < EXYNOS4210_INT_GIC_NIRQ; n++) {
 s->irqs.int_gic_irq[n] = qdev_get_gpio_in(dev, n);
@@ -166,7 +168,7 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
 /* Map Distributer interface */
 sysbus_mmio_map(busdev, 1, EXYNOS4210_EXT_GIC_DIST_BASE_ADDR);
 for (n = 0; n < EXYNOS4210_NCPUS; n++) {
-sysbus_connect_irq(busdev, n, gate_irq[n * 2 + 1]);
+sysbus_connect_irq(busdev, n, gate_irq[n][1]);
 }
 for (n = 0; n < EXYNOS4210_EXT_GIC_NIRQ; n++) {
 s->irqs.ext_gic_irq[n] = qdev_get_gpio_in(dev, n);
diff --git a/hw/exynos4210.h b/hw/exynos4210.h
index b1b4609..9b1ae4c 100644
--- a/hw/exynos4210.h
+++ b/hw/exynos4210.h
@@ -56,7 +56,7 @@
 /*
  * exynos4210 IRQ subsystem stub definitions.
  */
-#define EXYNOS4210_IRQ_GATE_NINPUTS 8
+#define EXYNOS4210_IRQ_GATE_NINPUTS 2 /* Internal and External GIC */
 
 #define EXYNOS4210_MAX_INT_COMBINER_OUT_IRQ  64
 #define EXYNOS4210_MAX_EXT_COMBINER_OUT_IRQ  16
diff --git a/hw/exynos4210_gic.c b/hw/exynos4210_gic.c
index e1b215e..7d03dd9 100644
--- a/hw/exynos4210_gic.c
+++ b/hw/exynos4210_gic.c
@@ -362,61 +362,64 @@ static void exynos4210_gic_register_types(void)
 
 type_init(exynos4210_gic_register_types)
 
-/*
- * IRQGate struct.
- * IRQ Gate represents OR gate between GICs to pass IRQ to PIC.
+/* IRQ OR Gate struct.
+ *
+ * This device models an OR gate. There are n_in input qdev gpio lines and one
+ * output sysbus IRQ line. The output IRQ level is formed as OR between all
+ * gpio inputs.
  */
 typedef struct {
 SysBusDevice busdev;
 
-qemu_irq pic_irq[EXYNOS4210_NCPUS]; /* output IRQs to PICs */
-uint32_t gpio_level[EXYNOS4210_IRQ_GATE_NINPUTS]; /* Input levels */
+uint32_t n_in;  /* inputs amount */
+uint32_t *level;/* input levels */
+qemu_irq out;   /* output IRQ */
 } Exynos4210IRQGateState;
 
+static Property exynos4210_irq_gate_properties[] = {
+DEFINE_PROP_UINT32("n_in", Exynos4210IRQGateState, n_in, 1),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static const VMStateDescription vmstate_exynos4210_irq_gate = {
 .name = "exynos4210.irq_gate",
-.version_id = 1,
-.minimum_version_id = 1,
-.minimum_version_id_old = 1,
+.version_id = 2,
+.minimum_version_id = 2,
+.minimum_version_id_old = 2,
 .fields = (VMStateField[]) {
-VMSTATE_UINT32_ARRAY(gpio_level, Ex

[Qemu-devel] [PATCH 06/16] hw/armv7m_nvic: Use MemoryRegions for NVIC specific registers

2012-06-19 Thread Peter Maydell
Implement the NVIC specific register areas using a set of
overlaid MemoryRegions in a container, rather than by having
the arm_gic read/write functions use special purpose callbacks.

Signed-off-by: Peter Maydell 
---
 hw/arm_gic.c |   33 ---
 hw/armv7m_nvic.c |   74 +-
 2 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 3293ae4..2ec10ce 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -37,17 +37,17 @@ do { printf("arm_gic: " fmt , ## __VA_ARGS__); } while (0)
 #endif
 
 #ifdef NVIC
-static const uint8_t gic_id[] =
-{ 0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1 };
 /* The NVIC has 16 internal vectors.  However these are not exposed
through the normal GIC interface.  */
 #define GIC_BASE_IRQ32
 #else
-static const uint8_t gic_id[] =
-{ 0x90, 0x13, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
 #define GIC_BASE_IRQ0
 #endif
 
+static const uint8_t gic_id[] = {
+0x90, 0x13, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1
+};
+
 #define FROM_SYSBUSGIC(type, dev) \
 DO_UPCAST(type, gic, FROM_SYSBUS(gic_state, dev))
 
@@ -312,7 +312,6 @@ static uint32_t gic_dist_readb(void *opaque, 
target_phys_addr_t offset)
 cpu = gic_get_current_cpu(s);
 cm = 1 << cpu;
 if (offset < 0x100) {
-#ifndef NVIC
 if (offset == 0)
 return s->enabled;
 if (offset == 4)
@@ -323,7 +322,6 @@ static uint32_t gic_dist_readb(void *opaque, 
target_phys_addr_t offset)
 /* Interrupt Security , RAZ/WI */
 return 0;
 }
-#endif
 goto bad_reg;
 } else if (offset < 0x200) {
 /* Interrupt Set/Clear Enable.  */
@@ -385,6 +383,7 @@ static uint32_t gic_dist_readb(void *opaque, 
target_phys_addr_t offset)
 } else {
 res = GIC_TARGET(irq);
 }
+#endif
 } else if (offset < 0xf00) {
 /* Interrupt Configuration.  */
 irq = (offset - 0xc00) * 2 + GIC_BASE_IRQ;
@@ -397,7 +396,6 @@ static uint32_t gic_dist_readb(void *opaque, 
target_phys_addr_t offset)
 if (GIC_TEST_TRIGGER(irq + i))
 res |= (2 << (i * 2));
 }
-#endif
 } else if (offset < 0xfe0) {
 goto bad_reg;
 } else /* offset >= 0xfe0 */ {
@@ -424,13 +422,6 @@ static uint32_t gic_dist_readw(void *opaque, 
target_phys_addr_t offset)
 static uint32_t gic_dist_readl(void *opaque, target_phys_addr_t offset)
 {
 uint32_t val;
-#ifdef NVIC
-gic_state *s = (gic_state *)opaque;
-uint32_t addr;
-addr = offset;
-if (addr < 0x100 || addr > 0xd00)
-return nvic_readl(s, addr);
-#endif
 val = gic_dist_readw(opaque, offset);
 val |= gic_dist_readw(opaque, offset + 2) << 16;
 return val;
@@ -446,9 +437,6 @@ static void gic_dist_writeb(void *opaque, 
target_phys_addr_t offset,
 
 cpu = gic_get_current_cpu(s);
 if (offset < 0x100) {
-#ifdef NVIC
-goto bad_reg;
-#else
 if (offset == 0) {
 s->enabled = (value & 1);
 DPRINTF("Distribution %sabled\n", s->enabled ? "En" : "Dis");
@@ -459,7 +447,6 @@ static void gic_dist_writeb(void *opaque, 
target_phys_addr_t offset,
 } else {
 goto bad_reg;
 }
-#endif
 } else if (offset < 0x180) {
 /* Interrupt Set Enable.  */
 irq = (offset - 0x100) * 8 + GIC_BASE_IRQ;
@@ -552,6 +539,7 @@ static void gic_dist_writeb(void *opaque, 
target_phys_addr_t offset,
 else if (irq < GIC_INTERNAL)
 value = ALL_CPU_MASK;
 s->irq_target[irq] = value & ALL_CPU_MASK;
+#endif
 } else if (offset < 0xf00) {
 /* Interrupt Configuration.  */
 irq = (offset - 0xc00) * 4 + GIC_BASE_IRQ;
@@ -571,7 +559,6 @@ static void gic_dist_writeb(void *opaque, 
target_phys_addr_t offset,
 GIC_CLEAR_TRIGGER(irq + i);
 }
 }
-#endif
 } else {
 /* 0xf00 is only handled for 32-bit writes.  */
 goto bad_reg;
@@ -593,14 +580,6 @@ static void gic_dist_writel(void *opaque, 
target_phys_addr_t offset,
 uint32_t value)
 {
 gic_state *s = (gic_state *)opaque;
-#ifdef NVIC
-uint32_t addr;
-addr = offset;
-if (addr < 0x100 || (addr > 0xd00 && addr != 0xf00)) {
-nvic_writel(s, addr, value);
-return;
-}
-#endif
 if (offset == 0xf00) {
 int cpu;
 int irq;
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 653c011..747e245 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -30,9 +30,16 @@ typedef struct {
 int64_t tick;
 QEMUTimer *timer;
 } systick;
+MemoryRegion sysregmem;
+MemoryRegion gic_iomem_alias;
+MemoryRegion container;
 uint32_t num_irq;
 } nvic_state;
 
+static const uint8_t nvic_id[] = {
+0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1
+};
+
 /* qemu timers run at 1GHz.   We want something closer to 1MHz.  */
 #define SYSTICK_SCALE 1000ULL
 
@@ -358,12 +36

[Qemu-devel] [PATCH 05/16] hw/arm_gic: Move NVIC specific reset to armv7m_nvic_reset

2012-06-19 Thread Peter Maydell
Move the NVIC specific bits of reset to the NVIC's own
reset function, rather than using ifdefs in the common
arm_gic reset.

Signed-off-by: Peter Maydell 
---
 hw/arm_gic.c |   10 --
 hw/armv7m_nvic.c |7 +++
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 2d8ceb8..3293ae4 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -743,23 +743,13 @@ static void gic_reset(DeviceState *dev)
 s->current_pending[i] = 1023;
 s->running_irq[i] = 1023;
 s->running_priority[i] = 0x100;
-#ifdef NVIC
-/* The NVIC doesn't have per-cpu interfaces, so enable by default.  */
-s->cpu_enabled[i] = 1;
-#else
 s->cpu_enabled[i] = 0;
-#endif
 }
 for (i = 0; i < 16; i++) {
 GIC_SET_ENABLED(i, ALL_CPU_MASK);
 GIC_SET_TRIGGER(i);
 }
-#ifdef NVIC
-/* The NVIC is always enabled.  */
-s->enabled = 1;
-#else
 s->enabled = 0;
-#endif
 }
 
 static void gic_save(QEMUFile *f, void *opaque)
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 99a87a2..653c011 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -382,6 +382,13 @@ static void armv7m_nvic_reset(DeviceState *dev)
 {
 nvic_state *s = FROM_SYSBUSGIC(nvic_state, sysbus_from_qdev(dev));
 gic_reset(&s->gic.busdev.qdev);
+/* Common GIC reset resets to disabled; the NVIC doesn't have
+ * per-CPU interfaces so mark our non-existent CPU interface
+ * as enabled by default.
+ */
+s->gic.cpu_enabled[0] = 1;
+/* The NVIC as a whole is always enabled. */
+s->gic.enabled = 1;
 systick_reset(s);
 }
 
-- 
1.7.1




[Qemu-devel] [PATCH 11/16] hw/armv7m_nvic: Make the NVIC a freestanding class

2012-06-19 Thread Peter Maydell
Rearrange the GIC and NVIC so both are straightforward
subclasses of a common class, rather than having the NVIC
source file textually include arm_gic.c.

Signed-off-by: Peter Maydell 
---
 hw/arm/Makefile.objs  |2 +-
 hw/arm_gic.c  |  241 +++--
 hw/arm_gic_common.c   |  184 +
 hw/arm_gic_internal.h |  136 
 hw/armv7m_nvic.c  |   48 ---
 5 files changed, 371 insertions(+), 240 deletions(-)
 create mode 100644 hw/arm_gic_common.c
 create mode 100644 hw/arm_gic_internal.h

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index a0ff6a6..88ff47d 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -6,7 +6,7 @@ obj-y += cadence_uart.o
 obj-y += cadence_ttc.o
 obj-y += cadence_gem.o
 obj-y += xilinx_zynq.o zynq_slcr.o
-obj-y += arm_gic.o
+obj-y += arm_gic.o arm_gic_common.o
 obj-y += realview_gic.o realview.o arm_sysctl.o arm11mpcore.o a9mpcore.o
 obj-y += exynos4210_gic.o exynos4210_combiner.o exynos4210.o
 obj-y += exynos4_boards.o exynos4210_uart.o exynos4210_pwm.o
diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index ad5ab3c..ec22322 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -19,13 +19,7 @@
  */
 
 #include "sysbus.h"
-
-/* Maximum number of possible interrupts, determined by the GIC architecture */
-#define GIC_MAXIRQ 1020
-/* First 32 are private to each CPU (SGIs and PPIs). */
-#define GIC_INTERNAL 32
-/* Maximum number of possible CPU interfaces, determined by GIC architecture */
-#define NCPU 8
+#include "arm_gic_internal.h"
 
 //#define DEBUG_GIC
 
@@ -36,88 +30,12 @@ do { printf("arm_gic: " fmt , ## __VA_ARGS__); } while (0)
 #define DPRINTF(fmt, ...) do {} while(0)
 #endif
 
-/* The NVIC has 16 internal vectors.  However these are not exposed
-   through the normal GIC interface.  */
-#define GIC_BASE_IRQ ((s->revision == REV_NVIC) ? 32 : 0)
-
 static const uint8_t gic_id[] = {
 0x90, 0x13, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1
 };
 
-#define FROM_SYSBUSGIC(type, dev) \
-DO_UPCAST(type, gic, FROM_SYSBUS(gic_state, dev))
-
-typedef struct gic_irq_state
-{
-/* The enable bits are only banked for per-cpu interrupts.  */
-unsigned enabled:NCPU;
-unsigned pending:NCPU;
-unsigned active:NCPU;
-unsigned level:NCPU;
-unsigned model:1; /* 0 = N:N, 1 = 1:N */
-unsigned trigger:1; /* nonzero = edge triggered.  */
-} gic_irq_state;
-
-#define ALL_CPU_MASK ((unsigned)(((1 << NCPU) - 1)))
 #define NUM_CPU(s) ((s)->num_cpu)
 
-#define GIC_SET_ENABLED(irq, cm) s->irq_state[irq].enabled |= (cm)
-#define GIC_CLEAR_ENABLED(irq, cm) s->irq_state[irq].enabled &= ~(cm)
-#define GIC_TEST_ENABLED(irq, cm) ((s->irq_state[irq].enabled & (cm)) != 0)
-#define GIC_SET_PENDING(irq, cm) s->irq_state[irq].pending |= (cm)
-#define GIC_CLEAR_PENDING(irq, cm) s->irq_state[irq].pending &= ~(cm)
-#define GIC_TEST_PENDING(irq, cm) ((s->irq_state[irq].pending & (cm)) != 0)
-#define GIC_SET_ACTIVE(irq, cm) s->irq_state[irq].active |= (cm)
-#define GIC_CLEAR_ACTIVE(irq, cm) s->irq_state[irq].active &= ~(cm)
-#define GIC_TEST_ACTIVE(irq, cm) ((s->irq_state[irq].active & (cm)) != 0)
-#define GIC_SET_MODEL(irq) s->irq_state[irq].model = 1
-#define GIC_CLEAR_MODEL(irq) s->irq_state[irq].model = 0
-#define GIC_TEST_MODEL(irq) s->irq_state[irq].model
-#define GIC_SET_LEVEL(irq, cm) s->irq_state[irq].level = (cm)
-#define GIC_CLEAR_LEVEL(irq, cm) s->irq_state[irq].level &= ~(cm)
-#define GIC_TEST_LEVEL(irq, cm) ((s->irq_state[irq].level & (cm)) != 0)
-#define GIC_SET_TRIGGER(irq) s->irq_state[irq].trigger = 1
-#define GIC_CLEAR_TRIGGER(irq) s->irq_state[irq].trigger = 0
-#define GIC_TEST_TRIGGER(irq) s->irq_state[irq].trigger
-#define GIC_GET_PRIORITY(irq, cpu) (((irq) < GIC_INTERNAL) ?\
-s->priority1[irq][cpu] :\
-s->priority2[(irq) - GIC_INTERNAL])
-#define GIC_TARGET(irq) s->irq_target[irq]
-
-typedef struct gic_state
-{
-SysBusDevice busdev;
-qemu_irq parent_irq[NCPU];
-int enabled;
-int cpu_enabled[NCPU];
-
-gic_irq_state irq_state[GIC_MAXIRQ];
-int irq_target[GIC_MAXIRQ];
-int priority1[GIC_INTERNAL][NCPU];
-int priority2[GIC_MAXIRQ - GIC_INTERNAL];
-int last_active[GIC_MAXIRQ][NCPU];
-
-int priority_mask[NCPU];
-int running_irq[NCPU];
-int running_priority[NCPU];
-int current_pending[NCPU];
-
-uint32_t num_cpu;
-
-MemoryRegion iomem; /* Distributor */
-/* This is just so we can have an opaque pointer which identifies
- * both this GIC and which CPU interface we should be accessing.
- */
-struct gic_state *backref[NCPU];
-MemoryRegion cpuiomem[NCPU+1]; /* CPU interfaces */
-uint32_t num_irq;
-uint32_t revision;
-} gic_state;
-
-/* The special cases for the revision property: */
-#define REV_11MPCORE 0
-#define REV_NVIC 0x
-
 static inline int gic_get_current_cpu(gic_state *

Re: [Qemu-devel] Cant Upload tests to Wiki

2012-06-19 Thread Anthony Liguori

On 06/19/2012 01:18 AM, Peter Crosthwaite wrote:

Hi,

I'm trying to upload my little endian Microblaze test vector to the
wiki, but the file upload restrictions are for images only and file
size<  2MB. Can we relax these so I can upload my test vectors?


Not really.  There are two issues: 1) making sure we comply with appropriate 
licenses when shipping binaries 2) making sure the binaries aren't carrying 
malicious payloads.


Is the source code for these tests in qemu.git?  If the source is already in 
qemu.git, why not include the binaries in qemu.git too?


How large are the files?

Regards,

Anthony Liguori



Regards,
Peter






Re: [Qemu-devel] [PATCH v3 3/5] osdep: Enable qemu_open to dup pre-opened fd

2012-06-19 Thread Corey Bryant



On 06/18/2012 04:10 AM, Kevin Wolf wrote:

Am 15.06.2012 22:00, schrieb Eric Blake:

On 06/15/2012 01:19 PM, Corey Bryant wrote:


There are some flags that I don't think we'll be able to change.  For
example: O_RDONLY, O_WRONLY, O_RDWR.  I assume libvirt would open all
files O_RDWR.


I think we need to check all of them and fail qemu_open() if they don't
match. Those that qemu can change, should be just changed, of course.



Ok.  I remember a scenario where QEMU opens a file read-only (perhaps to
check headers and determine the file format) before re-opening it
read-write.  Perhaps this is only when format= isn't specified with
-drive.  I'm thinking we may need to change flags to read-write where
they used to be read-only, in some circumstances.


In those situations, libvirt would pass fd with O_RDWR, and qemu_open()
would be fine requesting O_RDONLY the first time (subset is okay), and
O_RDWR the second time.  Where you have to error out is where libvirt
passes O_RDONLY but qemu wants O_RDWR, and so forth.


Let's try it with requiring an exact match first. If you pass the
format, I think the probing is completely avoided indeed, and having
read-only images really opened O_RDONLY protects against stupid mistakes.

Or if we really need to open the file for probing, maybe we could add a
flag that relaxes the check and that isn't used in the real bdrv_open().

Kevin



I haven't heard any objection to this so I'll be checking for exact 
match, and implementing a flag to relax the check only if it's necessary.


--
Regards,
Corey





Re: [Qemu-devel] KVM call agenda for Tuesday, June 19th

2012-06-19 Thread Anthony Liguori

On 06/19/2012 08:54 AM, Juan Quintela wrote:

Juan Quintela  wrote:

Hi

Please send in any agenda items you are interested in covering.

Anthony suggested for last week:
- multithreading vhost (and general vhost improvements)

I suggest:
- status of migration: post-copy, IDL, XBZRLE, huge memory, ...
   Will send an email with a status update before tomorrow's call.


XBZRLE: v12 is coming today or so.


These three patches should be a no-brainer (just refactoring code).
The first one is shared with postcopy.

[PATCH v11 1/9] Add MigrationParams structure
[PATCH v11 5/9] Add uleb encoding/decoding functions
[PATCH v11 6/9] Add save_block_hdr function

These are the ones that we can discuss.

[PATCH v11 2/9] Add migration capabilites
[PATCH v11 3/9] Add XBZRLE documentation
[PATCH v11 4/9] Add cache handling functions
[PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live
[PATCH v11 8/9] Add set_cachesize command

Postcopy:  This is just refactoring that can be integrated.

[PATCH v2 01/41] arch_init: export sort_ram_list() and ram_save_block()
[PATCH v2 02/41] arch_init: export RAM_SAVE_xxx flags for postcopy
[PATCH v2 03/41] arch_init/ram_save: introduce constant for ram save version = 4
[PATCH v2 04/41] arch_init: refactor host_from_stream_offset()
[PATCH v2 05/41] arch_init/ram_save_live: factor out RAM_SAVE_FLAG_MEM_SIZE case
[PATCH v2 06/41] arch_init: refactor ram_save_block()
[PATCH v2 07/41] arch_init/ram_save_live: factor out ram_save_limit
[PATCH v2 08/41] arch_init/ram_load: refactor ram_load
[PATCH v2 09/41] arch_init: introduce helper function to find ram block with id 
string
[PATCH v2 10/41] arch_init: simplify a bit by ram_find_block()
[PATCH v2 11/41] arch_init: factor out counting transferred bytes
[PATCH v2 12/41] arch_init: factor out setting last_block, last_offset
[PATCH v2 13/41] exec.c: factor out qemu_get_ram_ptr()
[PATCH v2 14/41] exec.c: export last_ram_offset()
[PATCH v2 15/41] savevm: export qemu_peek_buffer, qemu_peek_byte, qemu_file_skip
[PATCH v2 16/41] savevm: qemu_pending_size() to return pending buffered size
[PATCH v2 17/41] savevm, buffered_file: introduce method to drain buffer of 
buffered file
[PATCH v2 18/41] QEMUFile: add qemu_file_fd() for later use
[PATCH v2 19/41] savevm/QEMUFile: drop qemu_stdio_fd
[PATCH v2 20/41] savevm/QEMUFileSocket: drop duplicated member fd
[PATCH v2 21/41] savevm: rename QEMUFileSocket to QEMUFileFD, socket_close to 
fd_close
[PATCH v2 22/41] savevm/QEMUFile: introduce qemu_fopen_fd
[PATCH v2 23/41] migration.c: remove redundant line in migrate_init()
[PATCH v2 24/41] migration: export migrate_fd_completed() and 
migrate_fd_cleanup()
[PATCH v2 25/41] migration: factor out parameters into MigrationParams
[PATCH v2 26/41] buffered_file: factor out buffer management logic
[PATCH v2 27/41] buffered_file: Introduce QEMUFileNonblock for nonblock write
[PATCH v2 28/41] buffered_file: add qemu_file to read/write to buffer in memory

This is postcopy proper.  From this point on, postcopy needs the issues
raised in the previous review to be addressed, and after that probably (at
least) another review.  One thing to take into account is that umem (or
whatever you want to call it) should be able to work over RDMA.  Can anyone
who knows about RDMA comment on this?

[PATCH v2 29/41] umem.h: import Linux umem.h
[PATCH v2 30/41] update-linux-headers.sh: teach umem.h to 
update-linux-headers.sh
[PATCH v2 31/41] configure: add CONFIG_POSTCOPY option
[PATCH v2 32/41] savevm: add new section that is used by postcopy
[PATCH v2 33/41] postcopy: introduce -postcopy and -postcopy-flags option
[PATCH v2 34/41] postcopy outgoing: add -p and -n option to migrate command
[PATCH v2 35/41] postcopy: introduce helper functions for postcopy
[PATCH v2 36/41] postcopy: implement incoming part of postcopy live migration
[PATCH v2 37/41] postcopy: implement outgoing part of postcopy live migration
[PATCH v2 38/41] postcopy/outgoing: add forward, backward option to specify the 
size of prefault
[PATCH v2 39/41] postcopy/outgoing: implement prefault
[PATCH v2 40/41] migrate: add -m (movebg) option to migrate command
[PATCH v2 41/41] migration/postcopy: add movebg mode


I'm not at all convinced that postcopy is a good idea.  There needs to be a clear 
expression of what the value proposition is that's backed by benchmarks.  Those 
benchmarks need to include latency measurements of downtime which so far, I've 
not seen.


I don't want to take any postcopy patches until this discussion happens.

Regards,

Anthony Liguori



Huge memory migration.
These should be trivial and can be integrated.

[PATCH 1/7] Add spent time for migration
[PATCH 2/7] Add tracepoints for savevm section start/end
[PATCH 3/7] No need to iterate if we already are over the limit
[PATCH 4/7] Only TCG needs TLB handling
[PATCH 5/7] Only calculate expected_time for stage 2

This one is also trivial, but Anthony on previous reviews wanted to have
migration-thread before we integrated this one.

[PATCH 6/7] Exit loop i

Re: [Qemu-devel] [PATCH 05/31] dt: add helper for phandle enumeration

2012-06-19 Thread Alexander Graf

On 09.06.2012, at 03:02, Peter Crosthwaite wrote:

> On Fri, Jun 8, 2012 at 10:46 PM, Alexander Graf  wrote:
>> 
>> On 07.06.2012, at 02:28, Peter Crosthwaite wrote:
>> 
>>> On Thu, Jun 7, 2012 at 1:58 AM, Alexander Graf  wrote:
 On 06/06/2012 07:11 AM, Peter Crosthwaite wrote:
> 
> On Wed, 2012-06-06 at 01:52 +0200, Alexander Graf wrote:
>> 
>> This patch adds a helper to search for a node's phandle by its path. This
>> is especially useful when the phandle is part of an array, not just a
>> single
>> cell in which case qemu_devtree_setprop_phandle would be the easy choice.
>> 
>> Signed-off-by: Alexander Graf
>> ---
>>  device_tree.c |   16 +++-
>>  device_tree.h |1 +
>>  2 files changed, 16 insertions(+), 1 deletions(-)
>> 
>> diff --git a/device_tree.c b/device_tree.c
>> index 6cbc5af..6745d17 100644
>> --- a/device_tree.c
>> +++ b/device_tree.c
>> @@ -162,10 +162,24 @@ int qemu_devtree_setprop_string(void *fdt, const
>> char *node_path,
>>  return r;
>>  }
>> 
>> +uint32_t qemu_devtree_get_phandle(void *fdt, const char *path)
>> +{
>> +uint32_t r;
>> +
>> +r = fdt_get_phandle(fdt, findnode_nofail(fdt, path));
>> +if (r<= 0) {
>> +fprintf(stderr, "%s: Couldn't get phandle for %s: %s\n",
>> __func__,
>> +path, fdt_strerror(r));
>> +exit(1);
> 
> Is it really this function's job to terminate qemu on failure?  There may be
> scenarios where a node does not have a phandle where the client can
> handle that. Perhaps return -1 on error and the client has to check?
 
 
 If it can, what's the point in not calling libfdt directly then?
 
>>> 
>>> It's a very good question. If the point of this function is to fail on
>>> error though, perhaps it should have the _nofail suffix for clarity?
>> 
>> If we do a global s/qemu_devtree_/qdt/g throughout the code base, I'd be 
>> open to add _nofail to all function names at the end :). Otherwise we'll get 
>> into even more trouble of staying within 80 characters per line...
>> 
> 
> Since the majority of those functions are wrappers around "fdt_" API
> calls, perhaps it should be:
> 
> s/qemu_devtree_/qemu_fdt_/g
> 
> buys you 4 chars, which should minimise the incidence of 80 char
> violations when adding _nofail suffixes. Do we have a large number of
> lines already between 78-80 chars?

Hrm. Let's keep this in mind for a later cleanup series. It certainly is out of 
scope of this patch set :).


Alex




[Qemu-devel] [PATCH 15/16] cadence_ttc: changed master clock frequency

2012-06-19 Thread Peter Maydell
From: Peter A. G. Crosthwaite 

Change the timer clock frequency to 133MHz, which is the correct value. The old
2.5MHz value was for the pre-silicon emulation platform.

Signed-off-by: Peter A. G. Crosthwaite 
Signed-off-by: Peter Maydell 
---
 hw/cadence_ttc.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/cadence_ttc.c b/hw/cadence_ttc.c
index 2b5477b..dd02f86 100644
--- a/hw/cadence_ttc.c
+++ b/hw/cadence_ttc.c
@@ -405,7 +405,7 @@ static int cadence_ttc_init(SysBusDevice *dev)
 int i;
 
 for (i = 0; i < 3; ++i) {
-cadence_timer_init(250, &s->timer[i]);
+cadence_timer_init(13300, &s->timer[i]);
 sysbus_init_irq(dev, &s->timer[i].irq);
 }
 
-- 
1.7.1




Re: [Qemu-devel] vm state save/restore question

2012-06-19 Thread Alexander Graf

On 09.06.2012, at 13:34, Benjamin Herrenschmidt wrote:

> On Sat, 2012-06-09 at 20:53 +1000, Benjamin Herrenschmidt wrote:
>> Hi folks !
> 
> (After some discussion with Andreas ...)
> 
>> I'm looking at sorting out the state save/restore of target-ppc (which
>> means understanding in general how it works in qemu :-)
>> 
>> So far I've somewhat figured out that there's the "old way" where we
>> just provide a "bulk" save/restore function pair, and the "new way"
>> where we have nicely tagged field lists etc...
>> 
>> x86 seems to use the latter for the CPU state, ppc is a mess and uses the
>> former with interesting incompatible format changes depending on how qemu
>> is built :-) So I think that's one area I need to fix.
> 
> Ok, so I'm told there are patches to convert ppc, I haven't seen them in
> my list archives, so if somebody has a pointer, please shoot, that will
> save me some work :-)
> 
>  .../...
> 
>> What I'd need is something in spapr that can be used to "resync" bits of
>> the cpu state with the external htab that gets run after everything is
>> loaded and before emulation restarts.
>> 
>> Any idea how to do that properly ? I suppose I could also try to iterate
>> all the vcpu's after loading the hash table & update the fields but not
>> only that's gross ... I also don't know how to do it :-)
> 
> So I did an experiment using the "old style" save/restore (bad boy !)
> and got that part to work by just iterating the vcpu's.
> 
> It's a bit nasty but it's the right way I think, ie, what we have here
> (the external hash table) is a global object under control/ownership of
> the platform code for which a pointer is cached in the CPU state (so the
> mmu emulation gets to it easily), so those cached pointers need to be
> updated in all CPUs when a new hash table is loaded/allocated.
> 
> That leads to another question however... I need to add save/restore to
> a bunch more stuff such as the xics (interrupt controller), the various
> spapr devices, etc...
> 
> So far the VMState stuff is all nice if you have fixed sized arrays.
> However I haven't quite found out the right way to use it for things
> like:
> 
> - The hash table (mentioned above). This is just a big chunk of memory
> (it will routinely be 16M), so I really don't want to start iterating
> all elements, just a bulk load will do, and the size might actually be
> variable.
> 
> - The XICS (interrupt controller). The actual size of the interrupt
> state array can vary (the number of interrupt sources can vary, it's
> fixed today by the machine code but I wouldn't rely too much on that and
> in any case, from the XICS driver perspective, it's not a constant, it's
> a variable it gets passed when initializing).
> 
> So in both these cases, I need either code to control the save/load
> process (old style ? hard to hook into vmstate as far as I can tell) or
> maybe a way to describe the array so that the array size itself is a
> pointer to a variable (Andreas mentioned something along those lines).
> Is there any doco for that stuff btw ? I haven't seen anything
> detailed...

I'm sure Juan knows more there :)


Alex




[Qemu-devel] [PULL 00/16] arm-devs queue

2012-06-19 Thread Peter Maydell
Hi; this is an arm-devs pullreq which mostly has patches which I've
had queued since before freeze and my holiday. (I had to make a
trivial fix to one of the GIC patches to account for the list of
object files moving from Makefile.target to hw/arm/Makefile.objs
but otherwise an unproblematic rebase to current master.)
I know there are other arm devs patches on my to-review list but
this seems a long enough set of patches to be worth sending a
pullreq for now.

Please pull.

-- PMM

The following changes since commit 8aca521512a14c439624191bd0a891c52f91b401:

  Merge remote-tracking branch 'afaerber-or/qom-next-2' into staging 
(2012-06-18 10:35:16 -0500)

are available in the git repository at:

  git://git.linaro.org/people/pmaydell/qemu-arm.git arm-devs.for-upstream

Andreas Färber (1):
  arm_boot: Fix typos in comment

Evgeny Voevodin (1):
  ARM: Exynos4210 IRQ: Introduce new IRQ gate functionality.

Jim Meyering (1):
  cadence_gem: avoid stack-writing buffer-overrun

Peter A. G. Crosthwaite (2):
  cadence_ttc: changed master clock frequency
  arm_boot: Conditionalised DTB command line update

Peter Maydell (11):
  hw/arm_gic: Remove NVIC ifdefs from gic_state struct
  hw/arm_gic: Remove the special casing of NCPU for the NVIC
  hw/arm_gic: Move NVIC specific reset to armv7m_nvic_reset
  hw/armv7m_nvic: Use MemoryRegions for NVIC specific registers
  hw/arm_gic: Add qdev property for GIC revision
  hw/arm_gic: Make CPU target registers RAZ/WI on uniprocessor
  hw/arm_gic.c: Make NVIC interrupt numbering a runtime setting
  hw/arm_gic: Move CPU interface memory region setup into arm_gic_init
  hw/armv7m_nvic: Make the NVIC a freestanding class
  hw/omap.h: Drop broken MEM_VERBOSE tracing
  hw/a9mpcore: Fix compilation failure if physaddrs are 64 bit

 hw/a15mpcore.c|1 +
 hw/a9mpcore.c |2 +-
 hw/arm-misc.h |4 +-
 hw/arm/Makefile.objs  |2 +-
 hw/arm11mpcore.c  |2 +
 hw/arm_boot.c |   10 +-
 hw/arm_gic.c  |  366 -
 hw/arm_gic_common.c   |  184 +
 hw/arm_gic_internal.h |  136 ++
 hw/armv7m_nvic.c  |  138 ---
 hw/cadence_gem.c  |2 +-
 hw/cadence_ttc.c  |2 +-
 hw/exynos4210.c   |   32 +++--
 hw/exynos4210.h   |2 +-
 hw/exynos4210_gic.c   |   78 +-
 hw/omap.h |   95 -
 16 files changed, 569 insertions(+), 487 deletions(-)
 create mode 100644 hw/arm_gic_common.c
 create mode 100644 hw/arm_gic_internal.h



Re: [Qemu-devel] KVM call agenda for Tuesday, June 19th

2012-06-19 Thread Takuya Yoshikawa
On Tue, 19 Jun 2012 09:01:36 -0500
Anthony Liguori  wrote:

> I'm not at all convinced that postcopy is a good idea.  There needs a clear 
> expression of what the value proposition is that's backed by benchmarks.  
> Those 
> benchmarks need to include latency measurements of downtime which so far, 
> I've 
> not seen.
> 
> I don't want to take any postcopy patches until this discussion happens.

FWIW:

I rather see postcopy as a way of migrating guests forcibly and I know
a service in which such a way is needed: emergency migration.  There is
also a product which does live migration when some hardware problems are
detected (as a semi-FT solution) -- in such cases, we cannot wait until
the guest becomes calm.

Although I am not certain whether QEMU can be used for such products,
it may be worth thinking about.

Thanks,
Takuya



[Qemu-devel] [PATCH 09/16] hw/arm_gic.c: Make NVIC interrupt numbering a runtime setting

2012-06-19 Thread Peter Maydell
Make the minor tweaks to interrupt numbering used by the NVIC
a runtime setting rather than a compile time one, so we can
drop more NVIC ifdefs.

Signed-off-by: Peter Maydell 
---
 hw/arm_gic.c |   12 
 1 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index a6e2431..c288bc5 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -36,13 +36,9 @@ do { printf("arm_gic: " fmt , ## __VA_ARGS__); } while (0)
 #define DPRINTF(fmt, ...) do {} while(0)
 #endif
 
-#ifdef NVIC
 /* The NVIC has 16 internal vectors.  However these are not exposed
through the normal GIC interface.  */
-#define GIC_BASE_IRQ32
-#else
-#define GIC_BASE_IRQ0
-#endif
+#define GIC_BASE_IRQ ((s->revision == REV_NVIC) ? 32 : 0)
 
 static const uint8_t gic_id[] = {
 0x90, 0x13, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1
@@ -839,7 +835,6 @@ static void gic_init(gic_state *s, int num_irq)
 }
 
 i = s->num_irq - GIC_INTERNAL;
-#ifndef NVIC
 /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU.
  * GPIO array layout is thus:
  *  [0..N-1] SPIs
@@ -847,8 +842,9 @@ static void gic_init(gic_state *s, int num_irq)
  *  [N+32..N+63] PPIs for CPU 1
  *   ...
  */
-i += (GIC_INTERNAL * s->num_cpu);
-#endif
+if (s->revision != REV_NVIC) {
+i += (GIC_INTERNAL * s->num_cpu);
+}
 qdev_init_gpio_in(&s->busdev.qdev, gic_set_irq, i);
 for (i = 0; i < NUM_CPU(s); i++) {
 sysbus_init_irq(&s->busdev, &s->parent_irq[i]);
-- 
1.7.1




[Qemu-devel] [PATCH 03/16] hw/arm_gic: Remove NVIC ifdefs from gic_state struct

2012-06-19 Thread Peter Maydell
Remove some NVIC ifdefs from the gic_state struct and its
state save/load functions. This means there are some fields
in it which are present for the NVIC but not used, but means
it always has the same layout and can be pulled out into a
common subclass.

Note that the addition of irq_target[] to the save/load
struct for the NVIC requires a vmstate version bump.

Signed-off-by: Peter Maydell 
Reviewed-by: Andreas Färber 
---
 hw/arm_gic.c |   15 +++
 1 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 72298b4..17b2eba 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -108,9 +108,7 @@ typedef struct gic_state
 int cpu_enabled[NCPU];
 
 gic_irq_state irq_state[GIC_MAXIRQ];
-#ifndef NVIC
 int irq_target[GIC_MAXIRQ];
-#endif
 int priority1[GIC_INTERNAL][NCPU];
 int priority2[GIC_MAXIRQ - GIC_INTERNAL];
 int last_active[GIC_MAXIRQ][NCPU];
@@ -120,18 +118,14 @@ typedef struct gic_state
 int running_priority[NCPU];
 int current_pending[NCPU];
 
-#if NCPU > 1
 uint32_t num_cpu;
-#endif
 
 MemoryRegion iomem; /* Distributor */
-#ifndef NVIC
 /* This is just so we can have an opaque pointer which identifies
  * both this GIC and which CPU interface we should be accessing.
  */
 struct gic_state *backref[NCPU];
 MemoryRegion cpuiomem[NCPU+1]; /* CPU interfaces */
-#endif
 uint32_t num_irq;
 } gic_state;
 
@@ -800,9 +794,7 @@ static void gic_save(QEMUFile *f, void *opaque)
 qemu_put_be32(f, s->priority2[i]);
 }
 for (i = 0; i < s->num_irq; i++) {
-#ifndef NVIC
 qemu_put_be32(f, s->irq_target[i]);
-#endif
 qemu_put_byte(f, s->irq_state[i].enabled);
 qemu_put_byte(f, s->irq_state[i].pending);
 qemu_put_byte(f, s->irq_state[i].active);
@@ -818,8 +810,9 @@ static int gic_load(QEMUFile *f, void *opaque, int 
version_id)
 int i;
 int j;
 
-if (version_id != 2)
+if (version_id != 3) {
 return -EINVAL;
+}
 
 s->enabled = qemu_get_be32(f);
 for (i = 0; i < NUM_CPU(s); i++) {
@@ -837,9 +830,7 @@ static int gic_load(QEMUFile *f, void *opaque, int 
version_id)
 s->priority2[i] = qemu_get_be32(f);
 }
 for (i = 0; i < s->num_irq; i++) {
-#ifndef NVIC
 s->irq_target[i] = qemu_get_be32(f);
-#endif
 s->irq_state[i].enabled = qemu_get_byte(f);
 s->irq_state[i].pending = qemu_get_byte(f);
 s->irq_state[i].active = qemu_get_byte(f);
@@ -914,7 +905,7 @@ static void gic_init(gic_state *s, int num_irq)
 }
 #endif
 
-register_savevm(NULL, "arm_gic", -1, 2, gic_save, gic_load, s);
+register_savevm(NULL, "arm_gic", -1, 3, gic_save, gic_load, s);
 }
 
 #ifndef NVIC
-- 
1.7.1




[Qemu-devel] [PATCH 12/16] hw/omap.h: Drop broken MEM_VERBOSE tracing

2012-06-19 Thread Peter Maydell
Remove the MEM_VERBOSE tracing option from omap.h. This worked by
intercepting cpu_register_io_memory() calls; it has been broken
since cpu_register_io_memory() was removed in favour of the
MemoryRegion API.

Signed-off-by: Peter Maydell 
---
 hw/omap.h |   95 -
 1 files changed, 0 insertions(+), 95 deletions(-)

diff --git a/hw/omap.h b/hw/omap.h
index 2819e5d..3d98941 100644
--- a/hw/omap.h
+++ b/hw/omap.h
@@ -998,7 +998,6 @@ enum {
 #define OMAP_GPIOSW_OUTPUT 0x0002
 
 # define TCMI_VERBOSE  1
-//# define MEM_VERBOSE 1
 
 # ifdef TCMI_VERBOSE
 #  define OMAP_8B_REG(paddr)   \
@@ -1018,98 +1017,4 @@ enum {
 
 # define OMAP_MPUI_REG_MASK0x07ff
 
-# ifdef MEM_VERBOSE
-struct io_fn {
-CPUReadMemoryFunc * const *mem_read;
-CPUWriteMemoryFunc * const *mem_write;
-void *opaque;
-int in;
-};
-
-static uint32_t io_readb(void *opaque, target_phys_addr_t addr)
-{
-struct io_fn *s = opaque;
-uint32_t ret;
-
-s->in ++;
-ret = s->mem_read[0](s->opaque, addr);
-s->in --;
-if (!s->in)
-fprintf(stderr, "%08x ---> %02x\n", (uint32_t) addr, ret);
-return ret;
-}
-static uint32_t io_readh(void *opaque, target_phys_addr_t addr)
-{
-struct io_fn *s = opaque;
-uint32_t ret;
-
-s->in ++;
-ret = s->mem_read[1](s->opaque, addr);
-s->in --;
-if (!s->in)
-fprintf(stderr, "%08x ---> %04x\n", (uint32_t) addr, ret);
-return ret;
-}
-static uint32_t io_readw(void *opaque, target_phys_addr_t addr)
-{
-struct io_fn *s = opaque;
-uint32_t ret;
-
-s->in ++;
-ret = s->mem_read[2](s->opaque, addr);
-s->in --;
-if (!s->in)
-fprintf(stderr, "%08x ---> %08x\n", (uint32_t) addr, ret);
-return ret;
-}
-static void io_writeb(void *opaque, target_phys_addr_t addr, uint32_t value)
-{
-struct io_fn *s = opaque;
-
-if (!s->in)
-fprintf(stderr, "%08x <--- %02x\n", (uint32_t) addr, value);
-s->in ++;
-s->mem_write[0](s->opaque, addr, value);
-s->in --;
-}
-static void io_writeh(void *opaque, target_phys_addr_t addr, uint32_t value)
-{
-struct io_fn *s = opaque;
-
-if (!s->in)
-fprintf(stderr, "%08x <--- %04x\n", (uint32_t) addr, value);
-s->in ++;
-s->mem_write[1](s->opaque, addr, value);
-s->in --;
-}
-static void io_writew(void *opaque, target_phys_addr_t addr, uint32_t value)
-{
-struct io_fn *s = opaque;
-
-if (!s->in)
-fprintf(stderr, "%08x <--- %08x\n", (uint32_t) addr, value);
-s->in ++;
-s->mem_write[2](s->opaque, addr, value);
-s->in --;
-}
-
-static CPUReadMemoryFunc * const io_readfn[] = { io_readb, io_readh, io_readw, 
};
-static CPUWriteMemoryFunc * const io_writefn[] = { io_writeb, io_writeh, 
io_writew, };
-
-inline static int debug_register_io_memory(CPUReadMemoryFunc * const *mem_read,
-   CPUWriteMemoryFunc * const 
*mem_write,
-   void *opaque)
-{
-struct io_fn *s = g_malloc(sizeof(struct io_fn));
-
-s->mem_read = mem_read;
-s->mem_write = mem_write;
-s->opaque = opaque;
-s->in = 0;
-return cpu_register_io_memory(io_readfn, io_writefn, s,
-  DEVICE_NATIVE_ENDIAN);
-}
-#  define cpu_register_io_memory   debug_register_io_memory
-# endif
-
 #endif /* hw_omap_h */
-- 
1.7.1




[Qemu-devel] [PATCH 02/16] arm_boot: Fix typos in comment

2012-06-19 Thread Peter Maydell
From: Andreas Färber 

mimicing -> mimicking
thei -> the

Signed-off-by: Andreas Färber 
Reviewed-by: Stefan Weil 
Signed-off-by: Peter Maydell 
---
 hw/arm-misc.h |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/arm-misc.h b/hw/arm-misc.h
index 1d51570..1f96229 100644
--- a/hw/arm-misc.h
+++ b/hw/arm-misc.h
@@ -45,9 +45,9 @@ struct arm_boot_info {
 /* multicore boards that use the default secondary core boot functions
  * can ignore these two function calls. If the default functions won't
  * work, then write_secondary_boot() should write a suitable blob of
- * code mimicing the secondary CPU startup process used by the board's
+ * code mimicking the secondary CPU startup process used by the board's
  * boot loader/boot ROM code, and secondary_cpu_reset_hook() should
- * perform any necessary CPU reset handling and set the PC for thei
+ * perform any necessary CPU reset handling and set the PC for the
  * secondary CPUs to point at this boot blob.
  */
 void (*write_secondary_boot)(ARMCPU *cpu,
-- 
1.7.1




[Qemu-devel] [PATCH] Makefile.hw: avoid overly large 'make clean' rm command

2012-06-19 Thread Peter Maydell
Avoid 'make clean' producing an 'rm' command which has a lot
of duplicate 'hw//*.o' arguments, by using $(sort $(dir ..))
rather than $(dir $(sort ..)) so Make's sort function will
remove the duplicates for us. We can also remove the double
'//' safely because $(dir ..) is guaranteed to return a string
ending in '/'.

Signed-off-by: Peter Maydell 
---
Mostly cosmetic, although I guess there's a faint chance that expanding
all those hw/*.o would hit the command line argument buffer limit...

 Makefile.hw |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile.hw b/Makefile.hw
index 2bcbaff..28fe100 100644
--- a/Makefile.hw
+++ b/Makefile.hw
@@ -19,8 +19,8 @@ all: $(hw-obj-y)
@true
 
 clean:
-   rm -f $(addsuffix /*.o, $(dir $(sort $(hw-obj-y))))
-   rm -f $(addsuffix /*.d, $(dir $(sort $(hw-obj-y))))
+   rm -f $(addsuffix *.o, $(sort $(dir $(hw-obj-y))))
+   rm -f $(addsuffix *.d, $(sort $(dir $(hw-obj-y))))
 
 # Include automatically generated dependency files
 -include $(patsubst %.o, %.d, $(hw-obj-y))
-- 
1.7.1




[Qemu-devel] [PATCH 04/16] hw/arm_gic: Remove the special casing of NCPU for the NVIC

2012-06-19 Thread Peter Maydell
Drop the special casing of NCPU=1 for the NVIC. This slightly
increases the amount of memory used by its state structure,
but removes some ifdeffery and means we can safely move the
GIC state into a common subclass structure.

Signed-off-by: Peter Maydell 
---
 hw/arm_gic.c |   23 +++
 hw/armv7m_nvic.c |5 ++---
 2 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 17b2eba..2d8ceb8 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -25,11 +25,7 @@
 /* First 32 are private to each CPU (SGIs and PPIs). */
 #define GIC_INTERNAL 32
 /* Maximum number of possible CPU interfaces, determined by GIC architecture */
-#ifdef NVIC
-#define NCPU 1
-#else
 #define NCPU 8
-#endif
 
 //#define DEBUG_GIC
 
@@ -67,11 +63,7 @@ typedef struct gic_irq_state
 } gic_irq_state;
 
 #define ALL_CPU_MASK ((unsigned)(((1 << NCPU) - 1)))
-#if NCPU > 1
 #define NUM_CPU(s) ((s)->num_cpu)
-#else
-#define NUM_CPU(s) 1
-#endif
 
 #define GIC_SET_ENABLED(irq, cm) s->irq_state[irq].enabled |= (cm)
 #define GIC_CLEAR_ENABLED(irq, cm) s->irq_state[irq].enabled &= ~(cm)
@@ -131,11 +123,9 @@ typedef struct gic_state
 
 static inline int gic_get_current_cpu(gic_state *s)
 {
-#if NCPU > 1
 if (s->num_cpu > 1) {
 return cpu_single_env->cpu_index;
 }
-#endif
 return 0;
 }
 
@@ -842,21 +832,14 @@ static int gic_load(QEMUFile *f, void *opaque, int 
version_id)
 return 0;
 }
 
-#if NCPU > 1
-static void gic_init(gic_state *s, int num_cpu, int num_irq)
-#else
 static void gic_init(gic_state *s, int num_irq)
-#endif
 {
 int i;
 
-#if NCPU > 1
-s->num_cpu = num_cpu;
 if (s->num_cpu > NCPU) {
 hw_error("requested %u CPUs exceeds GIC maximum %d\n",
- num_cpu, NCPU);
+ s->num_cpu, NCPU);
 }
-#endif
 s->num_irq = num_irq + GIC_BASE_IRQ;
 if (s->num_irq > GIC_MAXIRQ) {
 hw_error("requested %u interrupt lines exceeds GIC maximum %d\n",
@@ -880,7 +863,7 @@ static void gic_init(gic_state *s, int num_irq)
  *  [N+32..N+63] PPIs for CPU 1
  *   ...
  */
-i += (GIC_INTERNAL * num_cpu);
+i += (GIC_INTERNAL * s->num_cpu);
 #endif
 qdev_init_gpio_in(&s->busdev.qdev, gic_set_irq, i);
 for (i = 0; i < NUM_CPU(s); i++) {
@@ -915,7 +898,7 @@ static int arm_gic_init(SysBusDevice *dev)
 /* Device instance init function for the GIC sysbus device */
 int i;
 gic_state *s = FROM_SYSBUS(gic_state, dev);
-gic_init(s, s->num_cpu, s->num_irq);
+gic_init(s, s->num_irq);
 /* Distributor */
 sysbus_init_mmio(dev, &s->iomem);
 /* cpu interfaces (one for "current cpu" plus one per cpu) */
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 986a6bb..99a87a2 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -389,9 +389,8 @@ static int armv7m_nvic_init(SysBusDevice *dev)
 {
 nvic_state *s= FROM_SYSBUSGIC(nvic_state, dev);
 
-   /* note that for the M profile gic_init() takes the number of external
-* interrupt lines only.
-*/
+/* The NVIC always has only one CPU */
+s->gic.num_cpu = 1;
 gic_init(&s->gic, s->num_irq);
 memory_region_add_subregion(get_system_memory(), 0xe000e000, 
&s->gic.iomem);
 s->systick.timer = qemu_new_timer_ns(vm_clock, systick_timer_tick, s);
-- 
1.7.1




[Qemu-devel] [PATCH 08/16] hw/arm_gic: Make CPU target registers RAZ/WI on uniprocessor

2012-06-19 Thread Peter Maydell
The GIC spec says that the CPU target registers should RAZ/WI
for uniprocessor implementations. Implement this, which also
conveniently lets us drop an NVIC ifdef.

Annoyingly, the 11MPCore's GIC is the odd one out, since
it always has these registers, even in uniprocessor configs.

Signed-off-by: Peter Maydell 
---
 hw/arm_gic.c |   56 +---
 1 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index ad72ac6..a6e2431 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -86,11 +86,7 @@ typedef struct gic_irq_state
 #define GIC_GET_PRIORITY(irq, cpu) (((irq) < GIC_INTERNAL) ?\
 s->priority1[irq][cpu] :\
 s->priority2[(irq) - GIC_INTERNAL])
-#ifdef NVIC
-#define GIC_TARGET(irq) 1
-#else
 #define GIC_TARGET(irq) s->irq_target[irq]
-#endif
 
 typedef struct gic_state
 {
@@ -377,18 +373,22 @@ static uint32_t gic_dist_readb(void *opaque, 
target_phys_addr_t offset)
 if (irq >= s->num_irq)
 goto bad_reg;
 res = GIC_GET_PRIORITY(irq, cpu);
-#ifndef NVIC
 } else if (offset < 0xc00) {
 /* Interrupt CPU Target.  */
-irq = (offset - 0x800) + GIC_BASE_IRQ;
-if (irq >= s->num_irq)
-goto bad_reg;
-if (irq >= 29 && irq <= 31) {
-res = cm;
+if (s->num_cpu == 1 && s->revision != REV_11MPCORE) {
+/* For uniprocessor GICs these RAZ/WI */
+res = 0;
 } else {
-res = GIC_TARGET(irq);
+irq = (offset - 0x800) + GIC_BASE_IRQ;
+if (irq >= s->num_irq) {
+goto bad_reg;
+}
+if (irq >= 29 && irq <= 31) {
+res = cm;
+} else {
+res = GIC_TARGET(irq);
+}
 }
-#endif
 } else if (offset < 0xf00) {
 /* Interrupt Configuration.  */
 irq = (offset - 0xc00) * 2 + GIC_BASE_IRQ;
@@ -533,18 +533,22 @@ static void gic_dist_writeb(void *opaque, 
target_phys_addr_t offset,
 } else {
 s->priority2[irq - GIC_INTERNAL] = value;
 }
-#ifndef NVIC
 } else if (offset < 0xc00) {
-/* Interrupt CPU Target.  */
-irq = (offset - 0x800) + GIC_BASE_IRQ;
-if (irq >= s->num_irq)
-goto bad_reg;
-if (irq < 29)
-value = 0;
-else if (irq < GIC_INTERNAL)
-value = ALL_CPU_MASK;
-s->irq_target[irq] = value & ALL_CPU_MASK;
-#endif
+/* Interrupt CPU Target. RAZ/WI on uniprocessor GICs, with the
+ * annoying exception of the 11MPCore's GIC.
+ */
+if (s->num_cpu != 1 || s->revision == REV_11MPCORE) {
+irq = (offset - 0x800) + GIC_BASE_IRQ;
+if (irq >= s->num_irq) {
+goto bad_reg;
+}
+if (irq < 29) {
+value = 0;
+} else if (irq < GIC_INTERNAL) {
+value = ALL_CPU_MASK;
+}
+s->irq_target[irq] = value & ALL_CPU_MASK;
+}
 } else if (offset < 0xf00) {
 /* Interrupt Configuration.  */
 irq = (offset - 0xc00) * 4 + GIC_BASE_IRQ;
@@ -733,6 +737,12 @@ static void gic_reset(DeviceState *dev)
 GIC_SET_ENABLED(i, ALL_CPU_MASK);
 GIC_SET_TRIGGER(i);
 }
+if (s->num_cpu == 1) {
+/* For uniprocessor GICs all interrupts always target the sole CPU */
+for (i = 0; i < GIC_MAXIRQ; i++) {
+s->irq_target[i] = 1;
+}
+}
 s->enabled = 0;
 }
 
-- 
1.7.1




Re: [Qemu-devel] vm state save/restore question

2012-06-19 Thread Juan Quintela
Alexander Graf  wrote:
> On 09.06.2012, at 13:34, Benjamin Herrenschmidt wrote:
>
>> On Sat, 2012-06-09 at 20:53 +1000, Benjamin Herrenschmidt wrote:
>>> Hi folks !
>> 
>> (After some discussion with Andreas ...)
>> 
>>> I'm looking at sorting out the state save/restore of target-ppc (which
>>> means understanding in general how it works in qemu :-)
>>> 
>>> So far I've somewhat figured out that there's the "old way" where we
>>> just provide a "bulk" save/restore function pair, and the "new way"
>>> where we have nicely tagged field lists etc...
>>> 
>>> x86 seems to use the later for the CPU state, ppc is a mess and uses the
>>> former with interesting incompatible format change depending on how qemu
>>> is build :-) So I think that's one area I need to fix.
>> 
>> Ok, so I'm told there are patches to convert ppc, I haven't seen them in
>> my list archives, so if somebody has a pointer, please shoot, that will
>> save me some work :-)

I can send a new version tomorrow.

>>> What I'd need is something in spapr that can be used to "resync" bits of
>>> the cpu state with the external htab that gets run after everything is
>>> loaded and before emulation restarts.
>>> 
>>> Any idea how to do that properly ? I suppose I could also try to iterate
>>> all the vcpu's after loading the hash table & update the fields but not
>>> only that's gross ... I also don't know how to do it :-)
>> 
>> So I did an experiment using the "old style" save/restore (bad boy !)
>> and got that part to work by just iterating the vcpu's.
>> 
>> It's a bit nasty but it's the right way I think, ie, what we have here
>> (the external hash table) is a global object under control/ownership of
>> the platform code for which a pointer is cached in the CPU state (so the
>> mmu emulation gets to it easily), so those cached pointers need to be
>> updated in all CPUs when a new hash table is loaded/allocated.
>> 
>> That leads to another question however... I need to add save/restore to
>> a bunch more stuff such as the xics (interrupt controller), the various
>> spapr devices, etc...
>> 
>> So far the VMState stuff is all nice if you have fixed sized arrays.
>> However I haven't quite found out the right way to use it for things
>> like:
>> 
>> - The hash table (mentioned above). This is just a big chunk of memory
>> (it will routinely be 16M), so I really don't want to start iterating
>> all elements, just a bulk load will do, and the size might actually be
>> variable.

This is going to kill migration downtime.  With the current setup, we
just send something like 1-2MB in stage 3 (i.e. after the machine is
down).  Default downtime is 30ms, and 16MB is going to take around 1s on
gigabit ethernet.

That said, if you tell me the state that you want to send, I can
take a look.

>> - The XICS (interrupt controller). The actual size of the interrupt
>> state array can vary (the number of interrupt sources can vary, it's
>> fixed today by the machine code but I wouldn't rely too much on that and
>> in any case, from the XICS driver perspective, it's not a constant, it's
>> a variable it gets passed when initializing).

Can you point me at the structure that you want to send?

>> So in both these cases, I need either code to control the save/load
>> process (old style ? hard to hook into vmstate as far as I can tell) or
>> maybe a way to describe the array so that the array size itself is a
>> pointer to a variable (Andreas mentioned something along those lines).
>> Is there any doco for that stuff btw ? I haven't seen anything
>> detailed...
>
> I'm sure Juan knows more there :)

thanks for pointing me to the discussion O:-)

Later, Juan.



[Qemu-devel] [PATCH 10/16] hw/arm_gic: Move CPU interface memory region setup into arm_gic_init

2012-06-19 Thread Peter Maydell
Remove more NVIC ifdefs by moving the code to setup the CPU interface
memory regions into the GIC specific arm_gic_init() function rather
than the gic_init() function. Rename the latter to more closely
reflect what it's now actually doing.

Signed-off-by: Peter Maydell 
---
 hw/arm_gic.c |   26 +-
 hw/armv7m_nvic.c |2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index c288bc5..ad5ab3c 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -812,7 +812,7 @@ static int gic_load(QEMUFile *f, void *opaque, int 
version_id)
 return 0;
 }
 
-static void gic_init(gic_state *s, int num_irq)
+static void gic_init_irqs_and_distributor(gic_state *s, int num_irq)
 {
 int i;
 
@@ -850,7 +850,19 @@ static void gic_init(gic_state *s, int num_irq)
 sysbus_init_irq(&s->busdev, &s->parent_irq[i]);
 }
 memory_region_init_io(&s->iomem, &gic_dist_ops, s, "gic_dist", 0x1000);
+
+register_savevm(NULL, "arm_gic", -1, 3, gic_save, gic_load, s);
+}
+
 #ifndef NVIC
+
+static int arm_gic_init(SysBusDevice *dev)
+{
+/* Device instance init function for the GIC sysbus device */
+int i;
+gic_state *s = FROM_SYSBUS(gic_state, dev);
+gic_init_irqs_and_distributor(s, s->num_irq);
+
 /* Memory regions for the CPU interfaces (NVIC doesn't have these):
  * a region for "CPU interface for this core", then a region for
  * "CPU interface for core 0", "for core 1", ...
@@ -866,19 +878,7 @@ static void gic_init(gic_state *s, int num_irq)
 memory_region_init_io(&s->cpuiomem[i+1], &gic_cpu_ops, &s->backref[i],
   "gic_cpu", 0x100);
 }
-#endif
-
-register_savevm(NULL, "arm_gic", -1, 3, gic_save, gic_load, s);
-}
-
-#ifndef NVIC
 
-static int arm_gic_init(SysBusDevice *dev)
-{
-/* Device instance init function for the GIC sysbus device */
-int i;
-gic_state *s = FROM_SYSBUS(gic_state, dev);
-gic_init(s, s->num_irq);
 /* Distributor */
 sysbus_init_mmio(dev, &s->iomem);
 /* cpu interfaces (one for "current cpu" plus one per cpu) */
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 4c130f1..031a7fd 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -449,7 +449,7 @@ static int armv7m_nvic_init(SysBusDevice *dev)
 s->gic.num_cpu = 1;
 /* Tell the common code we're an NVIC */
 s->gic.revision = 0xffffffff;
-gic_init(&s->gic, s->num_irq);
+gic_init_irqs_and_distributor(&s->gic, s->num_irq);
 /* The NVIC and system controller register area looks like this:
  *  0..0xff : system control registers, including systick
  *  0x100..0xcff : GIC-like registers
-- 
1.7.1




[Qemu-devel] [PATCH 13/16] hw/a9mpcore: Fix compilation failure if physaddrs are 64 bit

2012-06-19 Thread Peter Maydell
Add a cast to a logging printf to avoid a compilation failure
if target_phys_addr_t is a 64 bit type. (This is better than
using TARGET_FMT_plx because we really don't need a full
16 digit hex string to print the offset into a device.)

Signed-off-by: Peter Maydell 
Reviewed-by: Eric Blake 
---
 hw/a9mpcore.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/a9mpcore.c b/hw/a9mpcore.c
index c2ff74d..ebd5b29 100644
--- a/hw/a9mpcore.c
+++ b/hw/a9mpcore.c
@@ -75,7 +75,7 @@ static void a9_scu_write(void *opaque, target_phys_addr_t 
offset,
 break;
 default:
 fprintf(stderr, "Invalid size %u in write to a9 scu register %x\n",
-size, offset);
+size, (unsigned)offset);
 return;
 }
 
-- 
1.7.1




Re: [Qemu-devel] [PATCH] hw/cadence_gem: Make rx_desc_addr and tx_desc_addr uint32_t

2012-06-19 Thread Peter Maydell
Peter C: ping? I was hoping for a review or ack for this
one before I stick it into an arm-devs pullreq.

thanks
-- PMM

On 22 May 2012 18:02, Peter Maydell  wrote:
> Make the state fields rx_desc_addr and tx_desc_addr uint32_t;
> this matches the VMStateDescription, and also conforms to how
> hardware works: the registers don't magically become larger
> if the device is attached to a CPU with a larger physical
> address size. It also fixes a compile failure if the
> target_phys_addr_t type is changed to 64 bits.
>
> Signed-off-by: Peter Maydell 
> ---
> I'm going through fixing problems with moving target-arm to
> a larger physical address width so we can support the A15
> Large Physical Address Extensions...
>
>  hw/cadence_gem.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
> index e2140ae..e563409 100644
> --- a/hw/cadence_gem.c
> +++ b/hw/cadence_gem.c
> @@ -339,8 +339,8 @@ typedef struct {
>     uint8_t phy_loop; /* Are we in phy loopback? */
>
>     /* The current DMA descriptor pointers */
> -    target_phys_addr_t rx_desc_addr;
> -    target_phys_addr_t tx_desc_addr;
> +    uint32_t rx_desc_addr;
> +    uint32_t tx_desc_addr;
>
>  } GemState;
>
> --
> 1.7.1



Re: [Qemu-devel] [PATCH] qemu_find_file: check name as a straight path even if it has no '/'

2012-06-19 Thread Peter Maydell
Ping? (patch still applies cleanly to current master; patchwork URL
http://patchwork.ozlabs.org/patch/161324/ )

-- PMM

On 25 May 2012 13:07, Peter Maydell  wrote:
> Make qemu_find_file() check for the passed in name as a straight
> pathname even if it doesn't have any path separator character in it.
> This means that "-bios foo", "-dtb foo" etc will find a file 'foo'
> in the current directory.
> This removes an inconsistency with -kernel and -initrd, which both
> accept plain filenames as meaning files in the current directory.
> It's also less confusing for the user than an undocumented restriction
> that "this option accepts a filename, except for the special case
> where the filename you pass happens not to have a '/' in it, in
> which case we'll ignore it."
>
> Signed-off-by: Peter Maydell 
> ---
>  vl.c |    5 ++---
>  1 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/vl.c b/vl.c
> index 23ab3a3..4639526 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -1801,9 +1801,8 @@ char *qemu_find_file(int type, const char *name)
>     const char *subdir;
>     char *buf;
>
> -    /* If name contains path separators then try it as a straight path.  */
> -    if ((strchr(name, '/') || strchr(name, '\\'))
> -        && access(name, R_OK) == 0) {
> +    /* Try the name as a straight path first */
> +    if (access(name, R_OK) == 0) {
>         return g_strdup(name);
>     }
>     switch (type) {
> --
> 1.7.1



Re: [Qemu-devel] [PATCH buildfix] configure: Assure existence of linux-headers/ build directory

2012-06-19 Thread Peter Maydell
On 14 June 2012 16:14, Andreas Färber  wrote:
> Commit ec5b06d (configure: ensure directory exists when creating symlinks)
> moved the creation of directories into the symlink() function but forgot
> the case where no symlink is created.
>
> This leads to build errors on arm Linux due to -I../linux-headers.
>
> Unbreak the build on arm Linux by reverting part of that commit.

I think it would be better to just revert ec5b06d completely.
The comment documenting the symlink() function says
"# symbolically link $1 to $2.  Portable version of "ln -sf"
and "ln -sf" doesn't create the target directory, so symlink()
should not either.

-- PMM



Re: [Qemu-devel] [PATCH buildfix] configure: Assure existence of linux-headers/ build directory

2012-06-19 Thread Andreas Färber
Am 19.06.2012 17:18, schrieb Peter Maydell:
> On 14 June 2012 16:14, Andreas Färber  wrote:
>> Commit ec5b06d (configure: ensure directory exists when creating symlinks)
>> moved the creation of directories into the symlink() function but forgot
>> the case where no symlink is created.
>>
>> This leads to build errors on arm Linux due to -I../linux-headers.
>>
>> Unbreak the build on arm Linux by reverting part of that commit.
> 
> I think it would be better to just revert ec5b06d completely.
> The comment documenting the symlink() function says
> "# symbolically link $1 to $2.  Portable version of "ln -sf"
> and "ln -sf" doesn't create the target directory, so symlink()
> should not either.

I don't mind either way as long as the build gets fixed. Anthony?

/-F

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



[Qemu-devel] [PATCH v12 01/13] Add MigrationParams structure

2012-06-19 Thread Orit Wasserman
From: Isaku Yamahata 

Signed-off-by: Isaku Yamahata 
---
 block-migration.c |8 
 migration.c   |   13 -
 migration.h   |8 ++--
 qemu-common.h |1 +
 savevm.c  |   13 +
 sysemu.h  |3 ++-
 vmstate.h |2 +-
 7 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index fd2..b95b4e1 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -700,13 +700,13 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 return 0;
 }
 
-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(const MigrationParams *params, void *opaque)
 {
-block_mig_state.blk_enable = blk_enable;
-block_mig_state.shared_base = shared_base;
+block_mig_state.blk_enable = params->blk;
+block_mig_state.shared_base = params->shared;
 
 /* shared base means that blk_enable = 1 */
-block_mig_state.blk_enable |= shared_base;
+block_mig_state.blk_enable |= params->shared;
 }
 
 void blk_mig_init(void)
diff --git a/migration.c b/migration.c
index 3f485d3..810727f 100644
--- a/migration.c
+++ b/migration.c
@@ -352,7 +352,7 @@ void migrate_fd_connect(MigrationState *s)
   migrate_fd_close);
 
 DPRINTF("beginning savevm\n");
-ret = qemu_savevm_state_begin(s->file, s->blk, s->shared);
+ret = qemu_savevm_state_begin(s->file, &s->params);
 if (ret < 0) {
 DPRINTF("failed, %d\n", ret);
 migrate_fd_error(s);
@@ -361,15 +361,14 @@ void migrate_fd_connect(MigrationState *s)
 migrate_fd_put_ready(s);
 }
 
-static MigrationState *migrate_init(int blk, int inc)
+static MigrationState *migrate_init(const MigrationParams *params)
 {
 MigrationState *s = migrate_get_current();
 int64_t bandwidth_limit = s->bandwidth_limit;
 
 memset(s, 0, sizeof(*s));
 s->bandwidth_limit = bandwidth_limit;
-s->blk = blk;
-s->shared = inc;
+s->params = *params;
 
 s->bandwidth_limit = bandwidth_limit;
 s->state = MIG_STATE_SETUP;
@@ -394,9 +393,13 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
  Error **errp)
 {
 MigrationState *s = migrate_get_current();
+MigrationParams params;
 const char *p;
 int ret;
 
+params.blk = blk;
+params.shared = inc;
+
 if (s->state == MIG_STATE_ACTIVE) {
 error_set(errp, QERR_MIGRATION_ACTIVE);
 return;
@@ -411,7 +414,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 return;
 }
 
-s = migrate_init(blk, inc);
+s = migrate_init(&params);
 
 if (strstart(uri, "tcp:", &p)) {
 ret = tcp_start_outgoing_migration(s, p, errp);
diff --git a/migration.h b/migration.h
index 2e9ca2e..4168883 100644
--- a/migration.h
+++ b/migration.h
@@ -19,6 +19,11 @@
 #include "notify.h"
 #include "error.h"
 
+struct MigrationParams {
+int blk;
+int shared;
+};
+
 typedef struct MigrationState MigrationState;
 
 struct MigrationState
@@ -31,8 +36,7 @@ struct MigrationState
 int (*close)(MigrationState *s);
 int (*write)(MigrationState *s, const void *buff, size_t size);
 void *opaque;
-int blk;
-int shared;
+MigrationParams params;
 };
 
 void process_incoming_migration(QEMUFile *f);
diff --git a/qemu-common.h b/qemu-common.h
index 91e0562..62081c0 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -17,6 +17,7 @@ typedef struct DeviceState DeviceState;
 
 struct Monitor;
 typedef struct Monitor Monitor;
+typedef struct MigrationParams MigrationParams;
 
 /* we put basic includes here to avoid repeating them in device drivers */
 #include 
diff --git a/savevm.c b/savevm.c
index 2d18bab..638d2b1 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1561,7 +1561,8 @@ bool qemu_savevm_state_blocked(Error **errp)
 return false;
 }
 
-int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared)
+int qemu_savevm_state_begin(QEMUFile *f,
+const MigrationParams *params)
 {
 SaveStateEntry *se;
 int ret;
@@ -1569,8 +1570,8 @@ int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, 
int shared)
 QTAILQ_FOREACH(se, &savevm_handlers, entry) {
 if(se->set_params == NULL) {
 continue;
-   }
-   se->set_params(blk_enable, shared, se->opaque);
+}
+se->set_params(params, se->opaque);
 }
 
 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1708,13 +1709,17 @@ void qemu_savevm_state_cancel(QEMUFile *f)
 static int qemu_savevm_state(QEMUFile *f)
 {
 int ret;
+MigrationParams params = {
+.blk = 0,
+.shared = 0
+};
 
 if (qemu_savevm_state_blocked(NULL)) {
 ret = -EINVAL;
 goto out;
 }
 
-ret = qemu_savevm_state_begin(f, 0, 0);
+ret = qemu_savevm_state_begin(f, &params);
 if (ret < 0)
 goto out;
 
diff --git a/sysemu.h b/sysemu.h
index bc2c788..6540c79 100644
--

[Qemu-devel] [PATCH v12 03/13] Add XBZRLE documentation

2012-06-19 Thread Orit Wasserman
Signed-off-by: Orit Wasserman 
---
 docs/xbzrle.txt |  115 +++
 1 files changed, 115 insertions(+), 0 deletions(-)
 create mode 100644 docs/xbzrle.txt

diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt
new file mode 100644
index 000..62db494
--- /dev/null
+++ b/docs/xbzrle.txt
@@ -0,0 +1,115 @@
+XBZRLE (Xor Based Zero Run Length Encoding)
+===
+
+Using XBZRLE (Xor Based Zero Run Length Encoding) allows for the reduction 
+of VM downtime and the total live-migration time of Virtual machines.
+It is particularly useful for virtual machines running memory write intensive
+workloads that are typical of large enterprise applications such as SAP ERP
+Systems, and generally speaking for any application that uses a sparse memory
+update pattern.
+
+Instead of sending the changed guest memory page this solution will send a
+compressed version of the updates, thus reducing the amount of data sent during
+live migration.
+In order to be able to calculate the update, the previous memory pages need to
+be stored on the source. Those pages are stored in a dedicated cache
+(hash table) and are
+accessed by their address.
+The larger the cache size the better the chances are that the page has already
+been stored in the cache.
+A small cache size will result in high cache miss rate.
+Cache size can be changed before and during migration.
+
+Format
+===
+
+The compression format performs a XOR between the previous and current content
+of the page, where zero represents an unchanged value.
+The page data delta is represented by zero and non zero runs.
+A zero run is represented by its length (in bytes).
+A non zero run is represented by its length (in bytes) and the data.
+The run length is encoded using ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+
+page = zrun nzrun
+   | zrun nzrun page
+
+zrun = length
+
+nzrun = length byte...
+
+length = uleb128 encoded integer
+
+On the sender side XBZRLE is used as a compact delta encoding of page updates,
+retrieving the old page content from the cache (default size of 512 MB). The
+receiving side uses the existing page's content and XBZRLE to decode the new
+page's content.
+
+This is a more compact way to store the deltas than the previous version.
+
+This work was originally based on research results published
+VEE 2011: Evaluation of Delta Compression Techniques for Efficient Live
+Migration of Large Virtual Machines by Benoit, Svard, Tordsson and Elmroth.
+Additionally the delta encoder XBRLE was improved further using the XBZRLE
+instead.
+
+XBZRLE has a sustained bandwidth of 2-2.5 GB/s for typical workloads making it
+ideal for in-line, real-time encoding such as is needed for live-migration.
+
+Migration Capabilities
+==
+In order to use XBZRLE the destination QEMU version should be able to
+decode the new format.
+Adding a new migration capabilities command that will allow external management
+to query for its support.
+A typical use for the destination
+{qemu} info migrate_capabilities
+{qemu} xbzrle, ...
+
+In order to enable capabilities for future live migration,
+a new command migrate_set_parameter is introduced:
+{qemu} migrate_set_parameter xbzrle
+
+Usage
+==
+
+1. Activate xbzrle
+2. Set the XBZRLE cache size - the cache size is in MBytes and should be a
+power of 2. The cache default value is 64MBytes.
+3. start outgoing migration
+
+A typical usage scenario:
+{qemu} migrate_set_parameter xbzrle
+{qemu} migrate_set_cachesize 256m
+{qemu} migrate -d tcp:destination.host:
+{qemu} info migrate
+...
+transferred ram-duplicate: A kbytes
+transferred ram-normal: B kbytes
+transferred ram-xbrle: C kbytes
+overflow ram-xbrle: D pages
+cache-miss ram-xbrle: E pages
+
+cache-miss: the number of cache misses to date - high cache-miss rate
+indicates that the cache size is set too low.
+overflow: the number of overflows in the decoding, i.e. cases where the delta could
+not be compressed. This can happen if the changes in the pages are too large
+or there are many short changes for example change every second byte (half a
+page).
+
+Testing: Testing indicated that live migration with XBZRLE was completed in 110
+seconds, whereas without it would not be able to complete.
+
+A simple synthetic memory r/w load generator:
+..include <stdlib.h>
+..include <stdio.h>
+..int main()
+..{
+..char *buf = (char *) calloc(4096, 4096);
+..while (1) {
+..int i;
+..for (i = 0; i < 4096 * 4; i++) {
+..buf[i * 4096 / 4]++;
+..}
+..printf(".");
+..}
+..}
-- 
1.7.7.6




[Qemu-devel] [PATCH v12 02/13] Add migration capabilites

2012-06-19 Thread Orit Wasserman
Add migration capabilities that can be queried by the management.
The management can query the source QEMU and the destination QEMU in order to
verify both support some migration capability (currently only XBZRLE).
The management can enable a capability for the next migration by using the
migrate_set_parameter command.

Signed-off-by: Orit Wasserman 
---
 hmp-commands.hx  |   16 ++
 hmp.c|   62 ++
 hmp.h|2 +
 migration.c  |   48 -
 migration.h  |2 +
 monitor.c|7 ++
 qapi-schema.json |   46 +++-
 qmp-commands.hx  |   48 +
 8 files changed, 228 insertions(+), 3 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index f5d9d91..b3051a8 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -861,6 +861,20 @@ Set maximum tolerated downtime (in seconds) for migration.
 ETEXI
 
 {
+.name   = "migrate_set_parameter",
+.args_type  = "capability:s,state:b",
+.params = "",
+.help   = "Enable the usage of a capability for migration",
+.mhandler.cmd = hmp_migrate_set_parameter,
+},
+
+STEXI
+@item migrate_set_parameter @var{capability} @var{state}
+@findex migrate_set_parameter
+Enable/Disable the usage of a capability @var{capability} for migration.
+ETEXI
+
+{
 .name   = "client_migrate_info",
 .args_type  = 
"protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
 .params = "protocol hostname port tls-port cert-subject",
@@ -1419,6 +1433,8 @@ show CPU statistics
 show user network stack connection states
 @item info migrate
 show migration status
+@item info migration_capabilities
+show migration capabilities
 @item info balloon
 show balloon information
 @item info qtree
diff --git a/hmp.c b/hmp.c
index 2ce8cb9..67d4122 100644
--- a/hmp.c
+++ b/hmp.c
@@ -130,9 +130,18 @@ void hmp_info_mice(Monitor *mon)
 void hmp_info_migrate(Monitor *mon)
 {
 MigrationInfo *info;
+MigrationCapabilityInfoList *cap;
 
 info = qmp_query_migrate(NULL);
 
+if (info->has_params && info->params) {
+monitor_printf(mon, "params: ");
+for (cap = info->params; cap; cap = cap->next) {
+monitor_printf(mon, "%s",
+   MigrationCapability_lookup[cap->value->capability]);
+}
+monitor_printf(mon, "\n");
+}
 if (info->has_status) {
 monitor_printf(mon, "Migration status: %s\n", info->status);
 }
@@ -158,6 +167,24 @@ void hmp_info_migrate(Monitor *mon)
 qapi_free_MigrationInfo(info);
 }
 
+void hmp_info_migration_capabilities(Monitor *mon)
+{
+MigrationCapabilityInfoList *caps_list, *cap;
+
+caps_list = qmp_query_migration_capabilities(NULL);
+if (!caps_list) {
+monitor_printf(mon, "No migration capabilities found\n");
+return;
+}
+
+for (cap = caps_list; cap; cap = cap->next) {
+monitor_printf(mon, "%s ",
+   MigrationCapability_lookup[cap->value->capability]);
+}
+
+qapi_free_MigrationCapabilityInfoList(caps_list);
+}
+
 void hmp_info_cpus(Monitor *mon)
 {
 CpuInfoList *cpu_list, *cpu;
@@ -732,6 +759,41 @@ void hmp_migrate_set_speed(Monitor *mon, const QDict 
*qdict)
 qmp_migrate_set_speed(value, NULL);
 }
 
+void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
+{
+const char *cap = qdict_get_str(qdict, "capability");
+bool state = qdict_get_bool(qdict, "state");
+Error *err = NULL;
+MigrationCapabilityInfoList *params = NULL;
+int i;
+
+for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
+if (strcmp(cap, MigrationCapability_lookup[i]) == 0) {
+if (!params) {
+params = g_malloc0(sizeof(*params));
+}
+params->value = g_malloc0(sizeof(*params->value));
+params->value->capability = i;
+params->value->state = state;
+params->next = NULL;
+qmp_migrate_set_parameters(params, &err);
+break;
+}
+}
+
+if (i == MIGRATION_CAPABILITY_MAX) {
+error_set(&err, QERR_INVALID_PARAMETER, cap);
+}
+
+qapi_free_MigrationCapabilityInfoList(params);
+
+if (err) {
+monitor_printf(mon, "migrate_set_parameter: %s\n",
+   error_get_pretty(err));
+error_free(err);
+}
+}
+
 void hmp_set_password(Monitor *mon, const QDict *qdict)
 {
 const char *protocol  = qdict_get_str(qdict, "protocol");
diff --git a/hmp.h b/hmp.h
index 79d138d..09ba198 100644
--- a/hmp.h
+++ b/hmp.h
@@ -25,6 +25,7 @@ void hmp_info_uuid(Monitor *mon);
 void hmp_info_chardev(Monitor *mon);
 void hmp_info_mice(Monitor *mon);
 void hmp_info_migrate(Monitor *mon);
+void hmp_info_migration_capabilities(Monitor *mon);
 void hmp_info_cpus(Monitor *mon);
 void hmp_info_block

[Qemu-devel] [PATCH v12 00/13] XBZRLE delta for live migration of large memory app

2012-06-19 Thread Orit Wasserman
Changes from v11: 
- divide patch 7 to several smaller patches.
- Use an array for setting migration parameters in QMP only (there
  is no support for arrays in HMP commands). Parameters can be enabled
  or disabled.
- Do not use XBZRLE in stage 3; it is a very sensitive stage and CPU
  usage can be an issue.
- Fix review comments by Juan Quintela and Eric Blake

Changes from v10:
- Cache size will be in bytes, in case it is not a power of 2 it will be
  reduced to the nearest power of 2.
- fix documentation
- use cache_init with number of pages not cache size.

Changes from v9:
- move cache implementation to separate files. Kept our own 
implementation because GCache or GHashTable have no size limit.
- Add migrate_set_parameter function
- removed XBZRLE option from migrate command
- add cache size information to query_migrate command
- add documentation file
- write/read the exact XBZRLE header format
- fix other review comments by Anthony and Juan

Changes from v8:
Implement a more efficient cache_resize method
fix set_cachesize command 

Changes from v7:
Copy the current page before encoding it; this prevents the page content
from changing during the encoding.
Allow changing the cache size during an active migration.
Fix comments by Avi.

Changes from v6:
 1) add assert checks to ULEB encoding/decoding
 2) no need to send last zero run

Changes from v5:
1) Add migration capabilities
2) Use ULEB to encode run length
3) Do not send unmodified (dirty) page
4) Fix other patch comments

Using GCache or GHashTable requires allocating a new buffer on every content
change, and they have no size limit,
so I decided to keep the simple cache implementation.

Changes from v4:
1) Rebase
2) divide patch into 9 patches
3) move memory allocation into cache_insert

Future work :
 Use SSE for encoding.
 Page ranking according to their dirty rate and automatic
activation/deactivation of the feature - will be sent in a separate patch
series.

By using XBZRLE (Xor Based Zero Run Length Encoding) we can reduce VM downtime
and total live-migration time of VMs running memory write intensive workloads
typical of large enterprise applications such as SAP ERP Systems, and generally
speaking for any application with a sparse memory update pattern.

The compression format uses the fact that we will have many zeros (a zero
represents an unchanged value).
We represent the page data delta by zero and non-zero runs.
We represent a zero run with its length (in bytes).
We represent a non-zero run with its length (in bytes) and the data.
The run length is encoded using ULEB128 (http://en.wikipedia.org/wiki/LEB128)

page = zrun nzrun
   | zrun nzrun page

zrun = length

nzrun = length byte...

length = uleb128 encoded integer

On the sender side XBZRLE is used as a compact delta encoding of page updates,
retrieving the old page content from an LRU cache (default size of 512 MB). The
receiving side uses the existing page content and XBZRLE to decode the new page
content.

This is a more compact way to store the delta than the previous version.

This work was originally based on research results published at VEE 2011:
Evaluation of
Delta Compression Techniques for Efficient Live Migration of Large Virtual
Machines by Benoit, Svard, Tordsson and Elmroth. Additionally the delta encoder
XBRLE was improved further using XBZRLE instead.

XBZRLE has a sustained bandwidth of 2-2.5 GB/s for typical workloads making it
ideal for in-line, real-time encoding such as is needed for live-migration.

A typical usage scenario:
{qemu} migrate_set_cachesize 256m
{qemu} migrate_set_parameter xbzrle
{qemu} migrate -d tcp:destination.host:
{qemu} info migrate
...
transferred ram: A kbytes
remaining ram: B kbytes
total ram: C kbytes
cache size: D bytes
xbzrle transferred: E kbytes
xbzrle pages: F pages
xbzrle cache miss: G
xbzrle overflow : H

Testing: live migration with XBZRLE completed in 110 seconds; without XBZRLE,
live migration was not able to complete.

A simple synthetic memory r/w load generator:
..include <stdio.h>
..include <stdlib.h>
..int main()
..{
..char *buf = (char *) calloc(4096, 4096);
..while (1) {
..int i;
..for (i = 0; i < 4096 * 4; i++) {
..buf[i * 4096 / 4]++;
..}
..printf(".");
..}
..}

Signed-off-by: Benoit Hudzia 
Signed-off-by: Petter Svard 
Signed-off-by: Aidan Shribman 

Orit Wasserman (13):
  Add MigrationParams structure
  Add migration capabilites
  Add XBZRLE documentation
  Add cache handling functions
  Add uleb encoding/decoding functions
  Add save_block_hdr function
  Add debugging infrastructure
  Change ram_save_block to return -1 if there are no more changes
  Add migration_end function
  Add xbzrle_e

  1   2   3   >