date:20140522

Re: [Qemu-devel] [PATCH 1/2] virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 08:36, Fam Zheng ha scritto:

-
-if (!req->dev->blk.scsi) {
-status = VIRTIO_BLK_S_UNSUPP;
-goto fail;
-}


Where did you move this condition?

Also, the scsi field of VirtIOBlockReq is now unused.

Paolo

Re: [Qemu-devel] [PATCH 2/2] dataplane: Support VIRTIO_BLK_T_SCSI_CMD

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 08:36, Fam Zheng ha scritto:

Signed-off-by: Fam Zheng 
---
 hw/block/dataplane/virtio-blk.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 46a6824..6a66196 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -201,6 +201,16 @@ static void do_flush_cmd(VirtIOBlockDataPlane *s, 
VirtQueueElement *elem,
 bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
 }

+static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+ QEMUIOVector *inhdr)
+{
+int status;
+
+status = virtio_blk_handle_scsi_req(s->blk->conf.bs, elem);
+complete_request_early(s, elem, inhdr, status);
+
+}
+
 static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
 struct iovec *iov = elem->out_sg;
@@ -249,8 +259,7 @@ static int process_request(VirtIOBlockDataPlane *s, 
VirtQueueElement *elem)
 return 0;

 case VIRTIO_BLK_T_SCSI_CMD:
-/* TODO support SCSI commands */
-complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
+do_scsi_cmd(s, elem, inhdr);
 return 0;

 case VIRTIO_BLK_T_FLUSH:
@@ -326,12 +335,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 return;
 }

-if (blk->scsi) {
-error_setg(errp,
-   "device is incompatible with x-data-plane, use scsi=off");
-return;
-}
-
 /* If dataplane is (re-)enabled while the guest is running there could be
  * block jobs that can conflict.
  */



Reviewed-by: Paolo Bonzini

Re: [Qemu-devel] [PATCH 3/7] monitor: Add migrate_set_capability completion.

2014-05-22 Thread Markus Armbruster

"Dr. David Alan Gilbert"  writes:

> * Hani Benhabiles (kroo...@gmail.com) wrote:
>> Signed-off-by: Hani Benhabiles 
>> ---
>>  hmp-commands.hx |  1 +
>>  hmp.h   |  2 ++
>>  monitor.c   | 21 +
>>  3 files changed, 24 insertions(+)
>> 
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index 45e1763..919af6e 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -975,6 +975,7 @@ ETEXI
>>  .params = "capability state",
>>  .help   = "Enable/Disable the usage of a capability for 
>> migration",
>>  .mhandler.cmd = hmp_migrate_set_capability,
>> +.command_completion = migrate_set_capability_completion,
>>  },
>>  
>>  STEXI
>> diff --git a/hmp.h b/hmp.h
>> index a70804d..0c814d0 100644
>> --- a/hmp.h
>> +++ b/hmp.h
>> @@ -107,5 +107,7 @@ void ringbuf_write_completion(ReadLineState *rs, int 
>> nb_args, const char *str);
>>  void ringbuf_read_completion(ReadLineState *rs, int nb_args, const char 
>> *str);
>>  void watchdog_action_completion(ReadLineState *rs, int nb_args,
>>  const char *str);
>> +void migrate_set_capability_completion(ReadLineState *rs, int nb_args,
>> +   const char *str);
>
> Thank you for doing this; I spend way too much time typing these commands.
>
>>  #endif
>> diff --git a/monitor.c b/monitor.c
>> index fb300c2..6a3a5c9 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -4572,6 +4572,27 @@ void watchdog_action_completion(ReadLineState *rs, 
>> int nb_args, const char *str)
>>  add_completion_option(rs, str, "none");
>>  }
>>  
>> +void migrate_set_capability_completion(ReadLineState *rs, int nb_args,
>> +   const char *str)
>> +{
>> +size_t len;
>> +
>> +len = strlen(str);
>> +readline_set_completion_index(rs, len);
>> +if (nb_args == 2) {
>> +int i;
>> +for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
>> +const char *name = MigrationCapability_lookup[i];
>> +if (!strncmp(str, name, len)) {
>> +readline_add_completion(rs, name);
>> +}
>> +}
>> +} else if (nb_args == 3) {
>> +add_completion_option(rs, str, "on");
>> +add_completion_option(rs, str, "off");
>> +}
>
> It's a shame you have to do all of these manually; if we could tell something
> that we had an enum of 'MigrationCapability' then it could remove the command
> specific glue.

HMP command arguments are specified by an args_type string, which can't
express "enumeration", so we use strings instead.

We could hack in another arguments type 'E', abusing the parameter name
like type 'O'.  But I think the args_type string is long past its "best
before" date.

We could replace it by a command schema, like QMP's.  Differences to QMP
include: arguments are positional, fewer types are accepted, and there's
convenience syntax such as size suffixes.

Not exactly an easy task, I'm afraid.

[...]

Re: [Qemu-devel] [PATCH 0/2] dataplane: Enable "scsi=on"

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 08:36, Fam Zheng ha scritto:

This makes the SG_IO code of non-dataplane available to dataplane, so that
dataplane can use to allow scsi=on.


I like the idea of using more hw/block/virtio-blk.c code in the 
dataplane thread.  We should turn this into an actual plan to remove 
more and more hw/block/dataplane/ code.  It will help for 
rerror=/werror= too.


Paolo

Re: [Qemu-devel] [PATCH 2/9] target-ppc: Refactor init_proc_POWER7

2014-05-22 Thread Alexander Graf



> Am 22.05.2014 um 05:59 schrieb Alexey Kardashevskiy :
> 
>> On 05/21/2014 11:23 PM, Alexander Graf wrote:
>> 
>>> On 21.05.14 14:30, Alexey Kardashevskiy wrote:
 On 05/21/2014 08:44 PM, Alexander Graf wrote:
> On 21.05.14 08:20, Alexey Kardashevskiy wrote:
> This moves SPR initialization to helper functions.
> 
> Signed-off-by: Alexey Kardashevskiy 
 I like the idea, but please refactor all book3s CPUs, not just POWER7.
 
 I also think we can cover a lot of the SPR registration by matching on
 feature fields. VR for example is coupled to Altivec.
>>> 
>>> Ok.
>>> 
 Maybe we could also introduce an enum for the exact cpu type, similar to
 how we do it on e500? Then we could do fun things like
 
 if (cpu_type >= CPU_TYPE_970) {
 gen_spr_book3s_vr(env);
 }
 
 if (cpu_type >= CPU_TYPE_POWER7) {
 gen_spr_lpar(env);
 }
 
 switch (cpu_type) {
 case CPU_TYPE_POWER7:
 env->slb_nr = 32;
 break;
 default:
 env->slb_nr = 64;
 break;
 }
 
 and thus combine all those book3s init functions into a single, more
 maintainable version.
>>> If I can, I would like not to do it in this way, I'd rather have explicit
>>> list of gen_spr_FACILITY() calls always. For example,
>>> DABR/DABRX/whateverPOWER8has - it is not going to be always ">", and this
>>> breaks my weak mind :(
>> 
>> Could you give me some examples where a newer POWER has lost features over
>> an older POWER?
> 
> DABR/DABRX, also Paul mentioned "one exception is the instructions in
> power6 that are register moves between gpr and fpr registers".

For those cases, just switch() explicitly for the cpu type. We don't have to 
arithmetically solve all of the feature matches :)

Alex

> I do not
> know anything else though. So your point is taken, I'll try to do what you
> want :)
> 
> 
> -- 
> Alexey

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexander Graf



> Am 22.05.2014 um 06:25 schrieb Alexey Kardashevskiy :
> 
>> On 05/22/2014 08:11 AM, Alexander Graf wrote:
>> 
>>> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
>>> At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
>>> spec allows other page sizes and we are going to implement them, we need
>>> page size to be configrable.
>>> 
>>> This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
>>> with it whereever it is possible.
>>> 
>>> This removes SPAPR_TCE_PAGE_MASK as it is no longer used.
>>> 
>>> Signed-off-by: Alexey Kardashevskiy 
>>> ---
>>>  hw/ppc/spapr_iommu.c   | 54
>>> +-
>>>  hw/ppc/spapr_pci.c |  1 +
>>>  hw/ppc/spapr_vio.c |  1 +
>>>  include/hw/ppc/spapr.h |  3 ++-
>>>  4 files changed, 35 insertions(+), 24 deletions(-)
>>> 
>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>> index 90de3e3..e765a6d 100644
>>> --- a/hw/ppc/spapr_iommu.c
>>> +++ b/hw/ppc/spapr_iommu.c
>>> @@ -70,12 +70,13 @@ static IOMMUTLBEntry
>>> spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>>>if (tcet->bypass) {
>>>  ret.perm = IOMMU_RW;
>>> -} else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
>>> +} else if ((addr >> tcet->page_shift) < tcet->nb_table) {
>>>  /* Check if we are in bound */
>>> -tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
>>> -ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
>>> -ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>> -ret.addr_mask = SPAPR_TCE_PAGE_MASK;
>>> +target_ulong mask = ~((1 << tcet->page_shift) - 1);
>> 
>> Why target_ulong? This should be u64 or hwaddr or something along those
>> lines, no?
> 
> Should be hwaddr, right, will fix.
> 
>> Also, can the mask grow bigger than 31bits? If so you probably
>> want 1ULL (below as well).
> 
> It cannot now but PAPR allows pages to be 16GB so you are making good point
> here, will fix too.
> 
>> In fact, we might be better off with a few more fields to tcet. Just add
>> page_mask and page_size in addition to the page_shift one and use them
>> instead of calculating them over and over again.
> 
> This is only used for emulated devices, not even for virtio. How much will
> we earn from that optimization? Will it be even measurable? On the other
> hand, this creates an opportunity (subtle, yes) to screw things up. Still
> worth?

I'm not concerned about speed here, but readibility :). Inline calculations are 
just harder to read.

The alternative would be to extract every calculation into a local variable and 
use that instead ;).

Alex

> 
> 
> 
> -- 
> Alexey

Re: [Qemu-devel] [PATCH 1/2] virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi

2014-05-22 Thread Fam Zheng

On Thu, 05/22 09:03, Paolo Bonzini wrote:
> Il 22/05/2014 08:36, Fam Zheng ha scritto:
> >-
> >-if (!req->dev->blk.scsi) {
> >-status = VIRTIO_BLK_S_UNSUPP;
> >-goto fail;
> >-}
> 
> Where did you move this condition?
> 
> Also, the scsi field of VirtIOBlockReq is now unused.

Just realized I missed that. Will fix.

Thanks for reviewing!

Fam

Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve 00:02.0 for INTEL IGD

2014-05-22 Thread Chen, Tiejun

> -Original Message-
> From: Gerd Hoffmann [mailto:kra...@redhat.com]
> Sent: Thursday, May 22, 2014 2:45 PM
> To: Chen, Tiejun
> Cc: Anthony PERARD; Daniel P. Berrange; peter.mayd...@linaro.org;
> xen-de...@lists.xensource.com; m...@redhat.com;
> stefano.stabell...@eu.citrix.com; Kay, Allen M; kelly.zyta...@amd.com;
> qemu-devel@nongnu.org; Zhang, Yang Z; anth...@codemonkey.ws
> Subject: Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve
> 00:02.0 for INTEL IGD
> 
>   Hi,
> 
> > > Another useful thing would be to not create the xen platform device
> > > in case "-nodefaults" was specified on the command line (that switch
> > > turns off a bunch of other devices present by default: vga, nic, cdrom, 
> > > ...).
> >
> > Currently looks 'xen-platform' itself can't be created, not those devices
> existed on that.
> 
> The error message looks more like libxl tries to hot-unplug the xen platform
> device.
> 
> Attached patch (untested!) hooks up the xen platform device to the default
> device code we have in qemu.  Two effects:
> 
>   (1) As mentioned above the device will not be created in case
>   -nodefaults  is specified on the command line.

I think this is unnecessary to add this option to support GFX passthrough.

>   (2) Autocreating the device is also turned off in case xen-platform
>   is added manually via -device.
> 
> With the patch applied you should be able to move the xen-platform device to
> some other place with a simple 'qemu -device xen-platform,addr=$slot'.
> 

Yes, this patch works now. So I assume this would be merged into ML soon.

Thanks for your great help, Gerd!

Tiejun

Re: [Qemu-devel] [PATCH v2 8/8] spapr_pci: Use XICS interrupt allocator and do not cache interrupts in PHB

2014-05-22 Thread Alexander Graf



> Am 22.05.2014 um 08:53 schrieb Alexey Kardashevskiy :
> 
>> On 05/21/2014 10:42 PM, Alexey Kardashevskiy wrote:
>>> On 05/21/2014 08:35 PM, Alexander Graf wrote:
>>> 
 On 21.05.14 12:13, Alexey Kardashevskiy wrote:
> On 05/21/2014 07:50 PM, Alexander Graf wrote:
>> On 21.05.14 11:33, Alexey Kardashevskiy wrote:
>>> On 05/21/2014 07:13 PM, Alexander Graf wrote:
 On 21.05.14 11:11, Michael S. Tsirkin wrote:
> On Wed, May 21, 2014 at 11:06:09AM +0200, Alexander Graf wrote:
>> On 21.05.14 10:52, Alexey Kardashevskiy wrote:
>>> On 05/21/2014 06:40 PM, Alexander Graf wrote:
 On 15.05.14 11:59, Alexey Kardashevskiy wrote:
 Currently SPAPR PHB keeps track of all allocated MSI/MISX
 interrupt as
 XICS used to be unable to reuse interrupts which becomes a
 problem for
 dynamic MSI reconfiguration which is happening on guest driver
 reload or
 PCI hot (un)plug. Another problem is that PHB has a limit of 
 devices
 supporting MSI/MSIX (SPAPR_MSIX_MAX_DEVS=32) and there is no good
 reason
 for that.
 
 This makes use of new XICS ability to reuse interrupts.
 
 This removes cached MSI configuration from SPAPR PHB so the first
 IRQ
 number
 of a device is stored in MSI/MSIX config space so there is no
 need to
 store
 this anywhere else. From now on, SPAPR PHB only keeps flags telling
 what
 type
 of interrupt for which device it has configured in order to return
 error if
 (for example) MSIX was enabled and the guest is trying to disable
 MSI
 which
 it has not enabled.
 
 This removes a limit for the maximum number of MSIX-enabled devices
 per PHB,
 now XICS and PCI bus capacity are the only limitation.
 
 This changes migration stream as it fixes
 vmstate_spapr_pci_msi::name
 which was
 wrong since the beginning.
 
 This fixed traces to be more informative.
 
 Signed-off-by: Alexey Kardashevskiy 
 ---
 
 In reality either MSIX or MSI is enabled, never both. So I could
 remove
 msi/msix
 bitmaps from this patch, would it make sense?
>>> Is this a hard requirement? Does a device have to choose between
>>> MSIX and
>>> MSI or could it theoretically have both enabled? Is this a PCI
>>> limitation,
>>> a PAPR/XICS limitation or just a limitation of your implementation?
>> My implementation does not have this limitation, I asked if I can
>> simplify
>> code by introducing one :)
>> 
>> I cannot see any reason why PCI cannot have both MSI and MSIX enabled
>> but
>> it does not seem to be used by anyone => cannot debug and confirm.
>> 
>> PAPR spec assumes that if the guest tries enabling MSIX when MSI is
>> already
>> enabled, this is a "change", not enabling both types. But it also
>> says MSI
>> and MSIX vector numbers are not shared. Hm.
> Yeah, I'm not aware of any limitation on hardware here and I'd
> rather not impose one.
> 
> Michael, do you know of any hardware that uses MSI *and* MSI-X at
> the same time?
> 
> 
> Alex
 No, and the PCI spec says:
  A function is permitted to implement both MSI and MSI-X, but
 system
  software is
  prohibited from enabling both at the same time. If system software
  enables both at the same time, the result is undefined.
>>> Ah, cool. So yes Alexey, feel free to impose it :).
>> Heh. This solves just half of the problem - I still have to keep track of
>> what device got MSI/MSIX configured via that ibm,change-msi interface. I
>> was hoping I can store such flag somewhere in a device PCI config space
>> but
>> MSI/MSIX enable bit is not good as it is not set when those calls are
>> made.
>> And I cannot rely on address/data fields much as the guest can change 
>> them
>> (I already use them to store IRQ numbers and btw it is missing checks 
>> when
>> I read them back for disposal, I'll fix in next round).
>> 
>> Or on "enable" event I could put IRQ numbers to .data of MSI config space
>> and on "disable" check if it is not zero, then configuration took place,
>> then I can remove my msi[]/msix[] flag arrays. If the guest did any 
>> change
>> to MSI/MSIX config space (it does not on SPAPR except weird selftest
>>

[Qemu-devel] [PATCH] xen: make xen-platform a default device

2014-05-22 Thread Gerd Hoffmann

Patch hooks up the xen platform device to the default device code we
have in qemu.  Two effects:

  (1) The device will not be created in case -nodefaults is specified
  on the command line.
  (2) Autocreating the device is also turned off in case xen-platform
  is added manually via -device.

With the patch applied you can move the xen-platform device to some
other place with a simple 'qemu -device xen-platform,addr=$slot'.

Tested-by: Tiejun Chen 
Signed-off-by: Gerd Hoffmann 
---
 hw/i386/pc_piix.c| 2 +-
 include/hw/xen/xen.h | 1 +
 vl.c | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index eaf3e61..f987d03 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -385,7 +385,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
 pc_init_pci(args);
 
 bus = pci_find_primary_bus();
-if (bus != NULL) {
+if (bus != NULL && default_xenplatform) {
 pci_create_simple(bus, -1, "xen-platform");
 }
 }
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 85fda3d..b350413 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -20,6 +20,7 @@ enum xen_mode {
 
 extern uint32_t xen_domid;
 extern enum xen_mode xen_mode;
+extern int default_xenplatform;
 
 extern bool xen_allowed;
 
diff --git a/vl.c b/vl.c
index 709d8cd..673148e 100644
--- a/vl.c
+++ b/vl.c
@@ -226,6 +226,7 @@ static int default_floppy = 1;
 static int default_cdrom = 1;
 static int default_sdcard = 1;
 static int default_vga = 1;
+int default_xenplatform = 1;
 
 static struct {
 const char *driver;
@@ -247,6 +248,7 @@ static struct {
 { .driver = "isa-cirrus-vga",   .flag = &default_vga   },
 { .driver = "vmware-svga",  .flag = &default_vga   },
 { .driver = "qxl-vga",  .flag = &default_vga   },
+{ .driver = "xen-platform", .flag = &default_xenplatform },
 };
 
 static QemuOptsList qemu_rtc_opts = {
@@ -4101,6 +4103,7 @@ int main(int argc, char **argv, char **envp)
 default_monitor = 0;
 default_net = 0;
 default_vga = 0;
+default_xenplatform = 0;
 }
 
 if (is_daemonized()) {
-- 
1.8.3.1

[Qemu-devel] [PATCH v2 1/2] openpic: Move definition of openpic_reset

2014-05-22 Thread Paul Janzen

This patch moves the definition of openpic_reset after the various
register read/write functions. No functional change.  It is in
preparation for using the register read/write functions in
openpic_reset.

Signed-off-by: Paul Janzen 
---
 hw/intc/openpic.c |   99 +++--
 1 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 17136c9..81469ff 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -192,6 +192,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, 
hwaddr addr,
   int idx);
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
uint32_t val, int idx);
+static void openpic_reset(DeviceState *d);
 
 typedef enum IRQType {
 IRQ_TYPE_NORMAL = 0,
@@ -534,55 +535,6 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int 
level)
 }
 }
 
-static void openpic_reset(DeviceState *d)
-{
-OpenPICState *opp = OPENPIC(d);
-int i;
-
-opp->gcr = GCR_RESET;
-/* Initialise controller registers */
-opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
-   ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
-   (opp->vid << FRR_VID_SHIFT);
-
-opp->pir = 0;
-opp->spve = -1 & opp->vector_mask;
-opp->tfrr = opp->tfrr_reset;
-/* Initialise IRQ sources */
-for (i = 0; i < opp->max_irq; i++) {
-opp->src[i].ivpr = opp->ivpr_reset;
-opp->src[i].idr  = opp->idr_reset;
-
-switch (opp->src[i].type) {
-case IRQ_TYPE_NORMAL:
-opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
-break;
-
-case IRQ_TYPE_FSLINT:
-opp->src[i].ivpr |= IVPR_POLARITY_MASK;
-break;
-
-case IRQ_TYPE_FSLSPECIAL:
-break;
-}
-}
-/* Initialise IRQ destinations */
-for (i = 0; i < MAX_CPU; i++) {
-opp->dst[i].ctpr  = 15;
-memset(&opp->dst[i].raised, 0, sizeof(IRQQueue));
-opp->dst[i].raised.next = -1;
-memset(&opp->dst[i].servicing, 0, sizeof(IRQQueue));
-opp->dst[i].servicing.next = -1;
-}
-/* Initialise timers */
-for (i = 0; i < OPENPIC_MAX_TMR; i++) {
-opp->timers[i].tccr = 0;
-opp->timers[i].tbcr = TBCR_CI;
-}
-/* Go out of RESET state */
-opp->gcr = 0;
-}
-
 static inline uint32_t read_IRQreg_idr(OpenPICState *opp, int n_IRQ)
 {
 return opp->src[n_IRQ].idr;
@@ -1466,6 +1418,55 @@ static int openpic_load(QEMUFile* f, void *opaque, int 
version_id)
 return 0;
 }
 
+static void openpic_reset(DeviceState *d)
+{
+OpenPICState *opp = OPENPIC(d);
+int i;
+
+opp->gcr = GCR_RESET;
+/* Initialise controller registers */
+opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+   ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
+   (opp->vid << FRR_VID_SHIFT);
+
+opp->pir = 0;
+opp->spve = -1 & opp->vector_mask;
+opp->tfrr = opp->tfrr_reset;
+/* Initialise IRQ sources */
+for (i = 0; i < opp->max_irq; i++) {
+opp->src[i].ivpr = opp->ivpr_reset;
+opp->src[i].idr  = opp->idr_reset;
+
+switch (opp->src[i].type) {
+case IRQ_TYPE_NORMAL:
+opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+break;
+
+case IRQ_TYPE_FSLINT:
+opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+break;
+
+case IRQ_TYPE_FSLSPECIAL:
+break;
+}
+}
+/* Initialise IRQ destinations */
+for (i = 0; i < MAX_CPU; i++) {
+opp->dst[i].ctpr  = 15;
+memset(&opp->dst[i].raised, 0, sizeof(IRQQueue));
+opp->dst[i].raised.next = -1;
+memset(&opp->dst[i].servicing, 0, sizeof(IRQQueue));
+opp->dst[i].servicing.next = -1;
+}
+/* Initialise timers */
+for (i = 0; i < OPENPIC_MAX_TMR; i++) {
+opp->timers[i].tccr = 0;
+opp->timers[i].tbcr = TBCR_CI;
+}
+/* Go out of RESET state */
+opp->gcr = 0;
+}
+
 typedef struct MemReg {
 const char *name;
 MemoryRegionOps const  *ops;
-- 
1.7.1

Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device

2014-05-22 Thread Michael S. Tsirkin

On Thu, May 22, 2014 at 09:20:50AM +0200, Gerd Hoffmann wrote:
> Patch hooks up the xen platform device to the default device code we
> have in qemu.  Two effects:
> 
>   (1) The device will not be created in case -nodefaults is specified
>   on the command line.
>   (2) Autocreating the device is also turned off in case xen-platform
>   is added manually via -device.
> 
> With the patch applied you can move the xen-platform device to some
> other place with a simple 'qemu -device xen-platform,addr=$slot'.
> 
> Tested-by: Tiejun Chen 
> Signed-off-by: Gerd Hoffmann 

Applied, thanks!

> ---
>  hw/i386/pc_piix.c| 2 +-
>  include/hw/xen/xen.h | 1 +
>  vl.c | 3 +++
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index eaf3e61..f987d03 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -385,7 +385,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
>  pc_init_pci(args);
>  
>  bus = pci_find_primary_bus();
> -if (bus != NULL) {
> +if (bus != NULL && default_xenplatform) {
>  pci_create_simple(bus, -1, "xen-platform");
>  }
>  }
> diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
> index 85fda3d..b350413 100644
> --- a/include/hw/xen/xen.h
> +++ b/include/hw/xen/xen.h
> @@ -20,6 +20,7 @@ enum xen_mode {
>  
>  extern uint32_t xen_domid;
>  extern enum xen_mode xen_mode;
> +extern int default_xenplatform;
>  
>  extern bool xen_allowed;
>  
> diff --git a/vl.c b/vl.c
> index 709d8cd..673148e 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -226,6 +226,7 @@ static int default_floppy = 1;
>  static int default_cdrom = 1;
>  static int default_sdcard = 1;
>  static int default_vga = 1;
> +int default_xenplatform = 1;
>  
>  static struct {
>  const char *driver;
> @@ -247,6 +248,7 @@ static struct {
>  { .driver = "isa-cirrus-vga",   .flag = &default_vga   },
>  { .driver = "vmware-svga",  .flag = &default_vga   },
>  { .driver = "qxl-vga",  .flag = &default_vga   },
> +{ .driver = "xen-platform", .flag = &default_xenplatform },
>  };
>  
>  static QemuOptsList qemu_rtc_opts = {
> @@ -4101,6 +4103,7 @@ int main(int argc, char **argv, char **envp)
>  default_monitor = 0;
>  default_net = 0;
>  default_vga = 0;
> +default_xenplatform = 0;
>  }
>  
>  if (is_daemonized()) {
> -- 
> 1.8.3.1

[Qemu-devel] [PATCH v2 2/2] openpic: Reset IRQ source private members

2014-05-22 Thread Paul Janzen

The openpic emulation code maintains an allowable-CPU's bitmap
("destmask") for each IRQ source which is calculated from the IDR
register value whenever the guest OS writes to it.  However, if the
guest OS relies on the system to set the IDR register to a default
value at reset, and does not write IDR, then destmask does not get
updated, and interrupts do not get propagated to the guest.
Additionally, if an IRQ source is marked as critical, the source's
internal "output" and "nomask" fields are not correctly reset when the
PIC is reset.

Fix both these issues by calling write_IRQreg_idr from within
openpic_reset, instead of simply setting the IDR register to the
specified idr_reset value.

Signed-off-by: Paul Janzen 
---
 hw/intc/openpic.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 81469ff..811db6f 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -1435,8 +1435,6 @@ static void openpic_reset(DeviceState *d)
 /* Initialise IRQ sources */
 for (i = 0; i < opp->max_irq; i++) {
 opp->src[i].ivpr = opp->ivpr_reset;
-opp->src[i].idr  = opp->idr_reset;
-
 switch (opp->src[i].type) {
 case IRQ_TYPE_NORMAL:
 opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
@@ -1449,6 +1447,8 @@ static void openpic_reset(DeviceState *d)
 case IRQ_TYPE_FSLSPECIAL:
 break;
 }
+
+write_IRQreg_idr(opp, i, opp->idr_reset);
 }
 /* Initialise IRQ destinations */
 for (i = 0; i < MAX_CPU; i++) {
-- 
1.7.1

Re: [Qemu-devel] [PATCH v3 16/22] target-arm: A64: Generalize ERET to various ELs

2014-05-22 Thread Peter Maydell

On 22 May 2014 01:48, Edgar E. Iglesias  wrote:
> On Wed, May 21, 2014 at 08:20:20PM +0100, Peter Maydell wrote:
>> it needs to also fix the bit in the "returning to an exception
>> level which is 32 bit" which says "new_el = 0" since that's
>> not guaranteed to be true any more. (Also I think the register
>> mapping for AArch32 EL2/EL1 needs handling correctly.)
>
> I've tried to stay away from touching too much of the AArch32
> code as I haven't had a setup to test 64/32 transitions
> beyond a64/el1 and a32/el0.

OK; if we put in a TODO comment that we assume EL1..EL3
are 64 bit currently, we'll have a marker to come back and fix
later.

thanks
-- PMM

Re: [Qemu-devel] [PATCH] spapr_iommu: Replace @instance_id with LIOBN for migration

2014-05-22 Thread Alexey Kardashevskiy

On 05/12/2014 06:46 PM, Alexey Kardashevskiy wrote:
> SPAPR IOMMU is a bus-less device and therefore its only ID in
> migration stream is an instance id which is not reliable ID
> as it depends on the command line parameters order. Since
> libvirt may change the order, we need something better than that.
> 
> This removes VMSD descriptor from the class definitiion and
> registers it with @liobn as an intance ID to let the destination
> side find the right device to receive migration data.

Ping?


> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  hw/ppc/spapr_iommu.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 72493d8..7c3f8c2 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -140,6 +140,9 @@ static int spapr_tce_table_realize(DeviceState *dev)
>  
>  QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
>  
> +vmstate_register(DEVICE(tcet), tcet->liobn, &vmstate_spapr_tce_table,
> + tcet);
> +
>  return 0;
>  }
>  
> @@ -323,7 +326,6 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char 
> *propname,
>  static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
>  {
>  DeviceClass *dc = DEVICE_CLASS(klass);
> -dc->vmsd = &vmstate_spapr_tce_table;
>  dc->init = spapr_tce_table_realize;
>  dc->reset = spapr_tce_reset;
>  
> 


-- 
Alexey

Re: [Qemu-devel] [PATCH v2 16/23] target-arm: Use arm_current_sctlr to access SCTLR

2014-05-22 Thread Edgar E. Iglesias

On Tue, May 13, 2014 at 06:16:01PM +0200, Fabian Aggeler wrote:
> Add SCTLR_EL3 and introduce new function to access correct
> instance of SCTLR in different modes/worlds.

Hi,

AArch64 has a couple of insn/regs that do address translation
as seen by other ELs. E.g, from EL3 you can perform address
translation as seen by EL0. See for example ATS12E0R.
AArch32 has similar features, it can also lower S to NS when in S mode.

With regards to arm_current_sctlr() and the use in get_phys_addr, I
don't think it is enough here.

I was planning to post was patches that add new args to get_phys_addr()
with the translation-EL and flags to control if stage-2 translation
should be done or not. That way ats_write() can keep reusing
get_phys_addr(). We would need to pass the security state as an argument
aswell to handle AArch32. Does that make sense?

Cheers,
Edgar

> 
> Signed-off-by: Fabian Aggeler 
> ---
>  hw/arm/pxa2xx.c|  2 +-
>  target-arm/cpu-qom.h   |  1 +
>  target-arm/cpu.c   |  4 +--
>  target-arm/cpu.h   | 14 ++-
>  target-arm/helper.c| 67 
> ++
>  target-arm/op_helper.c |  2 +-
>  6 files changed, 69 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
> index e0cd847..8b69e72 100644
> --- a/hw/arm/pxa2xx.c
> +++ b/hw/arm/pxa2xx.c
> @@ -274,7 +274,7 @@ static void pxa2xx_pwrmode_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  case 3:
>  s->cpu->env.uncached_cpsr = ARM_CPU_MODE_SVC;
>  s->cpu->env.daif = PSTATE_A | PSTATE_F | PSTATE_I;
> -s->cpu->env.cp15.c1_sys = 0;
> +s->cpu->env.cp15.c1_sys_el1 = 0;
>  s->cpu->env.cp15.c1_coproc = 0;
>  s->cpu->env.cp15.ttbr0_el1 = 0;
>  s->cpu->env.cp15.c3 = 0;
> diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
> index edc7f26..38cbd43 100644
> --- a/target-arm/cpu-qom.h
> +++ b/target-arm/cpu-qom.h
> @@ -119,6 +119,7 @@ typedef struct ARMCPU {
>  uint32_t mvfr2;
>  uint32_t ctr;
>  uint32_t reset_sctlr;
> +uint32_t reset_sctlr_el3;
>  uint32_t id_pfr0;
>  uint32_t id_pfr1;
>  uint32_t id_dfr0;
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index b0d4a9b..bdbdbb1 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -100,7 +100,7 @@ static void arm_cpu_reset(CPUState *s)
>  #if defined(CONFIG_USER_ONLY)
>  env->pstate = PSTATE_MODE_EL0t;
>  /* Userspace expects access to CTL_EL0 and the cache ops */
> -env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
> +env->cp15.c1_sys_el1 |= SCTLR_UCT | SCTLR_UCI;
>  /* and to the FP/Neon instructions */
>  env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
>  #else
> @@ -146,7 +146,7 @@ static void arm_cpu_reset(CPUState *s)
>  }
>  }
>  
> -if (env->cp15.c1_sys & SCTLR_V) {
> +if (arm_current_sctlr(env) & SCTLR_V) {
>  env->regs[15] = 0x;
>  }
>  
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index a4bb6bd..780c1f5 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -180,7 +180,8 @@ typedef struct CPUARMState {
>  struct {
>  uint32_t c0_cpuid;
>  uint64_t c0_cssel; /* Cache size selection.  */
> -uint64_t c1_sys; /* System control register.  */
> +uint64_t c1_sys_el1; /* System control register (EL1). */
> +uint64_t c1_sys_el3; /* System control register (EL3).  */
>  uint64_t c1_coproc; /* Coprocessor access register.  */
>  uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
>  uint32_t c1_scr; /* secure config register.  */
> @@ -971,6 +972,17 @@ static inline int arm_current_pl(CPUARMState *env)
>  return 1;
>  }
>  
> +static inline uint64_t arm_current_sctlr(CPUARMState *env)
> +{
> +if (is_a64(env) && arm_current_pl(env) == 3) {
> +/* EL3 has its own SCTLR */
> +return env->cp15.c1_sys_el3;
> +} else {
> +/* Only A32 with NS-bit clear accesses secure instance of SCTLR */
> +return A32_MAPPED_EL3_REG_GET(env, c1_sys);
> +}
> +}
> +
>  typedef struct ARMCPRegInfo ARMCPRegInfo;
>  
>  typedef enum CPAccessResult {
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 98c3dc9..ac8b15a 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -1767,7 +1767,7 @@ static void aa64_fpsr_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  
>  static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo 
> *ri)
>  {
> -if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
> +if (arm_current_pl(env) == 0 && !(arm_current_sctlr(env) & SCTLR_UMA)) {
>  return CP_ACCESS_TRAP;
>  }
>  return CP_ACCESS_OK;
> @@ -1785,7 +1785,7 @@ static CPAccessResult aa64_cacheop_access(CPUARMState 
> *env,
>  /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
>   * SCTLR_EL1.UCI is set.
>   */
> -if

[Qemu-devel] [PATCH v2 0/2] dataplane: Enable "scsi=on"

2014-05-22 Thread Fam Zheng

This makes the SG_IO code of non-dataplane available to dataplane, so that
dataplane can use to allow scsi=on.

v2:
[1/2] Fix scsi=off case and drop VirtIOBlockReq.scsi.
[2/2] Pass conf to virtio_blk_handle_scsi_req.

Fam


Fam Zheng (2):
  virtio-blk: Factor out virtio_blk_handle_scsi_req from
virtio_blk_handle_scsi
  dataplane: Support VIRTIO_BLK_T_SCSI_CMD

 hw/block/dataplane/virtio-blk.c | 18 +
 hw/block/virtio-blk.c   | 83 +++--
 include/hw/virtio/virtio-blk.h  |  4 ++
 3 files changed, 60 insertions(+), 45 deletions(-)

-- 
1.9.2

[Qemu-devel] [PATCH v2 2/2] dataplane: Support VIRTIO_BLK_T_SCSI_CMD

2014-05-22 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/block/dataplane/virtio-blk.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 46a6824..e833045 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -201,6 +201,15 @@ static void do_flush_cmd(VirtIOBlockDataPlane *s, 
VirtQueueElement *elem,
 bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
 }
 
+static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+QEMUIOVector *inhdr)
+{
+int status;
+
+status = virtio_blk_handle_scsi_req(s->blk->conf.bs, s->blk, elem);
+complete_request_early(s, elem, inhdr, status);
+}
+
 static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
 struct iovec *iov = elem->out_sg;
@@ -249,8 +258,7 @@ static int process_request(VirtIOBlockDataPlane *s, 
VirtQueueElement *elem)
 return 0;
 
 case VIRTIO_BLK_T_SCSI_CMD:
-/* TODO support SCSI commands */
-complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
+do_scsi_cmd(s, elem, inhdr);
 return 0;
 
 case VIRTIO_BLK_T_FLUSH:
@@ -326,12 +334,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 return;
 }
 
-if (blk->scsi) {
-error_setg(errp,
-   "device is incompatible with x-data-plane, use scsi=off");
-return;
-}
-
 /* If dataplane is (re-)enabled while the guest is running there could be
  * block jobs that can conflict.
  */
-- 
1.9.2

[Qemu-devel] [PATCH v2 1/2] virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi

2014-05-22 Thread Fam Zheng

The common logic to process a scsi request in a VirtQueueElement is
extracted to a function to share with dataplane.

This makes VirtIOBlockReq.scsi unused, so drop it.

Signed-off-by: Fam Zheng 
---
 hw/block/virtio-blk.c  | 83 +++---
 include/hw/virtio/virtio-blk.h |  4 ++
 2 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 5e9433d..78e81a6 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,7 +33,6 @@ typedef struct VirtIOBlockReq
 VirtQueueElement elem;
 struct virtio_blk_inhdr *in;
 struct virtio_blk_outhdr *out;
-struct virtio_scsi_inhdr *scsi;
 QEMUIOVector qiov;
 struct VirtIOBlockReq *next;
 BlockAcctCookie acct;
@@ -125,13 +124,16 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock 
*s)
 return req;
 }
 
-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+int virtio_blk_handle_scsi_req(BlockDriverState *bs,
+   VirtIOBlkConf *conf,
+   VirtQueueElement *elem)
 {
+int status = VIRTIO_BLK_S_OK;
+struct virtio_scsi_inhdr *scsi = NULL;
 #ifdef __linux__
-int ret;
 int i;
+struct sg_io_hdr hdr;
 #endif
-int status = VIRTIO_BLK_S_OK;
 
 /*
  * We require at least one output segment each for the virtio_blk_outhdr
@@ -140,63 +142,61 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
  * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
  * and the sense buffer pointer in the input segments.
  */
-if (req->elem.out_num < 2 || req->elem.in_num < 3) {
-virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-g_free(req);
-return;
+if (elem->out_num < 2 || elem->in_num < 3) {
+status = VIRTIO_BLK_S_IOERR;
+goto fail;
 }
 
 /*
  * The scsi inhdr is placed in the second-to-last input segment, just
  * before the regular inhdr.
  */
-req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
 
-if (!req->dev->blk.scsi) {
+/*
+ * No support for bidirection commands yet.
+ */
+if (elem->out_num > 2 && elem->in_num > 3) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
 
-/*
- * No support for bidirection commands yet.
- */
-if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+if (!conf->scsi) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
 
 #ifdef __linux__
-struct sg_io_hdr hdr;
 memset(&hdr, 0, sizeof(struct sg_io_hdr));
 hdr.interface_id = 'S';
-hdr.cmd_len = req->elem.out_sg[1].iov_len;
-hdr.cmdp = req->elem.out_sg[1].iov_base;
+hdr.cmd_len = elem->out_sg[1].iov_len;
+hdr.cmdp = elem->out_sg[1].iov_base;
 hdr.dxfer_len = 0;
 
-if (req->elem.out_num > 2) {
+if (elem->out_num > 2) {
 /*
  * If there are more than the minimally required 2 output segments
  * there is write payload starting from the third iovec.
  */
 hdr.dxfer_direction = SG_DXFER_TO_DEV;
-hdr.iovec_count = req->elem.out_num - 2;
+hdr.iovec_count = elem->out_num - 2;
 
 for (i = 0; i < hdr.iovec_count; i++)
-hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
 
-hdr.dxferp = req->elem.out_sg + 2;
+hdr.dxferp = elem->out_sg + 2;
 
-} else if (req->elem.in_num > 3) {
+} else if (elem->in_num > 3) {
 /*
  * If we have more than 3 input segments the guest wants to actually
  * read data.
  */
 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-hdr.iovec_count = req->elem.in_num - 3;
+hdr.iovec_count = elem->in_num - 3;
 for (i = 0; i < hdr.iovec_count; i++)
-hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+hdr.dxfer_len += elem->in_sg[i].iov_len;
 
-hdr.dxferp = req->elem.in_sg;
+hdr.dxferp = elem->in_sg;
 } else {
 /*
  * Some SCSI commands don't actually transfer any data.
@@ -204,11 +204,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 hdr.dxfer_direction = SG_DXFER_NONE;
 }
 
-hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
-hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
 
-ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
-if (ret) {
+status = bdrv_ioctl(bs, SG_IO, &hdr);
+if (status) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
@@ -224,23 +224,32 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 hdr.status = CHECK_CONDITION;
 }
 
-stl_p(&req->scsi->errors,
+stl_p(&scsi->errors,
   hdr.statu

Re: [Qemu-devel] [PATCH v2 14/23] target-arm: add banked coprocessor register type and macros

2014-05-22 Thread Edgar E. Iglesias

On Tue, May 13, 2014 at 06:15:59PM +0200, Fabian Aggeler wrote:
> Banked CP registers can be defined with a A32_BANKED_REG macro which defines
> a non-secure instance of the register followed by an adjacent secure instance.
> Using a union makes the code backwards-compatible since the non-secure
> instance can normally be accessed by its existing name.
> 
> When translating a banked CP register access instruction in monitor mode,
> the SCR.NS bit determines which instance is going to be accessed.
> 
> If EL3 is operating in Aarch64 state coprocessor registers are not
> banked anymore but in some cases have its own _EL3 instance.

Hi

Regarding the sctlr regs and the banking of S/NS regs in general, I
think the general pattern should be to arrayify the regs that need
to be indexed by EL.

This is an example of a structure (indexed by EL) with the aarch32
struct beeing here to help clarify.
union {
struct {
uint64_t pad;
uint64_t sctlr_ns;
uint64_t hsctlr;
uint64_t sctlr_s;
} aarch32;
uint64_t sctlr_el[4];
}

I think we would naturally want to register this for AArch32 as banked
with NS=sctlr_el[1] and S=sctlr_el[3].

Another register example is FAR. For FAR, things look like this
(when EL2 is available and ignoring DFAR for clarity):
union {
struct {
uint64_t pad;
uint64_t ifar_ns;
uint64_t ifar_s;
} aarch32;
uint64_t far_el[4];
}

Preferably we need something that can handle both cases.
An option could be to arrayify the .fieldoffset in reginfos?
If we don't want hardcoded TZ knowledge in the generic cpreg accessors,
maybe there could be a generic function that returns the .fieldoffset
index based on CPUState (e.g NS=0, S=1 etc). Or maybe specialized
read/write fns would be enough.

Just an idea to get the discussion going.

struct ARMCPRegInfo {

union {
ptrdiff_t fieldoffset;
ptrdiff_t fieldoffsets[2];
};
};

unsigned int arm_cpreg_tzbank_idx(CPUARMState *env)
{
return is_a64(env) ? 0 : arm_is_secure(env);
}

Example:
{ .name = "FAR_EL1", .state = ARM_CP_STATE_BOTH,
  .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
  .access = PL1_RW,
  .fieldindex_fn = arm_cpreg_tzbank_idx,
  .fieldoffsets[] = { offsetof(CPUARMState, cp15.far_el[1]),
  offsetof(CPUARMState, cp15.far_el[2])},
  .resetvalue = 0, },

  ARMCPRegInfo sctlr = {
  .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
  .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
  .access = PL1_RW,
  .fieldindex_fn = arm_cpreg_tzbank_idx,
  .fieldoffsets[] = { offsetof(CPUARMState, cp15.sctlr_el[1]),
  offsetof(CPUARMState, cp15.sctlr_el[3]),
  },
  /* Assuming raw_write and raw_read respect the indexing.  */
  .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
  .raw_writefn = raw_write,
  };

Best regards,
Edgar





> 
> Signed-off-by: Sergey Fedorov 
> Signed-off-by: Fabian Aggeler 
> ---
>  target-arm/cpu.h   | 121 
> +
>  target-arm/helper.c|  64 --
>  target-arm/translate.c |  19 +---
>  3 files changed, 184 insertions(+), 20 deletions(-)
> 
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index a970d55..9e325ac 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -80,6 +80,16 @@
>  #define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
>  #endif
>  
> +/* Define a banked coprocessor register state field. Use %name as the
> + * register field name for the secure instance. The non-secure instance
> + * has a "_nonsecure" suffix.
> + */
> +#define A32_BANKED_REG(type, name) \
> +union { \
> +type name; \
> +type name##_banked[2]; \
> +}
> +
>  /* Meanings of the ARMCPU object's two inbound GPIO lines */
>  #define ARM_CPU_IRQ 0
>  #define ARM_CPU_FIQ 1
> @@ -89,6 +99,7 @@ typedef void ARMWriteCPFunc(void *opaque, int cp_info,
>  typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
> int dstreg, int operand);
>  
> +
>  struct arm_boot_info;
>  
>  #define NB_MMU_MODES 5
> @@ -673,6 +684,78 @@ static inline bool arm_el_is_aa64(CPUARMState *env, int 
> el)
>  return arm_feature(env, ARM_FEATURE_AARCH64);
>  }
>  
> +/* When EL3 is operating in Aarch32 state, the NS-bit determines
> + * whether the secure instance of a cp-register should be used. */
> +#define USE_SECURE_REG(env) ( \
> +arm_feature(env, ARM_FEATURE_SECURITY_EXTENSIONS) && 
> \
> +!arm_el_is_aa64(env, 3) && \
> +!((env)->cp15.c1_scr & 1/*NS*/))
> +
> +#define NONSECURE_BANK 0
> +#define SECURE_BANK 1
> +
> +#define A32_BANKED_REG_GET(env, regname) \
> +((USE_SECURE_REG(env)) ? \
> +(env)->cp15.regname##_banked[SECURE_BANK] : \
> +(env)->cp15.regname)
>

Re: [Qemu-devel] [PATCH v2 1/2] virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 09:37, Fam Zheng ha scritto:

-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+int virtio_blk_handle_scsi_req(BlockDriverState *bs,
+   VirtIOBlkConf *conf,
+   VirtQueueElement *elem)


Two more comments...

please pass the VirtIOBlock * instead of bs/conf here.


 {
+int status = VIRTIO_BLK_S_OK;
+struct virtio_scsi_inhdr *scsi = NULL;
 #ifdef __linux__
-int ret;
 int i;
+struct sg_io_hdr hdr;
 #endif
-int status = VIRTIO_BLK_S_OK;

 /*
  * We require at least one output segment each for the virtio_blk_outhdr
@@ -140,63 +142,61 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
  * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
  * and the sense buffer pointer in the input segments.
  */
-if (req->elem.out_num < 2 || req->elem.in_num < 3) {
-virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-g_free(req);
-return;
+if (elem->out_num < 2 || elem->in_num < 3) {
+status = VIRTIO_BLK_S_IOERR;
+goto fail;
 }

 /*
  * The scsi inhdr is placed in the second-to-last input segment, just
  * before the regular inhdr.
  */
-req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;

-if (!req->dev->blk.scsi) {
+/*
+ * No support for bidirection commands yet.
+ */
+if (elem->out_num > 2 && elem->in_num > 3) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }

-/*
- * No support for bidirection commands yet.
- */
-if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+if (!conf->scsi) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }


Swapping the two conditionals unnecessarily makes the patch bigger.

Paolo


 #ifdef __linux__
-struct sg_io_hdr hdr;
 memset(&hdr, 0, sizeof(struct sg_io_hdr));
 hdr.interface_id = 'S';
-hdr.cmd_len = req->elem.out_sg[1].iov_len;
-hdr.cmdp = req->elem.out_sg[1].iov_base;
+hdr.cmd_len = elem->out_sg[1].iov_len;
+hdr.cmdp = elem->out_sg[1].iov_base;
 hdr.dxfer_len = 0;

-if (req->elem.out_num > 2) {
+if (elem->out_num > 2) {
 /*
  * If there are more than the minimally required 2 output segments
  * there is write payload starting from the third iovec.
  */
 hdr.dxfer_direction = SG_DXFER_TO_DEV;
-hdr.iovec_count = req->elem.out_num - 2;
+hdr.iovec_count = elem->out_num - 2;

 for (i = 0; i < hdr.iovec_count; i++)
-hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+hdr.dxfer_len += elem->out_sg[i + 2].iov_len;

-hdr.dxferp = req->elem.out_sg + 2;
+hdr.dxferp = elem->out_sg + 2;

-} else if (req->elem.in_num > 3) {
+} else if (elem->in_num > 3) {
 /*
  * If we have more than 3 input segments the guest wants to actually
  * read data.
  */
 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-hdr.iovec_count = req->elem.in_num - 3;
+hdr.iovec_count = elem->in_num - 3;
 for (i = 0; i < hdr.iovec_count; i++)
-hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+hdr.dxfer_len += elem->in_sg[i].iov_len;

-hdr.dxferp = req->elem.in_sg;
+hdr.dxferp = elem->in_sg;
 } else {
 /*
  * Some SCSI commands don't actually transfer any data.
@@ -204,11 +204,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 hdr.dxfer_direction = SG_DXFER_NONE;
 }

-hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
-hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;

-ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
-if (ret) {
+status = bdrv_ioctl(bs, SG_IO, &hdr);
+if (status) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
@@ -224,23 +224,32 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 hdr.status = CHECK_CONDITION;
 }

-stl_p(&req->scsi->errors,
+stl_p(&scsi->errors,
   hdr.status | (hdr.msg_status << 8) |
   (hdr.host_status << 16) | (hdr.driver_status << 24));
-stl_p(&req->scsi->residual, hdr.resid);
-stl_p(&req->scsi->sense_len, hdr.sb_len_wr);
-stl_p(&req->scsi->data_len, hdr.dxfer_len);
+stl_p(&scsi->residual, hdr.resid);
+stl_p(&scsi->sense_len, hdr.sb_len_wr);
+stl_p(&scsi->data_len, hdr.dxfer_len);

-virtio_blk_req_complete(req, status);
-g_free(req);
-return;
+return status;
 #else
 abort();
 #endif

 fail:
 /* Just put anything nonzero so that the ioctl fails in the guest.  */
-stl_p(&req->scsi->errors, 255);
+if (scsi) {
+stl_p(&scsi->errors, 255);
+}
+return status;
+}
+
+stati

[Qemu-devel] [PATCH v3 0/2] dataplane: Enable "scsi=on"

2014-05-22 Thread Fam Zheng

This makes the SG_IO code of non-dataplane available to dataplane, so that
dataplane can use to allow scsi=on.

v3: Change parameter to VirtIOBlock pointer. Undo the swap in code movement.
(Paolo)

v2:
[1/2] Fix scsi=off case and drop VirtIOBlockReq.scsi.
[2/2] Pass conf to virtio_blk_handle_scsi_req.

Fam


Fam Zheng (2):
  virtio-blk: Factor out virtio_blk_handle_scsi_req from
virtio_blk_handle_scsi
  dataplane: Support VIRTIO_BLK_T_SCSI_CMD

 hw/block/dataplane/virtio-blk.c | 18 +-
 hw/block/virtio-blk.c   | 75 ++---
 include/hw/virtio/virtio-blk.h  |  3 ++
 3 files changed, 54 insertions(+), 42 deletions(-)

-- 
1.9.2

[Qemu-devel] [PATCH v3 1/2] virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi

2014-05-22 Thread Fam Zheng

The common logic to process a scsi request in a VirtQueueElement is
extracted to a function to share with dataplane.

This makes VirtIOBlockReq.scsi unused, so drop it.

Signed-off-by: Fam Zheng 
---
 hw/block/virtio-blk.c  | 75 +++---
 include/hw/virtio/virtio-blk.h |  3 ++
 2 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 5e9433d..0b1446e 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,7 +33,6 @@ typedef struct VirtIOBlockReq
 VirtQueueElement elem;
 struct virtio_blk_inhdr *in;
 struct virtio_blk_outhdr *out;
-struct virtio_scsi_inhdr *scsi;
 QEMUIOVector qiov;
 struct VirtIOBlockReq *next;
 BlockAcctCookie acct;
@@ -125,13 +124,15 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock 
*s)
 return req;
 }
 
-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+   VirtQueueElement *elem)
 {
+int status = VIRTIO_BLK_S_OK;
+struct virtio_scsi_inhdr *scsi = NULL;
 #ifdef __linux__
-int ret;
 int i;
+struct sg_io_hdr hdr;
 #endif
-int status = VIRTIO_BLK_S_OK;
 
 /*
  * We require at least one output segment each for the virtio_blk_outhdr
@@ -140,19 +141,18 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
  * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
  * and the sense buffer pointer in the input segments.
  */
-if (req->elem.out_num < 2 || req->elem.in_num < 3) {
-virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-g_free(req);
-return;
+if (elem->out_num < 2 || elem->in_num < 3) {
+status = VIRTIO_BLK_S_IOERR;
+goto fail;
 }
 
 /*
  * The scsi inhdr is placed in the second-to-last input segment, just
  * before the regular inhdr.
  */
-req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
 
-if (!req->dev->blk.scsi) {
+if (!blk->blk.scsi) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
@@ -160,43 +160,42 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 /*
  * No support for bidirection commands yet.
  */
-if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+if (elem->out_num > 2 && elem->in_num > 3) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
 
 #ifdef __linux__
-struct sg_io_hdr hdr;
 memset(&hdr, 0, sizeof(struct sg_io_hdr));
 hdr.interface_id = 'S';
-hdr.cmd_len = req->elem.out_sg[1].iov_len;
-hdr.cmdp = req->elem.out_sg[1].iov_base;
+hdr.cmd_len = elem->out_sg[1].iov_len;
+hdr.cmdp = elem->out_sg[1].iov_base;
 hdr.dxfer_len = 0;
 
-if (req->elem.out_num > 2) {
+if (elem->out_num > 2) {
 /*
  * If there are more than the minimally required 2 output segments
  * there is write payload starting from the third iovec.
  */
 hdr.dxfer_direction = SG_DXFER_TO_DEV;
-hdr.iovec_count = req->elem.out_num - 2;
+hdr.iovec_count = elem->out_num - 2;
 
 for (i = 0; i < hdr.iovec_count; i++)
-hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
 
-hdr.dxferp = req->elem.out_sg + 2;
+hdr.dxferp = elem->out_sg + 2;
 
-} else if (req->elem.in_num > 3) {
+} else if (elem->in_num > 3) {
 /*
  * If we have more than 3 input segments the guest wants to actually
  * read data.
  */
 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-hdr.iovec_count = req->elem.in_num - 3;
+hdr.iovec_count = elem->in_num - 3;
 for (i = 0; i < hdr.iovec_count; i++)
-hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+hdr.dxfer_len += elem->in_sg[i].iov_len;
 
-hdr.dxferp = req->elem.in_sg;
+hdr.dxferp = elem->in_sg;
 } else {
 /*
  * Some SCSI commands don't actually transfer any data.
@@ -204,11 +203,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 hdr.dxfer_direction = SG_DXFER_NONE;
 }
 
-hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
-hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
 
-ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
-if (ret) {
+status = bdrv_ioctl(blk->bs, SG_IO, &hdr);
+if (status) {
 status = VIRTIO_BLK_S_UNSUPP;
 goto fail;
 }
@@ -224,23 +223,31 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 hdr.status = CHECK_CONDITION;
 }
 
-stl_p(&req->scsi->errors,
+stl_p(&scsi->errors,
   hdr.status | (hdr.msg_status << 8) |

[Qemu-devel] [PATCH v3 2/2] dataplane: Support VIRTIO_BLK_T_SCSI_CMD

2014-05-22 Thread Fam Zheng

Signed-off-by: Fam Zheng 
---
 hw/block/dataplane/virtio-blk.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 46a6824..03cbdec 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -201,6 +201,15 @@ static void do_flush_cmd(VirtIOBlockDataPlane *s, 
VirtQueueElement *elem,
 bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
 }
 
+static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+QEMUIOVector *inhdr)
+{
+int status;
+
+status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
+complete_request_early(s, elem, inhdr, status);
+}
+
 static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
 struct iovec *iov = elem->out_sg;
@@ -249,8 +258,7 @@ static int process_request(VirtIOBlockDataPlane *s, 
VirtQueueElement *elem)
 return 0;
 
 case VIRTIO_BLK_T_SCSI_CMD:
-/* TODO support SCSI commands */
-complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
+do_scsi_cmd(s, elem, inhdr);
 return 0;
 
 case VIRTIO_BLK_T_FLUSH:
@@ -326,12 +334,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 return;
 }
 
-if (blk->scsi) {
-error_setg(errp,
-   "device is incompatible with x-data-plane, use scsi=off");
-return;
-}
-
 /* If dataplane is (re-)enabled while the guest is running there could be
  * block jobs that can conflict.
  */
-- 
1.9.2

Re: [Qemu-devel] [PATCH v3 0/2] dataplane: Enable "scsi=on"

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 10:22, Fam Zheng ha scritto:

This makes the SG_IO code of non-dataplane available to dataplane, so that
dataplane can use to allow scsi=on.

v3: Change parameter to VirtIOBlock pointer. Undo the swap in code movement.
(Paolo)

v2:
[1/2] Fix scsi=off case and drop VirtIOBlockReq.scsi.
[2/2] Pass conf to virtio_blk_handle_scsi_req.

Fam


Fam Zheng (2):
  virtio-blk: Factor out virtio_blk_handle_scsi_req from
virtio_blk_handle_scsi
  dataplane: Support VIRTIO_BLK_T_SCSI_CMD

 hw/block/dataplane/virtio-blk.c | 18 +-
 hw/block/virtio-blk.c   | 75 ++---
 include/hw/virtio/virtio-blk.h  |  3 ++
 3 files changed, 54 insertions(+), 42 deletions(-)



Reviewed-by: Paolo Bonzini

Re: [Qemu-devel] [PATCH v1 RFC 6/6] KVM: s390: add cpu model support

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 10:23, Michael Mueller ha scritto:

On Wed, 21 May 2014 15:22:35 +0200
Alexander Graf  wrote:

I have seen the slides from Eduardo which he presented during this years
DevConf in Brno and made my comments according the s390x implementation
on that. Is you will see, this is mostly overlapping except for the model
definition authority that I clearly see on qemu's side.

See pdf attachment.


More comments:

- "Only one machine type in s390 case which is -machine s390-virtio-ccw"

This probably should change sooner or later, as soon as the 
implementation becomes stable enough.  Versioning is necessary for live 
migration across different QEMU version.  Perhaps start versioning in 
2.2, i.e. start making s390-virtio-ccw-2.1 an alias for s390-virtio-ccw now?


Note that new virtio device features can appear at any time outside the 
s390 code, and will take part in versioning as well.


- "No enforce option"

Strongly suggest making enforce the only possible behavior.

- "Not in the s390x case, because the KVM facility mask limits the cpu 
model specific facilities"


What if the KVM facility mask changes?  For x86, nowadays new CPUID bits 
are only introduced in KVM when a new processors comes out.  But if we 
introduced an older CPUID bit, it would be a huge complication for 
backwards compatibility.  Is it different for s390?


Paolo

Re: [Qemu-devel] [PATCH v2 04/23] target-arm: preserve RAO/WI bits of ARMv7 SCTLR

2014-05-22 Thread Aggeler Fabian


On 21 May 2014, at 18:12, Peter Maydell  wrote:

> On 14 May 2014 06:43, Sergey Fedorov  wrote:
>> 
>> On 13.05.2014 20:15, Fabian Aggeler wrote:
>>> From: Svetlana Fedoseeva 
>>> 
>>> Signed-off-by: Svetlana Fedoseeva 
>>> Signed-off-by: Sergey Fedorov 
>>> Signed-off-by: Fabian Aggeler 
>>> ---
>>> target-arm/helper.c | 5 +
>>> 1 file changed, 5 insertions(+)
>>> 
>>> diff --git a/target-arm/helper.c b/target-arm/helper.c
>>> index 9c3269f..2b57ad9 100644
>>> --- a/target-arm/helper.c
>>> +++ b/target-arm/helper.c
>>> @@ -2083,6 +2083,11 @@ static void sctlr_write(CPUARMState *env, const 
>>> ARMCPRegInfo *ri,
>>> {
>>> ARMCPU *cpu = arm_env_get_cpu(env);
>>> 
>>> +if (arm_feature(env, ARM_FEATURE_V7)) {
>>> +value |= SCTLR_XP | SCTLR_U | SCTLR_nTWE | SCTLR_nTWI | SCTLR_L
>>> +| SCTLR_CP15BEN | SCTLR_P; /* These bits are RAO/WI */
>> 
>> Actually, some of these bits are RAO/WI since v6. Also, there are some
>> RAZ/WI bits varying over architecture variants. There is some overview
>> at ARM ARM v7-AP section L.7.4. Maybe it is worth to fix more precisely
>> over supported architecture variants? By the way, this patch could be
>> separated from security extensions support patch set.
> 
> Agreed. Our compliance for bits that should-be-0/1 is not great,
> but if we don't actually need to do those fixes for TZ support
> then they're probably better separated out (ie drop them from
> this patchset for the moment and submit them separately or
> later...)
> 
> Also for v8 many of these RAZ/RAO bits become RES0/RES1 and
> the rules are different...
> 
> thanks
> — PMM

Okay, I will separate them and submit them separately.

Thanks,
Fabian

[Qemu-devel] [PATCH] virtio-pci: report an error when disable msix

2014-05-22 Thread Amos Kong

QEMU remains 4k memory for PCI BAR, each msix entry takes 16 bytes.
If user assigns more than 128 vectors, msix resource isn't enough,
so msix will be disabled.

This patch addes a note when fail to init exclusive bars for msix.

 qemu -device virtio-net-pci,netdev=h1,vectors=129,mq=on \
  -netdev tap,id=h1,queues=8

Signed-off-by: Amos Kong 
---
 hw/virtio/virtio-pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index ce97514..ea5dcdf 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -976,6 +976,8 @@ static void virtio_pci_device_plugged(DeviceState *d)
 
 if (proxy->nvectors &&
 msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 1)) {
+error_report("%s: unable to init exclusive bars for msix, disable 
msix",
+ __func__);
 proxy->nvectors = 0;
 }
 
-- 
1.9.0

Re: [Qemu-devel] [PATCH v2 01/23] target-arm: add new CPU feature for Security Extensions

2014-05-22 Thread Aggeler Fabian


On 21 May 2014, at 16:51, Peter Maydell  wrote:

> On 13 May 2014 17:15, Fabian Aggeler  wrote:
>> --- a/target-arm/cpu.h
>> +++ b/target-arm/cpu.h
>> @@ -631,6 +631,7 @@ enum arm_features {
>> ARM_FEATURE_CBAR, /* has cp15 CBAR */
>> ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
>> ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
>> +ARM_FEATURE_SECURITY_EXTENSIONS, /* has Security Extensions */
>> };
> 
> Also this feature name is pretty long and unwieldy; something
> shorter would be nice. I think we should probably go with
> ARM_FEATURE_EL2/ARM_FEATURE_EL3 as per Edgar's
> patchset.
> 
> thanks
> -- PMM

Makes sense, I will use Edgar’s ARM_FEATURE_EL3 in the next version.

Fabian

Re: [Qemu-devel] [PATCH] virtio-pci: report an error when disable msix

2014-05-22 Thread Michael S. Tsirkin

On Thu, May 22, 2014 at 05:02:17PM +0800, Amos Kong wrote:
> QEMU remains 4k memory for PCI BAR, each msix entry takes 16 bytes.
> If user assigns more than 128 vectors, msix resource isn't enough,
> so msix will be disabled.
> 
> This patch addes a note when fail to init exclusive bars for msix.
> 
>  qemu -device virtio-net-pci,netdev=h1,vectors=129,mq=on \
>   -netdev tap,id=h1,queues=8
> 
> Signed-off-by: Amos Kong 

OK I guess, but how about removing the limit instead?

> ---
>  hw/virtio/virtio-pci.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index ce97514..ea5dcdf 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -976,6 +976,8 @@ static void virtio_pci_device_plugged(DeviceState *d)
>  
>  if (proxy->nvectors &&
>  msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 1)) {
> +error_report("%s: unable to init exclusive bars for msix, disable 
> msix",
> + __func__);
>  proxy->nvectors = 0;
>  }
>  
> -- 
> 1.9.0

Re: [Qemu-devel] [PATCH] virtio-pci: report an error when disable msix

2014-05-22 Thread Peter Maydell

On 22 May 2014 10:02, Amos Kong  wrote:
> QEMU remains 4k memory for PCI BAR, each msix entry takes 16 bytes.
> If user assigns more than 128 vectors, msix resource isn't enough,
> so msix will be disabled.
>
> This patch addes a note when fail to init exclusive bars for msix.
>
>  qemu -device virtio-net-pci,netdev=h1,vectors=129,mq=on \
>   -netdev tap,id=h1,queues=8
>
> Signed-off-by: Amos Kong 
> ---
>  hw/virtio/virtio-pci.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index ce97514..ea5dcdf 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -976,6 +976,8 @@ static void virtio_pci_device_plugged(DeviceState *d)
>
>  if (proxy->nvectors &&
>  msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 1)) {
> +error_report("%s: unable to init exclusive bars for msix, disable 
> msix",
> + __func__);

I think you probably mean "disabling".

>  proxy->nvectors = 0;
>  }

thanks
-- PMM

[Qemu-devel] [PATCH] docs: clarify that qcow2 file size is not always a cluster multiple

2014-05-22 Thread Stefan Hajnoczi

Normally one would expect that qcow2 image file lengths are multiples of
the cluster size.  This is not true in all cases and the spec should
document this so implementers remember to accept such files.

$ qemu-img create -f qcow2 foo.qcow2 2G
Formatting 'foo.qcow2', fmt=qcow2 size=2147483648 encryption=off 
cluster_size=65536 lazy_refcounts=off
$ ls -l foo.qcow2
-rw-r--r-- 1 stefanha stefanha 197120 May 22 11:40 foo.qcow2
$ bc -q
3 * (64 * 1024) + 512
197120

The extra sector are the 4 L1 table entries that a 2 GB disk with 64 KB
cluster size needs.  The rest of the L1 table is omitted from the file
but allocation will continue at the next cluster boundary.

Reported-by: Maria Kustova 
Signed-off-by: Stefan Hajnoczi 
---
 docs/specs/qcow2.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index f19536a..a46ee57 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -4,6 +4,10 @@ A qcow2 image file is organized in units of constant size, 
which are called
 (host) clusters. A cluster is the unit in which all allocations are done,
 both for actual guest data and for image metadata.
 
+If the end of the image file is not on a cluster boundary, the missing data is
+treated as zeroes.  This layout can occur when an L1 table or refcount table is
+the last thing in the file and not all entries in the table are used.
+
 Likewise, the virtual disk as seen by the guest is divided into (guest)
 clusters of the same size.
 
-- 
1.9.0

Re: [Qemu-devel] [Xen-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve 00:02.0 for INTEL IGD

2014-05-22 Thread Konrad Rzeszutek Wilk

On Thu, May 22, 2014 at 07:39:36AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > > According to our discussions, I realize we may have some plans or policies
> > > dedicated to how to assign devfn, but to support GFX passthrough for XEN, 
> > > I
> > > think currently it may be a better solution to adopt #1 simply like this:
> > > 
> > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index eaf3e61..500b3c2
> > > 100644
> > > --- a/hw/i386/pc_piix.c
> > > +++ b/hw/i386/pc_piix.c
> > > @@ -386,7 +386,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs
> > > *args)
> > > 
> > >  bus = pci_find_primary_bus();
> > >  if (bus != NULL) {
> > > -pci_create_simple(bus, -1, "xen-platform");
> > > +pci_create_simple(bus, PCI_DEVFN(3,0), "xen-platform");
> > >  }
> > >  }
> > >  #endif
> > > 
> > > Then we can go out to plan how to assign devfn in common, is this fine?
> 
> Somewhere else in the thread someone listed a libxl config file snippet
> which is supposed to disable the xen platform device.  Which doesn't
> work with upstream qemu for some reason.
> 
> Searching ...   ah, here:
> http://lists.gnu.org/archive/html/qemu-devel/2014-05/msg03996.html
> 
> So, how about making "xen_platform_pci=0" working correctly ?
> 

You don't want that enabled by default.
Otherwise the Linux (and Windows) drivers won't enable
the PV functionality and we are using the emulated drivers (slow).

> Another useful thing would be to not create the xen platform device in
> case "-nodefaults" was specified on the command line (that switch turns
> off a bunch of other devices present by default: vga, nic, cdrom, ...).
> 
> cheers,
>   Gerd
> 
> 
> 
> ___
> Xen-devel mailing list
> xen-de...@lists.xen.org
> http://lists.xen.org/xen-devel

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexander Graf



On 22.05.14 01:45, Alexey Kardashevskiy wrote:

On 05/22/2014 08:11 AM, Alexander Graf wrote:

On 21.05.14 16:21, Alexey Kardashevskiy wrote:

At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
spec allows other page sizes and we are going to implement them, we need
page size to be configrable.

This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
with it whereever it is possible.

This removes SPAPR_TCE_PAGE_MASK as it is no longer used.

Signed-off-by: Alexey Kardashevskiy 
---
   hw/ppc/spapr_iommu.c   | 54
+-
   hw/ppc/spapr_pci.c |  1 +
   hw/ppc/spapr_vio.c |  1 +
   include/hw/ppc/spapr.h |  3 ++-
   4 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 90de3e3..e765a6d 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -70,12 +70,13 @@ static IOMMUTLBEntry
spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
 if (tcet->bypass) {
   ret.perm = IOMMU_RW;
-} else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
+} else if ((addr >> tcet->page_shift) < tcet->nb_table) {
   /* Check if we are in bound */
-tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
-ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
-ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
-ret.addr_mask = SPAPR_TCE_PAGE_MASK;
+target_ulong mask = ~((1 << tcet->page_shift) - 1);

Why target_ulong? This should be u64 or hwaddr or something along those
lines, no? Also, can the mask grow bigger than 31bits? If so you probably
want 1ULL (below as well).

In fact, we might be better off with a few more fields to tcet. Just add
page_mask and page_size in addition to the page_shift one and use them
instead of calculating them over and over again.


+tce = tcet->table[addr >> tcet->page_shift];
+ret.iova = addr & mask;
+ret.translated_addr = tce & mask;
+ret.addr_mask = ~mask;
   ret.perm = tce;
   }
   trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
@@ -113,7 +114,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
   if (kvm_enabled()) {
   tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
 tcet->nb_table <<
-  SPAPR_TCE_PAGE_SHIFT,
+  tcet->page_shift,
 &tcet->fd);
   }
   @@ -133,6 +134,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
   }
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+   uint32_t page_shift,
  uint32_t nb_table)
   {
   sPAPRTCETable *tcet;
@@ -149,6 +151,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState
*owner, uint32_t liobn,
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
   tcet->liobn = liobn;
+tcet->page_shift = page_shift;
   tcet->nb_table = nb_table;
 object_property_add_child(OBJECT(owner), "tce-table",
OBJECT(tcet), NULL);
@@ -194,19 +197,20 @@ static target_ulong put_tce_emu(sPAPRTCETable
*tcet, target_ulong ioba,
   target_ulong tce)
   {
   IOMMUTLBEntry entry;
+target_ulong mask = ~((1 << tcet->page_shift) - 1);
   -if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
+if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
   hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
 TARGET_FMT_lx "\n", ioba);
   return H_PARAMETER;
   }
   -tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
+tcet->table[ioba >> tcet->page_shift] = tce;
 entry.target_as = &address_space_memory,
-entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
-entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
-entry.addr_mask = SPAPR_TCE_PAGE_MASK;
+entry.iova = ioba & mask;
+entry.translated_addr = tce & mask;
+entry.addr_mask = ~mask;
   entry.perm = tce;
   memory_region_notify_iommu(&tcet->iommu, entry);
   @@ -226,6 +230,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
*cpu,
   target_ulong ret = H_PARAMETER;
   sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
   CPUState *cs = CPU(cpu);
+target_ulong mask;
 if (!tcet) {
   return H_PARAMETER;
@@ -235,12 +240,14 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
*cpu,
   return H_PARAMETER;
   }
   -ioba &= ~SPAPR_TCE_PAGE_MASK;
-tce_list &= ~SPAPR_TCE_PAGE_MASK;
+mask = ~((1 << tcet->page_shift) - 1);
+ioba &= mask;
+
+for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
+target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
+i * sizeof(target_ulong);
+target_ulong tce

Re: [Qemu-devel] [PATCH v2 1/2] openpic: Move definition of openpic_reset

2014-05-22 Thread Alexander Graf



On 22.05.14 06:46, Paul Janzen wrote:

This patch moves the definition of openpic_reset after the various
register read/write functions. No functional change.  It is in
preparation for using the register read/write functions in
openpic_reset.

Signed-off-by: Paul Janzen 


Thanks, applied both to ppc-next.


Alex

Re: [Qemu-devel] [Qemu-ppc] [PATCH] openpic: Initialize destmask at reset

2014-05-22 Thread Alexander Graf



On 21.05.14 21:47, Alexander Graf wrote:



Am 21.05.2014 um 18:33 schrieb Paul Janzen :


On Wed, May 21, 2014 at 12:45 AM, Alexander Graf  wrote:

Or maybe it's safer overall to just call write_IRQreg_idr() instead of
setting idr directly? That would update destmask along the way as well and
we would catch all subtle corner cases.

I considered this initially but decided to go with the simpler
approach.  But you have convinced me. Specifically, the current
implementation does not reset src->output or src->nomask, which
write_IRQreg_idr() will. I will test this change and re-submit the
patch.


Do you have a simple test case for this patch? We seem to have the same bug
in the in-kernel KVM MPIC emulation code and I'd like to have it fixed there
as well, but I don't really like to do that change blindly.

I have tested this patch using a proprietary embedded operating system
which doesn't bother to initialize the IDR registers.

Do you think it'd be possible for you to test whether it works with kvm when I 
give you a patch? Do you have real hardware to run kvm on? :)


I just checked - we don't properly implement reset at all in the 
in-kernel MPIC we do in KVM. So it's merely something to keep in mind 
for now until someone sits down and implements proper reset support ;).



Alex

Re: [Qemu-devel] [PATCH 4/7] virtio-blk: use aliases instead of duplicate qdev properties

2014-05-22 Thread Stefan Hajnoczi

On Thu, May 22, 2014 at 12:04 AM, Paolo Bonzini  wrote:
> Il 21/05/2014 22:22, Stefan Hajnoczi ha scritto:
>
>> virtio-blk-pci, virtio-blk-s390, and virtio-blk-ccw all duplicate the
>> qdev properties of their VirtIOBlock child.  This approach does not work
>> well with string or pointer properties since we must be careful about
>> leaking or double-freeing them.
>>
>> Use the QOM alias property to forward property accesses to the
>> VirtIOBlock child.  This way no duplication is necessary.
>>
>> Remember to stop calling virtio_blk_set_conf() so that we don't clobber
>> the values already set on the VirtIOBlock instance.
>
>
> Which properties are _not_ being added?  This is probably needed for all
> other virtio devices so a generic solution would be nice.

I think your idea is something like:

def qdev_add_alias_properties(obj, dev):
for qdev_prop in dev:
object_property_add_alias(obj, qdev_prop.name, dev, qdev_prop.name)

That way we don't explicitly list all properties and duplicate code
when fixing up net, rng, serial, scsi, etc.

I took a quick look at net, rng, and serial.  This approach should
work because the VirtIODevice qdev properties need to be exposed
wholesale on the transport device.

If that's what you're hinting at, I'll try it in v2.

Stefan

Re: [Qemu-devel] [PATCH 4/7] virtio-blk: use aliases instead of duplicate qdev properties

2014-05-22 Thread Andreas Färber

Am 22.05.2014 00:04, schrieb Paolo Bonzini:
> Il 21/05/2014 22:22, Stefan Hajnoczi ha scritto:
>> virtio-blk-pci, virtio-blk-s390, and virtio-blk-ccw all duplicate the
>> qdev properties of their VirtIOBlock child.  This approach does not work
>> well with string or pointer properties since we must be careful about
>> leaking or double-freeing them.
>>
>> Use the QOM alias property to forward property accesses to the
>> VirtIOBlock child.  This way no duplication is necessary.
>>
>> Remember to stop calling virtio_blk_set_conf() so that we don't clobber
>> the values already set on the VirtIOBlock instance.
> 
> Which properties are _not_ being added?  This is probably needed for all
> other virtio devices so a generic solution would be nice.

"type", "realized" and the child<> property for VirtIODevice come to
mind, possibly one or two more.

If we follow a generic scheme, we could add an .instance_post_init hook
for VirtIOPCIProxy iterating over all properties and blacklisting some.

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Re: [Qemu-devel] [PATCH 1/7] qom: add object_property_add_alias()

2014-05-22 Thread Stefan Hajnoczi

On Thu, May 22, 2014 at 12:05 AM, Paolo Bonzini  wrote:
> Il 21/05/2014 22:22, Stefan Hajnoczi ha scritto:
>> +void object_property_add_alias(Object *obj, const char *name,
>> +   Object *target_obj, const char
>> *target_name,
>> +   Error **errp)
>> +{
>> +AliasProperty *prop;
>> +ObjectProperty *target_prop;
>> +
>> +target_prop = object_property_find(target_obj, target_name, errp);
>> +if (!target_prop) {
>> +return;
>> +}
>> +
>> +prop = g_malloc(sizeof(*prop));
>> +prop->target_obj = target_obj;
>> +prop->target_name = target_name;
>> +
>> +object_property_add(obj, name, target_prop->type,
>> +property_get_alias,
>> +property_set_alias,
>> +property_release_alias,
>> +prop, errp);
>> +}
>
>
> ref target_obj here, and unref in property_release_alias?

No.  I originally did this but realized it is probably not a good idea.

1. If you want to alias a property on the same object instance (foo.a
-> foo.b) this would create a refcount leak (unless someone explicitly
deletes this property to bring the refcount back to 1).
2. The virtio callers already have a child reference and I suspect
other callers would too.  Therefore we don't need to do extra
refcounting.

I had an intermediate version of this patch with a flag so you could
tell object_property_add_alias() whether or not to ref the target
object.  But in the end it seems like overengineering since the
refcount case is rare or non-existent.  The refcount is a convenience
feature just for the case where you want to alias to a random object
that you don't otherwise hold a refcount on (I couldn't think of an
example).

Do you see what I mean?

Stefan

Re: [Qemu-devel] [PATCH 4/7] virtio-blk: use aliases instead of duplicate qdev properties

2014-05-22 Thread Stefan Hajnoczi

On Thu, May 22, 2014 at 12:18 PM, Andreas Färber  wrote:
> Am 22.05.2014 00:04, schrieb Paolo Bonzini:
>> Il 21/05/2014 22:22, Stefan Hajnoczi ha scritto:
>>> virtio-blk-pci, virtio-blk-s390, and virtio-blk-ccw all duplicate the
>>> qdev properties of their VirtIOBlock child.  This approach does not work
>>> well with string or pointer properties since we must be careful about
>>> leaking or double-freeing them.
>>>
>>> Use the QOM alias property to forward property accesses to the
>>> VirtIOBlock child.  This way no duplication is necessary.
>>>
>>> Remember to stop calling virtio_blk_set_conf() so that we don't clobber
>>> the values already set on the VirtIOBlock instance.
>>
>> Which properties are _not_ being added?  This is probably needed for all
>> other virtio devices so a generic solution would be nice.
>
> "type", "realized" and the child<> property for VirtIODevice come to
> mind, possibly one or two more.
>
> If we follow a generic scheme, we could add an .instance_post_init hook
> for VirtIOPCIProxy iterating over all properties and blacklisting some.

I think the trick is to alias all the qdev properties, not the QOM
ones.  That way we get all the explicitly declared properties and none
of the implicit ones.

Stefan

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexey Kardashevskiy

On 05/22/2014 08:09 PM, Alexander Graf wrote:
> 
> On 22.05.14 01:45, Alexey Kardashevskiy wrote:
>> On 05/22/2014 08:11 AM, Alexander Graf wrote:
>>> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
 At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
 spec allows other page sizes and we are going to implement them, we need
 page size to be configrable.

 This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
 with it whereever it is possible.

 This removes SPAPR_TCE_PAGE_MASK as it is no longer used.

 Signed-off-by: Alexey Kardashevskiy 
 ---
hw/ppc/spapr_iommu.c   | 54
 +-
hw/ppc/spapr_pci.c |  1 +
hw/ppc/spapr_vio.c |  1 +
include/hw/ppc/spapr.h |  3 ++-
4 files changed, 35 insertions(+), 24 deletions(-)

 diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
 index 90de3e3..e765a6d 100644
 --- a/hw/ppc/spapr_iommu.c
 +++ b/hw/ppc/spapr_iommu.c
 @@ -70,12 +70,13 @@ static IOMMUTLBEntry
 spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
  if (tcet->bypass) {
ret.perm = IOMMU_RW;
 -} else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
 +} else if ((addr >> tcet->page_shift) < tcet->nb_table) {
/* Check if we are in bound */
 -tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
 -ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
 -ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
 -ret.addr_mask = SPAPR_TCE_PAGE_MASK;
 +target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>> Why target_ulong? This should be u64 or hwaddr or something along those
>>> lines, no? Also, can the mask grow bigger than 31bits? If so you probably
>>> want 1ULL (below as well).
>>>
>>> In fact, we might be better off with a few more fields to tcet. Just add
>>> page_mask and page_size in addition to the page_shift one and use them
>>> instead of calculating them over and over again.
>>>
 +tce = tcet->table[addr >> tcet->page_shift];
 +ret.iova = addr & mask;
 +ret.translated_addr = tce & mask;
 +ret.addr_mask = ~mask;
ret.perm = tce;
}
trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
 @@ -113,7 +114,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
if (kvm_enabled()) {
tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
  tcet->nb_table <<
 -  SPAPR_TCE_PAGE_SHIFT,
 +  tcet->page_shift,
  &tcet->fd);
}
@@ -133,6 +134,7 @@ static int spapr_tce_table_realize(DeviceState
 *dev)
}
  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t
 liobn,
 +   uint32_t page_shift,
   uint32_t nb_table)
{
sPAPRTCETable *tcet;
 @@ -149,6 +151,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState
 *owner, uint32_t liobn,
  tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
tcet->liobn = liobn;
 +tcet->page_shift = page_shift;
tcet->nb_table = nb_table;
  object_property_add_child(OBJECT(owner), "tce-table",
 OBJECT(tcet), NULL);
 @@ -194,19 +197,20 @@ static target_ulong put_tce_emu(sPAPRTCETable
 *tcet, target_ulong ioba,
target_ulong tce)
{
IOMMUTLBEntry entry;
 +target_ulong mask = ~((1 << tcet->page_shift) - 1);
-if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
 +if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
  TARGET_FMT_lx "\n", ioba);
return H_PARAMETER;
}
-tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
 +tcet->table[ioba >> tcet->page_shift] = tce;
  entry.target_as = &address_space_memory,
 -entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
 -entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
 -entry.addr_mask = SPAPR_TCE_PAGE_MASK;
 +entry.iova = ioba & mask;
 +entry.translated_addr = tce & mask;
 +entry.addr_mask = ~mask;
entry.perm = tce;
memory_region_notify_iommu(&tcet->iommu, entry);
@@ -226,6 +230,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
 *cpu,
target_ulong ret = H_PARAMETER;
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
CPUState *cs = CPU(cpu);
>

Re: [Qemu-devel] [PATCH 4/7] virtio-blk: use aliases instead of duplicate qdev properties

2014-05-22 Thread Andreas Färber

Am 22.05.2014 12:24, schrieb Stefan Hajnoczi:
> On Thu, May 22, 2014 at 12:18 PM, Andreas Färber  wrote:
>> Am 22.05.2014 00:04, schrieb Paolo Bonzini:
>>> Il 21/05/2014 22:22, Stefan Hajnoczi ha scritto:
 virtio-blk-pci, virtio-blk-s390, and virtio-blk-ccw all duplicate the
 qdev properties of their VirtIOBlock child.  This approach does not work
 well with string or pointer properties since we must be careful about
 leaking or double-freeing them.

 Use the QOM alias property to forward property accesses to the
 VirtIOBlock child.  This way no duplication is necessary.

 Remember to stop calling virtio_blk_set_conf() so that we don't clobber
 the values already set on the VirtIOBlock instance.
>>>
>>> Which properties are _not_ being added?  This is probably needed for all
>>> other virtio devices so a generic solution would be nice.
>>
>> "type", "realized" and the child<> property for VirtIODevice come to
>> mind, possibly one or two more.
>>
>> If we follow a generic scheme, we could add an .instance_post_init hook
>> for VirtIOPCIProxy iterating over all properties and blacklisting some.
> 
> I think the trick is to alias all the qdev properties, not the QOM
> ones.  That way we get all the explicitly declared properties and none
> of the implicit ones.

I wouldn't oppose that, but you then need to iterate over parent classes
until you hit VirtioDeviceClass (or DeviceClass?), to avoid properties
falling through the cracks.

I just figured it easier and in line with your QMP patch to avoid
distinguishing them in new code. But a quick solution is more important
than futureproofness here, so I'll take or ack whatever works here.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexander Graf



On 22.05.14 12:24, Alexey Kardashevskiy wrote:

On 05/22/2014 08:09 PM, Alexander Graf wrote:

On 22.05.14 01:45, Alexey Kardashevskiy wrote:

On 05/22/2014 08:11 AM, Alexander Graf wrote:

On 21.05.14 16:21, Alexey Kardashevskiy wrote:

At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
spec allows other page sizes and we are going to implement them, we need
page size to be configrable.

This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
with it whereever it is possible.

This removes SPAPR_TCE_PAGE_MASK as it is no longer used.

Signed-off-by: Alexey Kardashevskiy 


[...]


diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fdd4c07..c9850d4 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
*sphb, Error **errp)
sPAPRTCETable *tcet;
  tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+   SPAPR_TCE_PAGE_SHIFT,
   0x4000 >> SPAPR_TCE_PAGE_SHIFT);
if (!tcet) {
error_setg(errp, "Unable to create TCE table for %s",
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index b84e481..d7e9e6a 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
if (pc->rtce_window_size) {
uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
dev->tcet = spapr_tce_new_table(qdev, liobn,
+SPAPR_TCE_PAGE_SHIFT,

I don't quite understand who defines what the TCE page size is for a given
device. Can you try to explain this to me?

If it is default window (for PCI) or window for VIO - it is 4K. If it is a
dynamic DMA window - page size is a parameter of RTAS call which creates
the window.

Could we change that default size for non-dynamic windows somehow? 4k is
really fine grained.

No, this is hardcoded in a million places and old distros. Normally it is
maximum 1GB or 2MB table, not too bad. And with DDW support, we can make
the default one really small (64MB?) and lose even less.

SPAPR:
R1–7.3.31–4. For the Dynamic DMA Windows (DDW) option: The platform must
provide a default DMA window
for each PE, and all of the following must be true:
a. The window is defined by the “ibm,dma-window” property in the OF device
tree.
b. The window is defined with 4 KB I/O pages.
c. The window is located entirely below 4 GB.


Cool, would be interesting to see how that affects performance. I like 
most of this patch set btw, I think it should be good to go in in the 
next round.



Alex

Re: [Qemu-devel] [PATCH v2] spapr: Enable dynamic change of the supported hypercalls list

2014-05-22 Thread Alexander Graf



On 21.05.14 17:21, Alexey Kardashevskiy wrote:

At the moment the "ibm,hypertas-functions" list is fixed. However some
calls should be listed there if they are supported by QEMU or the host
kernel.

This enables hyperrtas_prop to grow on stack by adding
a SPAPR_HYPERRTAS_ADD macro. "qemu,hypertas-functions" is converted as well.

The first user of this is going to be a "multi-tce" property.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* replaced alloca() with GString
---
  hw/ppc/spapr.c | 30 +++---
  1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0a61246..3b28211 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -293,6 +293,10 @@ static size_t create_page_sizes_prop(CPUPPCState *env, 
uint32_t *prop,
  }  \
  } while (0)
  
+static inline void add_str(GString *s, const gchar *s1)


Please remove the "inline" :). Otherwise this looks a lot nicer than 
before :)



Alex

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexey Kardashevskiy

On 05/22/2014 08:45 PM, Alexander Graf wrote:
> 
> On 22.05.14 12:24, Alexey Kardashevskiy wrote:
>> On 05/22/2014 08:09 PM, Alexander Graf wrote:
>>> On 22.05.14 01:45, Alexey Kardashevskiy wrote:
 On 05/22/2014 08:11 AM, Alexander Graf wrote:
> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
>> At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
>> spec allows other page sizes and we are going to implement them, we need
>> page size to be configrable.
>>
>> This adds @page_shift into sPAPRTCETable and replaces
>> SPAPR_TCE_PAGE_SHIFT
>> with it whereever it is possible.
>>
>> This removes SPAPR_TCE_PAGE_MASK as it is no longer used.
>>
>> Signed-off-by: Alexey Kardashevskiy 
> 
> [...]
> 
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index fdd4c07..c9850d4 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
>> *sphb, Error **errp)
>> sPAPRTCETable *tcet;
>>   tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
>> +   SPAPR_TCE_PAGE_SHIFT,
>>0x4000 >> SPAPR_TCE_PAGE_SHIFT);
>> if (!tcet) {
>> error_setg(errp, "Unable to create TCE table for %s",
>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>> index b84e481..d7e9e6a 100644
>> --- a/hw/ppc/spapr_vio.c
>> +++ b/hw/ppc/spapr_vio.c
>> @@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
>> if (pc->rtce_window_size) {
>> uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
>> dev->tcet = spapr_tce_new_table(qdev, liobn,
>> +SPAPR_TCE_PAGE_SHIFT,
> I don't quite understand who defines what the TCE page size is for a
> given
> device. Can you try to explain this to me?
 If it is default window (for PCI) or window for VIO - it is 4K. If it is a
 dynamic DMA window - page size is a parameter of RTAS call which creates
 the window.
>>> Could we change that default size for non-dynamic windows somehow? 4k is
>>> really fine grained.
>> No, this is hardcoded in a million places and old distros. Normally it is
>> maximum 1GB or 2MB table, not too bad. And with DDW support, we can make
>> the default one really small (64MB?) and lose even less.
>>
>> SPAPR:
>> R1–7.3.31–4. For the Dynamic DMA Windows (DDW) option: The platform must
>> provide a default DMA window
>> for each PE, and all of the following must be true:
>> a. The window is defined by the “ibm,dma-window” property in the OF device
>> tree.
>> b. The window is defined with 4 KB I/O pages.
>> c. The window is located entirely below 4 GB.
> 
> Cool, would be interesting to see how that affects performance. I like most
> of this patch set btw, I think it should be good to go in in the next round.


10Gbit ethernet is ok but 40Gbit seems to suffer from this limitation.



-- 
Alexey

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexander Graf



On 22.05.14 12:46, Alexey Kardashevskiy wrote:

On 05/22/2014 08:45 PM, Alexander Graf wrote:

On 22.05.14 12:24, Alexey Kardashevskiy wrote:

On 05/22/2014 08:09 PM, Alexander Graf wrote:

On 22.05.14 01:45, Alexey Kardashevskiy wrote:

On 05/22/2014 08:11 AM, Alexander Graf wrote:

On 21.05.14 16:21, Alexey Kardashevskiy wrote:

At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
spec allows other page sizes and we are going to implement them, we need
page size to be configrable.

This adds @page_shift into sPAPRTCETable and replaces
SPAPR_TCE_PAGE_SHIFT
with it whereever it is possible.

This removes SPAPR_TCE_PAGE_MASK as it is no longer used.

Signed-off-by: Alexey Kardashevskiy 

[...]


diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fdd4c07..c9850d4 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
*sphb, Error **errp)
 sPAPRTCETable *tcet;
   tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+   SPAPR_TCE_PAGE_SHIFT,
0x4000 >> SPAPR_TCE_PAGE_SHIFT);
 if (!tcet) {
 error_setg(errp, "Unable to create TCE table for %s",
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index b84e481..d7e9e6a 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 if (pc->rtce_window_size) {
 uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 dev->tcet = spapr_tce_new_table(qdev, liobn,
+SPAPR_TCE_PAGE_SHIFT,

I don't quite understand who defines what the TCE page size is for a
given
device. Can you try to explain this to me?

If it is default window (for PCI) or window for VIO - it is 4K. If it is a
dynamic DMA window - page size is a parameter of RTAS call which creates
the window.

Could we change that default size for non-dynamic windows somehow? 4k is
really fine grained.

No, this is hardcoded in a million places and old distros. Normally it is
maximum 1GB or 2MB table, not too bad. And with DDW support, we can make
the default one really small (64MB?) and lose even less.

SPAPR:
R1–7.3.31–4. For the Dynamic DMA Windows (DDW) option: The platform must
provide a default DMA window
for each PE, and all of the following must be true:
a. The window is defined by the “ibm,dma-window” property in the OF device
tree.
b. The window is defined with 4 KB I/O pages.
c. The window is located entirely below 4 GB.

Cool, would be interesting to see how that affects performance. I like most
of this patch set btw, I think it should be good to go in in the next round.


10Gbit ethernet is ok but 40Gbit seems to suffer from this limitation.


Oh, I was implying that it'd be interesting to see how we fare when we 
do a really small default DMA window to force the guest to use DDW and 
preferably even with very large pages.



Alex

Re: [Qemu-devel] [RFC 0/3] cpu: add device_add foo-x86_64-cpu support

2014-05-22 Thread Andreas Färber

Hi,

Am 22.05.2014 04:33, schrieb chen.fan.f...@cn.fujitsu.com:
>I think if we want to use 'device/device_add' to implement CPU,
> we must do some check before qemu_init_vcpu(). how can we to do that?

We ran into such problems before... If need be, we can change from the
old parent_realize scheme to the base class calling the derived realize
function in-order, or we can add new hooks to CPUClass as necessary.

Consider me a bit skeptical about MAX_CPUMASK_BITS in 1/3. This should
at least be tied to the maximum allowed for QEMUMachine/MachineClass
rather than hardcoded to 255, which people may forget to synchronize.
There was a recent attempt to increase the limits.

2/3 looks good apart from the subject; I could cherry-pick that, seeing
there is Reviewed-by, if you like.

3/3 is doing a bit much to digest at once for my taste.

Regards,
Andreas

> On Tue, 2014-05-13 at 18:08 +0800, Chen Fan wrote: 
>> this patches tried to make cpu hotplug with device_add, 
>> and made -device foo-x86_64-cpu available，also we can
>> set apic-id property with command line, if without setting
>> apic-id property, we added first unoccupied apic id as the
>> default new apic id. and hotplug cpu with device_add, we
>> must make check of APIC ID after cpu object initialization
>> that was different from 'cpu_add' command which check 'ids'
>> at the beginning.
>>
>> Chen Fan (3):
>>   using CPUMASK bitmaps to calculate cpu index
>>   cpu: introduce CpuTopoInfo structure for argument simplification
>>   cpu: add device_add foo-x86_64-cpu support
>>
>>  exec.c  |  9 +++--
>>  include/qom/cpu.h   | 11 ++
>>  include/sysemu/sysemu.h |  7 
>>  qdev-monitor.c  | 11 ++
>>  target-i386/cpu.c   | 91 
>> -
>>  target-i386/topology.h  | 51 ++-
>>  6 files changed, 151 insertions(+), 29 deletions(-)

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve 00:02.0 for INTEL IGD

2014-05-22 Thread Chen, Tiejun

> -Original Message-
> From: Gerd Hoffmann [mailto:kra...@redhat.com]
> Sent: Thursday, May 22, 2014 2:45 PM
> To: Chen, Tiejun
> Cc: Anthony PERARD; Daniel P. Berrange; peter.mayd...@linaro.org;
> xen-de...@lists.xensource.com; m...@redhat.com;
> stefano.stabell...@eu.citrix.com; Kay, Allen M; kelly.zyta...@amd.com;
> qemu-devel@nongnu.org; Zhang, Yang Z; anth...@codemonkey.ws
> Subject: Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve
> 00:02.0 for INTEL IGD
> 
>   Hi,
> 
> > > Another useful thing would be to not create the xen platform device
> > > in case "-nodefaults" was specified on the command line (that switch
> > > turns off a bunch of other devices present by default: vga, nic, cdrom, 
> > > ...).
> >
> > Currently looks 'xen-platform' itself can't be created, not those devices
> existed on that.
> 
> The error message looks more like libxl tries to hot-unplug the xen platform
> device.
> 
> Attached patch (untested!) hooks up the xen platform device to the default
> device code we have in qemu.  Two effects:
> 
>   (1) As mentioned above the device will not be created in case
>   -nodefaults  is specified on the command line.
>   (2) Autocreating the device is also turned off in case xen-platform
>   is added manually via -device.
> 
> With the patch applied you should be able to move the xen-platform device to
> some other place with a simple 'qemu -device xen-platform,addr=$slot'.
> 

Gerd,

Sorry, I may misunderstand what you mean previously then have a wrong test.

So this still doesn't work actually. 

After applied your patch, 'xen-platform' is always disabled by default, right? 
So 00:02.0 is left naturally to be assigned to IGD as we expect like this,

tchen0@tchen0-HVM-domU:~$ lspci
00:00.0 Host bridge: Intel Corporation 4th Gen Core Processor DRAM Controller 
(rev 06)
00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
00:02.0 VGA compatible controller: Intel Corporation Xeon E3-1200 v3/4th Gen 
Core Processor Integrated Graphics Controller (rev 06)
00:03.0 USB controller: Intel Corporation 8 Series/C220 Series Chipset Family 
USB EHCI #2 (rev 04)
00:1f.0 ISA bridge: Intel Corporation Q87 Express LPC Controller (rev 04)

Then this is fine but if you intend to add a 'qemu -device 
xen-platform,addr=$slot', this doesn't work well. In my case,

gfx_passthru=1
pci=["00:02.0@2", "00:1a.0"]
xen_platform_pci=0
device_model_args_hvm = ['-device', 'xen-platform,addr=0x3']

tchen0@tchen0-linux:~/workspace$ sudo xl cr domu-cfg
Parsing config from domu-cfg
libxl: error: libxl_qmp.c:287:qmp_handle_error_response: received an error 
message from QMP server: Unsupported bus. Bus doesn't have property 
'acpi-pcihp-bsel' set
libxl: error: libxl_create.c:1277:domcreate_attach_pci: libxl_device_pci_add 
failed: -3

Thanks
Tiejun

[Qemu-devel] [PATCH 1/2] vmstate: Add helper to enable GHashTable migration

2014-05-22 Thread Alexey Kardashevskiy

This adds a VMSTATE_HASH_V macro. This implements put/get callbacks for it.
This implements a qemu_hash_init() wrapper to save key/value sizes.

Signed-off-by: Alexey Kardashevskiy 
---
 include/migration/vmstate.h | 10 +
 include/qemu-common.h   | 13 +++
 vmstate.c   | 54 +
 3 files changed, 77 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7e45048..6af599d 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -166,6 +166,7 @@ extern const VMStateInfo vmstate_info_timer;
 extern const VMStateInfo vmstate_info_buffer;
 extern const VMStateInfo vmstate_info_unused_buffer;
 extern const VMStateInfo vmstate_info_bitmap;
+static const VMStateInfo vmstate_info_hash;
 
 #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
 #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
@@ -740,6 +741,15 @@ extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size)\
 VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, 
_size)
 
+#define VMSTATE_HASH_V(_field, _state, _version) {  \
+.name   = (stringify(_field)),  \
+.version_id = (_version),   \
+.size   = sizeof(qemu_hash),\
+.info   = &vmstate_info_hash,   \
+.flags  = VMS_SINGLE,   \
+.offset = vmstate_offset_value(_state, _field, qemu_hash),  \
+}
+
 #define VMSTATE_UNUSED_V(_v, _size)   \
 VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
 
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 3f3fd60..ee973e7 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -462,4 +462,17 @@ int parse_debug_env(const char *name, int max, int 
initial);
 
 const char *qemu_ether_ntoa(const MACAddr *mac);
 
+typedef struct qemu_hash {
+GHashTable *hash;
+int key_size;
+int value_size;
+} qemu_hash;
+
+static inline void qemu_hash_init(qemu_hash *h, int key_size, int value_size)
+{
+h->key_size = key_size;
+h->value_size = value_size;
+h->hash = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
+}
+
 #endif
diff --git a/vmstate.c b/vmstate.c
index b5882fa..2148e73 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -667,3 +667,57 @@ const VMStateInfo vmstate_info_bitmap = {
 .get = get_bitmap,
 .put = put_bitmap,
 };
+
+/* Save restore for qemu_hash which is a wrapper over GHashTable */
+static int get_hash(QEMUFile *f, void *pv, size_t size)
+{
+qemu_hash *h = pv;
+uint32_t num = g_hash_table_size(h->hash);
+gpointer key, value;
+
+num = qemu_get_be32(f);
+
+for ( ; num; --num) {
+int i;
+
+key = g_malloc0(h->key_size);
+for (i = 0; i < h->key_size; ++i) {
+((uint8_t *)key)[i] = qemu_get_byte(f);
+}
+value = g_malloc0(h->value_size);
+for (i = 0; i < h->value_size; ++i) {
+((uint8_t *)value)[i] = qemu_get_byte(f);
+}
+g_hash_table_insert(h->hash, key, value);
+}
+
+return 0;
+}
+
+static void put_hash(QEMUFile *f, void *pv, size_t size)
+{
+qemu_hash *h = pv;
+uint32_t num = g_hash_table_size(h->hash);
+GHashTableIter iter;
+gpointer key, value;
+
+qemu_put_be32(f, num);
+
+g_hash_table_iter_init(&iter, h->hash);
+while (g_hash_table_iter_next (&iter, &key, &value)) {
+int i;
+
+for (i = 0; i < h->key_size; ++i) {
+qemu_put_byte(f, ((uint8_t *)key)[i]);
+}
+for (i = 0; i < h->value_size; ++i) {
+qemu_put_byte(f, ((uint8_t *)value)[i]);
+}
+}
+}
+
+static const VMStateInfo vmstate_info_hash = {
+.name = "qemu-hash-desc",
+.get  = get_hash,
+.put  = put_hash,
+};
-- 
1.9.rc0

[Qemu-devel] [PATCH 2/2] spapr_pci: Use XICS interrupt allocator and do not cache interrupts in PHB

2014-05-22 Thread Alexey Kardashevskiy

Currently SPAPR PHB keeps track of all allocated MSI/MISX interrupt as
XICS used to be unable to reuse interrupts which becomes a problem for
dynamic MSI reconfiguration which is happening on guest driver reload or
PCI hot (un)plug. Another problem is that PHB has a limit of devices
supporting MSI/MSIX (SPAPR_MSIX_MAX_DEVS=32) and there is no good reason
for that.

This makes use of new XICS ability to reuse interrupts.

This removes cached MSI configuration from SPAPR PHB so the first IRQ number
of a device is stored in MSI/MSIX config space so there is no need to store
this anywhere else. From now on, SPAPR PHB only keeps flags telling what type
of interrupt for which device it has configured in order to return error if
(for example) MSIX was enabled and the guest is trying to disable MSI which
it has not enabled.

This removes a limit for the maximum number of MSIX-enabled devices per PHB,
now XICS and PCI bus capacity are the only limitation.

This changes migration stream as it fixes vmstate_spapr_pci_msi::name which was
wrong since the beginning.

This fixed traces to be more informative.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci.c  | 144 +---
 include/hw/pci-host/spapr.h |  13 ++--
 trace-events|   5 +-
 3 files changed, 64 insertions(+), 98 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index e574014..6dc0c50 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 }
 
 /*
- * Find an entry with config_addr or returns the empty one if not found AND
- * alloc_new is set.
- * At the moment the msi_table entries are never released so there is
- * no point to look till the end of the list if we need to find the free entry.
- */
-static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
- bool alloc_new)
-{
-int i;
-
-for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
-if (!phb->msi_table[i].nvec) {
-break;
-}
-if (phb->msi_table[i].config_addr == config_addr) {
-return i;
-}
-}
-if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
-trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
-return i;
-}
-
-return -1;
-}
-
-/*
  * Set MSI/MSIX message data.
  * This is required for msi_notify()/msix_notify() which
  * will write at the addresses via spapr_msi_write().
+ *
+ * If hwaddr == 0, all entries will have .data == first_irq i.e.
+ * table will be reset.
  */
 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
  unsigned first_irq, unsigned req_num)
@@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, 
bool msix,
 return;
 }
 
-for (i = 0; i < req_num; ++i, ++msg.data) {
+for (i = 0; i < req_num; ++i) {
 msix_set_message(pdev, i, msg);
 trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+if (addr) {
+++msg.data;
+}
 }
 }
 
@@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
 unsigned int seq_num = rtas_ld(args, 5);
 unsigned int ret_intr_type;
-int ndev, irq, max_irqs = 0;
+unsigned int irq, max_irqs = 0, num = 0;
 sPAPRPHBState *phb = NULL;
 PCIDevice *pdev = NULL;
+bool msix = false;
+spapr_pci_msi *msi;
+int *config_addr_key;
 
 switch (func) {
 case RTAS_CHANGE_MSI_FN:
@@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 
 /* Releasing MSIs */
 if (!req_num) {
-ndev = spapr_msicfg_find(phb, config_addr, false);
-if (ndev < 0) {
-trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi.hash, 
&config_addr);
+if (!msi) {
+trace_spapr_pci_msi("Releasing wrong config", config_addr);
 rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 return;
 }
-trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
+
+xics_free(spapr->icp, msi->first_irq, msi->num);
+spapr_msi_setmsg(pdev, 0, msix, 0, num);
+g_hash_table_remove(phb->msi.hash, &config_addr);
+
+trace_spapr_pci_msi("Released MSIs", config_addr);
 rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 rtas_st(rets, 1, 0);
 return;
@@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 
 /* Enabling MSI */
 
-/* Find a device number in the map to add or reuse the existing one */
-ndev = spapr_msicfg_find(phb, config_addr, true);
-if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
-error_report("No free entry for a new MSI device");

Re: [Qemu-devel] [PATCH v2 8/8] spapr_pci: Use XICS interrupt allocator and do not cache interrupts in PHB

2014-05-22 Thread Alexey Kardashevskiy


On 05/22/2014 05:16 PM, Alexander Graf wrote:> 
> 
>> Am 22.05.2014 um 08:53 schrieb Alexey Kardashevskiy :
>>
>>> On 05/21/2014 10:42 PM, Alexey Kardashevskiy wrote:
 On 05/21/2014 08:35 PM, Alexander Graf wrote:

> On 21.05.14 12:13, Alexey Kardashevskiy wrote:
>> On 05/21/2014 07:50 PM, Alexander Graf wrote:
>>> On 21.05.14 11:33, Alexey Kardashevskiy wrote:
 On 05/21/2014 07:13 PM, Alexander Graf wrote:
> On 21.05.14 11:11, Michael S. Tsirkin wrote:
>> On Wed, May 21, 2014 at 11:06:09AM +0200, Alexander Graf wrote:
>>> On 21.05.14 10:52, Alexey Kardashevskiy wrote:
 On 05/21/2014 06:40 PM, Alexander Graf wrote:
> On 15.05.14 11:59, Alexey Kardashevskiy wrote:
> Currently SPAPR PHB keeps track of all allocated MSI/MISX
> interrupt as
> XICS used to be unable to reuse interrupts which becomes a
> problem for
> dynamic MSI reconfiguration which is happening on guest driver
> reload or
> PCI hot (un)plug. Another problem is that PHB has a limit of 
> devices
> supporting MSI/MSIX (SPAPR_MSIX_MAX_DEVS=32) and there is no good
> reason
> for that.
>
> This makes use of new XICS ability to reuse interrupts.
>
> This removes cached MSI configuration from SPAPR PHB so the first
> IRQ
> number
> of a device is stored in MSI/MSIX config space so there is no
> need to
> store
> this anywhere else. From now on, SPAPR PHB only keeps flags 
> telling
> what
> type
> of interrupt for which device it has configured in order to return
> error if
> (for example) MSIX was enabled and the guest is trying to disable
> MSI
> which
> it has not enabled.
>
> This removes a limit for the maximum number of MSIX-enabled 
> devices
> per PHB,
> now XICS and PCI bus capacity are the only limitation.
>
> This changes migration stream as it fixes
> vmstate_spapr_pci_msi::name
> which was
> wrong since the beginning.
>
> This fixed traces to be more informative.
>
> Signed-off-by: Alexey Kardashevskiy 
> ---
>
> In reality either MSIX or MSI is enabled, never both. So I could
> remove
> msi/msix
> bitmaps from this patch, would it make sense?
 Is this a hard requirement? Does a device have to choose between
 MSIX and
 MSI or could it theoretically have both enabled? Is this a PCI
 limitation,
 a PAPR/XICS limitation or just a limitation of your implementation?
>>> My implementation does not have this limitation, I asked if I can
>>> simplify
>>> code by introducing one :)
>>>
>>> I cannot see any reason why PCI cannot have both MSI and MSIX 
>>> enabled
>>> but
>>> it does not seem to be used by anyone => cannot debug and confirm.
>>>
>>> PAPR spec assumes that if the guest tries enabling MSIX when MSI is
>>> already
>>> enabled, this is a "change", not enabling both types. But it also
>>> says MSI
>>> and MSIX vector numbers are not shared. Hm.
>> Yeah, I'm not aware of any limitation on hardware here and I'd
>> rather not impose one.
>>
>> Michael, do you know of any hardware that uses MSI *and* MSI-X at
>> the same time?
>>
>>
>> Alex
> No, and the PCI spec says:
>  A function is permitted to implement both MSI and MSI-X, but
> system
>  software is
>  prohibited from enabling both at the same time. If system 
> software
>  enables both at the same time, the result is undefined.
 Ah, cool. So yes Alexey, feel free to impose it :).
>>> Heh. This solves just half of the problem - I still have to keep track 
>>> of
>>> what device got MSI/MSIX configured via that ibm,change-msi interface. I
>>> was hoping I can store such flag somewhere in a device PCI config space
>>> but
>>> MSI/MSIX enable bit is not good as it is not set when those calls are
>>> made.
>>> And I cannot rely on address/data fields much as the guest can change 
>>> them
>>> (I already use them to store IRQ numbers and btw it is missing checks 
>>> when
>>> I read them back for disposal, I'll fix in next round).
>>>
>>> Or on "enable" event I could put IRQ numbers to .data of MSI config 
>>> space
>>> and on "disable" check if it i

Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexey Kardashevskiy

On 05/22/2014 08:48 PM, Alexander Graf wrote:
> 
> On 22.05.14 12:46, Alexey Kardashevskiy wrote:
>> On 05/22/2014 08:45 PM, Alexander Graf wrote:
>>> On 22.05.14 12:24, Alexey Kardashevskiy wrote:
 On 05/22/2014 08:09 PM, Alexander Graf wrote:
> On 22.05.14 01:45, Alexey Kardashevskiy wrote:
>> On 05/22/2014 08:11 AM, Alexander Graf wrote:
>>> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
 At the moment only 4K pages are supported by sPAPRTCETable. Since
 sPAPR
 spec allows other page sizes and we are going to implement them, we
 need
 page size to be configrable.

 This adds @page_shift into sPAPRTCETable and replaces
 SPAPR_TCE_PAGE_SHIFT
 with it whereever it is possible.

 This removes SPAPR_TCE_PAGE_MASK as it is no longer used.

 Signed-off-by: Alexey Kardashevskiy 
>>> [...]
>>>
 diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
 index fdd4c07..c9850d4 100644
 --- a/hw/ppc/spapr_pci.c
 +++ b/hw/ppc/spapr_pci.c
 @@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
 *sphb, Error **errp)
  sPAPRTCETable *tcet;
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
 +   SPAPR_TCE_PAGE_SHIFT,
 0x4000 >>
 SPAPR_TCE_PAGE_SHIFT);
  if (!tcet) {
  error_setg(errp, "Unable to create TCE table for %s",
 diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
 index b84e481..d7e9e6a 100644
 --- a/hw/ppc/spapr_vio.c
 +++ b/hw/ppc/spapr_vio.c
 @@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState
 *qdev)
  if (pc->rtce_window_size) {
  uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
  dev->tcet = spapr_tce_new_table(qdev, liobn,
 +SPAPR_TCE_PAGE_SHIFT,
>>> I don't quite understand who defines what the TCE page size is for a
>>> given
>>> device. Can you try to explain this to me?
>> If it is default window (for PCI) or window for VIO - it is 4K. If it
>> is a
>> dynamic DMA window - page size is a parameter of RTAS call which creates
>> the window.
> Could we change that default size for non-dynamic windows somehow? 4k is
> really fine grained.
 No, this is hardcoded in a million places and old distros. Normally it is
 maximum 1GB or 2MB table, not too bad. And with DDW support, we can make
 the default one really small (64MB?) and lose even less.

 SPAPR:
 R1–7.3.31–4. For the Dynamic DMA Windows (DDW) option: The platform must
 provide a default DMA window
 for each PE, and all of the following must be true:
 a. The window is defined by the “ibm,dma-window” property in the OF device
 tree.
 b. The window is defined with 4 KB I/O pages.
 c. The window is located entirely below 4 GB.
>>> Cool, would be interesting to see how that affects performance. I like most
>>> of this patch set btw, I think it should be good to go in in the next
>>> round.
>>
>> 10Gbit ethernet is ok but 40Gbit seems to suffer from this limitation.
> 
> Oh, I was implying that it'd be interesting to see how we fare when we do a
> really small default DMA window to force the guest to use DDW and
> preferably even with very large pages.


Guest switches to DDW always if the feature is there. SPAPR DDW API even
allows deleting the default one if the platform supports "reset" (which I
do not implement for now).



-- 
Alexey

Re: [Qemu-devel] [PATCH 1/2] vmstate: Add helper to enable GHashTable migration

2014-05-22 Thread Alexander Graf



On 22.05.14 12:53, Alexey Kardashevskiy wrote:

This adds a VMSTATE_HASH_V macro. This implements put/get callbacks for it.
This implements a qemu_hash_init() wrapper to save key/value sizes.

Signed-off-by: Alexey Kardashevskiy 
---
  include/migration/vmstate.h | 10 +
  include/qemu-common.h   | 13 +++
  vmstate.c   | 54 +
  3 files changed, 77 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7e45048..6af599d 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -166,6 +166,7 @@ extern const VMStateInfo vmstate_info_timer;
  extern const VMStateInfo vmstate_info_buffer;
  extern const VMStateInfo vmstate_info_unused_buffer;
  extern const VMStateInfo vmstate_info_bitmap;
+static const VMStateInfo vmstate_info_hash;
  
  #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)

  #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
@@ -740,6 +741,15 @@ extern const VMStateInfo vmstate_info_bitmap;
  #define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size)\
  VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, 
_size)
  
+#define VMSTATE_HASH_V(_field, _state, _version) {  \

+.name   = (stringify(_field)),  \
+.version_id = (_version),   \
+.size   = sizeof(qemu_hash),\
+.info   = &vmstate_info_hash,   \
+.flags  = VMS_SINGLE,   \
+.offset = vmstate_offset_value(_state, _field, qemu_hash),  \
+}
+
  #define VMSTATE_UNUSED_V(_v, _size)   \
  VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
  
diff --git a/include/qemu-common.h b/include/qemu-common.h

index 3f3fd60..ee973e7 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -462,4 +462,17 @@ int parse_debug_env(const char *name, int max, int 
initial);
  
  const char *qemu_ether_ntoa(const MACAddr *mac);
  
+typedef struct qemu_hash {

+GHashTable *hash;
+int key_size;
+int value_size;
+} qemu_hash;
+
+static inline void qemu_hash_init(qemu_hash *h, int key_size, int value_size)
+{
+h->key_size = key_size;
+h->value_size = value_size;
+h->hash = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
+}
+
  #endif
diff --git a/vmstate.c b/vmstate.c
index b5882fa..2148e73 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -667,3 +667,57 @@ const VMStateInfo vmstate_info_bitmap = {
  .get = get_bitmap,
  .put = put_bitmap,
  };
+
+/* Save restore for qemu_hash which is a wrapper over GHashTable */
+static int get_hash(QEMUFile *f, void *pv, size_t size)
+{
+qemu_hash *h = pv;
+uint32_t num = g_hash_table_size(h->hash);
+gpointer key, value;
+
+num = qemu_get_be32(f);
+
+for ( ; num; --num) {
+int i;
+
+key = g_malloc0(h->key_size);
+for (i = 0; i < h->key_size; ++i) {
+((uint8_t *)key)[i] = qemu_get_byte(f);
+}
+value = g_malloc0(h->value_size);
+for (i = 0; i < h->value_size; ++i) {
+((uint8_t *)value)[i] = qemu_get_byte(f);
+}
+g_hash_table_insert(h->hash, key, value);
+}
+
+return 0;
+}
+
+static void put_hash(QEMUFile *f, void *pv, size_t size)
+{
+qemu_hash *h = pv;
+uint32_t num = g_hash_table_size(h->hash);
+GHashTableIter iter;
+gpointer key, value;
+
+qemu_put_be32(f, num);
+
+g_hash_table_iter_init(&iter, h->hash);
+while (g_hash_table_iter_next (&iter, &key, &value)) {
+int i;
+
+for (i = 0; i < h->key_size; ++i) {
+qemu_put_byte(f, ((uint8_t *)key)[i]);
+}
+for (i = 0; i < h->value_size; ++i) {
+qemu_put_byte(f, ((uint8_t *)value)[i]);


I think the only way to do this safely would be to do a recursive 
introspective walk through the hash table's key and value properties. 
Every key and every property can be an object again, right?


We would basically impose a limit onto ourselves that every member of 
the hash table would have to be a GObject again.



Alex

Re: [Qemu-devel] [PATCH v2 8/8] spapr_pci: Use XICS interrupt allocator and do not cache interrupts in PHB

2014-05-22 Thread Alexander Graf



On 22.05.14 12:53, Alexey Kardashevskiy wrote:

On 05/22/2014 05:16 PM, Alexander Graf wrote:>

Am 22.05.2014 um 08:53 schrieb Alexey Kardashevskiy :


On 05/21/2014 10:42 PM, Alexey Kardashevskiy wrote:

On 05/21/2014 08:35 PM, Alexander Graf wrote:


On 21.05.14 12:13, Alexey Kardashevskiy wrote:

On 05/21/2014 07:50 PM, Alexander Graf wrote:

On 21.05.14 11:33, Alexey Kardashevskiy wrote:

On 05/21/2014 07:13 PM, Alexander Graf wrote:

On 21.05.14 11:11, Michael S. Tsirkin wrote:

On Wed, May 21, 2014 at 11:06:09AM +0200, Alexander Graf wrote:

On 21.05.14 10:52, Alexey Kardashevskiy wrote:

On 05/21/2014 06:40 PM, Alexander Graf wrote:

On 15.05.14 11:59, Alexey Kardashevskiy wrote:
Currently SPAPR PHB keeps track of all allocated MSI/MISX
interrupt as
XICS used to be unable to reuse interrupts which becomes a
problem for
dynamic MSI reconfiguration which is happening on guest driver
reload or
PCI hot (un)plug. Another problem is that PHB has a limit of devices
supporting MSI/MSIX (SPAPR_MSIX_MAX_DEVS=32) and there is no good
reason
for that.

This makes use of new XICS ability to reuse interrupts.

This removes cached MSI configuration from SPAPR PHB so the first
IRQ
number
of a device is stored in MSI/MSIX config space so there is no
need to
store
this anywhere else. From now on, SPAPR PHB only keeps flags telling
what
type
of interrupt for which device it has configured in order to return
error if
(for example) MSIX was enabled and the guest is trying to disable
MSI
which
it has not enabled.

This removes a limit for the maximum number of MSIX-enabled devices
per PHB,
now XICS and PCI bus capacity are the only limitation.

This changes migration stream as it fixes
vmstate_spapr_pci_msi::name
which was
wrong since the beginning.

This fixed traces to be more informative.

Signed-off-by: Alexey Kardashevskiy 
---

In reality either MSIX or MSI is enabled, never both. So I could
remove
msi/msix
bitmaps from this patch, would it make sense?

Is this a hard requirement? Does a device have to choose between
MSIX and
MSI or could it theoretically have both enabled? Is this a PCI
limitation,
a PAPR/XICS limitation or just a limitation of your implementation?

My implementation does not have this limitation, I asked if I can
simplify
code by introducing one :)

I cannot see any reason why PCI cannot have both MSI and MSIX enabled
but
it does not seem to be used by anyone => cannot debug and confirm.

PAPR spec assumes that if the guest tries enabling MSIX when MSI is
already
enabled, this is a "change", not enabling both types. But it also
says MSI
and MSIX vector numbers are not shared. Hm.

Yeah, I'm not aware of any limitation on hardware here and I'd
rather not impose one.

Michael, do you know of any hardware that uses MSI *and* MSI-X at
the same time?


Alex

No, and the PCI spec says:
  A function is permitted to implement both MSI and MSI-X, but
system
  software is
  prohibited from enabling both at the same time. If system software
  enables both at the same time, the result is undefined.

Ah, cool. So yes Alexey, feel free to impose it :).

Heh. This solves just half of the problem - I still have to keep track of
what device got MSI/MSIX configured via that ibm,change-msi interface. I
was hoping I can store such flag somewhere in a device PCI config space
but
MSI/MSIX enable bit is not good as it is not set when those calls are
made.
And I cannot rely on address/data fields much as the guest can change them
(I already use them to store IRQ numbers and btw it is missing checks when
I read them back for disposal, I'll fix in next round).

Or on "enable" event I could put IRQ numbers to .data of MSI config space
and on "disable" check if it is not zero, then configuration took place,
then I can remove my msi[]/msix[] flag arrays. If the guest did any change
to MSI/MSIX config space (it does not on SPAPR except weird selftest
cases), I compare .data with what ICS can possibly have and either reject
"disable" or handle it and if it breaks XICS - that's too bad for the
stupid guest. Would that be acceptable?

Can't you prohibit the guest from writing to the MSI configuration
registers itself? Then you don't need to do sanity checks.

I could for emulated devices but VFIO uses the same code. For example,
there is an IBM SCSI IPR card which does a "self test". For that, it saves
MSIX BAR content, does reboot via some backdoor interface and restores MSIX
BAR. It has been solved for VFIO in the host kernel by restoring MSIX data
from cached values when guest is trying to restore it with what it thinks
is actual MSIX data (it is virtualized because of x86). But there is cache

We already have a cache because we don't access the real PCI registers with
msi_set_message(), no?


For emulated devices there is no cache. And in any case the guest is
allowed to write to it... Who knows what AIX does? I do not.


Tried GHashTable for keeping bus:dev.fn <-> (irq, num), more or less ok but
how to mig

Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device

2014-05-22 Thread Chen, Tiejun

> -Original Message-
> From: qemu-devel-bounces+tiejun.chen=intel@nongnu.org
> [mailto:qemu-devel-bounces+tiejun.chen=intel@nongnu.org] On Behalf Of
> Michael S. Tsirkin
> Sent: Thursday, May 22, 2014 3:22 PM
> To: Gerd Hoffmann
> Cc: qemu-devel@nongnu.org; Anthony Liguori
> Subject: Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device
> 
> On Thu, May 22, 2014 at 09:20:50AM +0200, Gerd Hoffmann wrote:
> > Patch hooks up the xen platform device to the default device code we
> > have in qemu.  Two effects:
> >
> >   (1) The device will not be created in case -nodefaults is specified
> >   on the command line.
> >   (2) Autocreating the device is also turned off in case xen-platform
> >   is added manually via -device.
> >
> > With the patch applied you can move the xen-platform device to some
> > other place with a simple 'qemu -device xen-platform,addr=$slot'.
> >
> > Tested-by: Tiejun Chen 

Sorry, I'm not sure if this works well as Gerd expect.

I already reply something online, please take a look at that before apply. 

Thanks
Tiejun

> > Signed-off-by: Gerd Hoffmann 
> 
> Applied, thanks!
> 
> > ---
> >  hw/i386/pc_piix.c| 2 +-
> >  include/hw/xen/xen.h | 1 +
> >  vl.c | 3 +++
> >  3 files changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index
> > eaf3e61..f987d03 100644
> > --- a/hw/i386/pc_piix.c
> > +++ b/hw/i386/pc_piix.c
> > @@ -385,7 +385,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs
> *args)
> >  pc_init_pci(args);
> >
> >  bus = pci_find_primary_bus();
> > -if (bus != NULL) {
> > +if (bus != NULL && default_xenplatform) {
> >  pci_create_simple(bus, -1, "xen-platform");
> >  }
> >  }
> > diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index
> > 85fda3d..b350413 100644
> > --- a/include/hw/xen/xen.h
> > +++ b/include/hw/xen/xen.h
> > @@ -20,6 +20,7 @@ enum xen_mode {
> >
> >  extern uint32_t xen_domid;
> >  extern enum xen_mode xen_mode;
> > +extern int default_xenplatform;
> >
> >  extern bool xen_allowed;
> >
> > diff --git a/vl.c b/vl.c
> > index 709d8cd..673148e 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -226,6 +226,7 @@ static int default_floppy = 1;  static int
> > default_cdrom = 1;  static int default_sdcard = 1;  static int
> > default_vga = 1;
> > +int default_xenplatform = 1;
> >
> >  static struct {
> >  const char *driver;
> > @@ -247,6 +248,7 @@ static struct {
> >  { .driver = "isa-cirrus-vga",   .flag = &default_vga   },
> >  { .driver = "vmware-svga",  .flag = &default_vga   },
> >  { .driver = "qxl-vga",  .flag = &default_vga   },
> > +{ .driver = "xen-platform", .flag = &default_xenplatform },
> >  };
> >
> >  static QemuOptsList qemu_rtc_opts = { @@ -4101,6 +4103,7 @@ int
> > main(int argc, char **argv, char **envp)
> >  default_monitor = 0;
> >  default_net = 0;
> >  default_vga = 0;
> > +default_xenplatform = 0;
> >  }
> >
> >  if (is_daemonized()) {
> > --
> > 1.8.3.1

[Qemu-devel] [PATCH 0/3] console: text terminal updates

2014-05-22 Thread Gerd Hoffmann

  Hi,

These tree patches make the text terminal consoles support multiple
windows, i.e. work correctly in case a displaychangelistener binds
a text terminal explicitly to a fixed QemuConsole by setting dcl->con.

Use case: 
https://www.kraxel.org/cgit/qemu/commit/?h=rebase/ui-gtk-next&id=dc0bdb463d0b9cb0f3f2bd898ced9b9322f7d3ce

please review,
  Gerd

Gerd Hoffmann (3):
  console: update text terminal surface unconditionally
  console: rework text terminal cursor logic
  console: add kbd_put_keysym_console

 include/ui/console.h |   1 +
 ui/console.c | 184 ++-
 2 files changed, 94 insertions(+), 91 deletions(-)

-- 
1.8.3.1

[Qemu-devel] [PATCH 1/3] console: update text terminal surface unconditionally

2014-05-22 Thread Gerd Hoffmann

These days each QemuConsole has its own private DisplaySurface,
so we can simply render updates all the time.

Signed-off-by: Gerd Hoffmann 
---
 ui/console.c | 127 ++-
 1 file changed, 56 insertions(+), 71 deletions(-)

diff --git a/ui/console.c b/ui/console.c
index c6087d8..85f46ae 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -475,6 +475,9 @@ static inline void text_update_xy(QemuConsole *s, int x, 
int y)
 
 static void invalidate_xy(QemuConsole *s, int x, int y)
 {
+if (!qemu_console_is_visible(s)) {
+return;
+}
 if (s->update_x0 > x * FONT_WIDTH)
 s->update_x0 = x * FONT_WIDTH;
 if (s->update_y0 > y * FONT_HEIGHT)
@@ -490,25 +493,20 @@ static void update_xy(QemuConsole *s, int x, int y)
 TextCell *c;
 int y1, y2;
 
-if (!qemu_console_is_visible(s)) {
-return;
-}
-
 if (s->ds->have_text) {
 text_update_xy(s, x, y);
 }
 
-if (s->ds->have_gfx) {
-y1 = (s->y_base + y) % s->total_height;
-y2 = y1 - s->y_displayed;
-if (y2 < 0)
-y2 += s->total_height;
-if (y2 < s->height) {
-c = &s->cells[y1 * s->width + x];
-vga_putcharxy(s, x, y2, c->ch,
-  &(c->t_attrib));
-invalidate_xy(s, x, y2);
-}
+y1 = (s->y_base + y) % s->total_height;
+y2 = y1 - s->y_displayed;
+if (y2 < 0) {
+y2 += s->total_height;
+}
+if (y2 < s->height) {
+c = &s->cells[y1 * s->width + x];
+vga_putcharxy(s, x, y2, c->ch,
+  &(c->t_attrib));
+invalidate_xy(s, x, y2);
 }
 }
 
@@ -518,33 +516,28 @@ static void console_show_cursor(QemuConsole *s, int show)
 int y, y1;
 int x = s->x;
 
-if (!qemu_console_is_visible(s)) {
-return;
-}
-
 if (s->ds->have_text) {
 s->cursor_invalidate = 1;
 }
 
-if (s->ds->have_gfx) {
-if (x >= s->width) {
-x = s->width - 1;
-}
-y1 = (s->y_base + s->y) % s->total_height;
-y = y1 - s->y_displayed;
-if (y < 0)
-y += s->total_height;
-if (y < s->height) {
-c = &s->cells[y1 * s->width + x];
-if (show && s->cursor_visible_phase) {
-TextAttributes t_attrib = s->t_attrib_default;
-t_attrib.invers = !(t_attrib.invers); /* invert fg and bg */
-vga_putcharxy(s, x, y, c->ch, &t_attrib);
-} else {
-vga_putcharxy(s, x, y, c->ch, &(c->t_attrib));
-}
-invalidate_xy(s, x, y);
+if (x >= s->width) {
+x = s->width - 1;
+}
+y1 = (s->y_base + s->y) % s->total_height;
+y = y1 - s->y_displayed;
+if (y < 0) {
+y += s->total_height;
+}
+if (y < s->height) {
+c = &s->cells[y1 * s->width + x];
+if (show && s->cursor_visible_phase) {
+TextAttributes t_attrib = s->t_attrib_default;
+t_attrib.invers = !(t_attrib.invers); /* invert fg and bg */
+vga_putcharxy(s, x, y, c->ch, &t_attrib);
+} else {
+vga_putcharxy(s, x, y, c->ch, &(c->t_attrib));
 }
+invalidate_xy(s, x, y);
 }
 }
 
@@ -554,10 +547,6 @@ static void console_refresh(QemuConsole *s)
 TextCell *c;
 int x, y, y1;
 
-if (!qemu_console_is_visible(s)) {
-return;
-}
-
 if (s->ds->have_text) {
 s->text_x[0] = 0;
 s->text_y[0] = 0;
@@ -566,25 +555,23 @@ static void console_refresh(QemuConsole *s)
 s->cursor_invalidate = 1;
 }
 
-if (s->ds->have_gfx) {
-vga_fill_rect(s, 0, 0, surface_width(surface), surface_height(surface),
-  color_table_rgb[0][COLOR_BLACK]);
-y1 = s->y_displayed;
-for (y = 0; y < s->height; y++) {
-c = s->cells + y1 * s->width;
-for (x = 0; x < s->width; x++) {
-vga_putcharxy(s, x, y, c->ch,
-  &(c->t_attrib));
-c++;
-}
-if (++y1 == s->total_height) {
-y1 = 0;
-}
+vga_fill_rect(s, 0, 0, surface_width(surface), surface_height(surface),
+  color_table_rgb[0][COLOR_BLACK]);
+y1 = s->y_displayed;
+for (y = 0; y < s->height; y++) {
+c = s->cells + y1 * s->width;
+for (x = 0; x < s->width; x++) {
+vga_putcharxy(s, x, y, c->ch,
+  &(c->t_attrib));
+c++;
+}
+if (++y1 == s->total_height) {
+y1 = 0;
 }
-console_show_cursor(s, 1);
-dpy_gfx_update(s, 0, 0,
-   surface_width(surface), surface_height(surface));
 }
+console_show_cursor(s, 1);
+dpy_gfx_update(s, 0, 0,
+   surface_width(surface), surface_height(surface));
 }
 
 static void console_scroll(QemuConsole *s, int ydelta)
@@ -64

Re: [Qemu-devel] [PATCH v2] spapr: Enable dynamic change of the supported hypercalls list

2014-05-22 Thread Alexey Kardashevskiy

On 05/22/2014 08:47 PM, Alexander Graf wrote:
> 
> On 21.05.14 17:21, Alexey Kardashevskiy wrote:
>> At the moment the "ibm,hypertas-functions" list is fixed. However some
>> calls should be listed there if they are supported by QEMU or the host
>> kernel.
>>
>> This enables hyperrtas_prop to grow on stack by adding
>> a SPAPR_HYPERRTAS_ADD macro. "qemu,hypertas-functions" is converted as well.
>>
>> The first user of this is going to be a "multi-tce" property.
>>
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>> Changes:
>> v2:
>> * replaced alloca() with GString
>> ---
>>   hw/ppc/spapr.c | 30 +++---
>>   1 file changed, 23 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 0a61246..3b28211 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -293,6 +293,10 @@ static size_t create_page_sizes_prop(CPUPPCState
>> *env, uint32_t *prop,
>>   }  \
>>   } while (0)
>>   +static inline void add_str(GString *s, const gchar *s1)
> 
> Please remove the "inline" :). Otherwise this looks a lot nicer than before :)

Good :) What now? Have you finished with this set and I can repost it?


-- 
Alexey

Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device

2014-05-22 Thread Michael S. Tsirkin

On Thu, May 22, 2014 at 10:57:54AM +, Chen, Tiejun wrote:
> > -Original Message-
> > From: qemu-devel-bounces+tiejun.chen=intel@nongnu.org
> > [mailto:qemu-devel-bounces+tiejun.chen=intel@nongnu.org] On Behalf Of
> > Michael S. Tsirkin
> > Sent: Thursday, May 22, 2014 3:22 PM
> > To: Gerd Hoffmann
> > Cc: qemu-devel@nongnu.org; Anthony Liguori
> > Subject: Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device
> > 
> > On Thu, May 22, 2014 at 09:20:50AM +0200, Gerd Hoffmann wrote:
> > > Patch hooks up the xen platform device to the default device code we
> > > have in qemu.  Two effects:
> > >
> > >   (1) The device will not be created in case -nodefaults is specified
> > >   on the command line.
> > >   (2) Autocreating the device is also turned off in case xen-platform
> > >   is added manually via -device.
> > >
> > > With the patch applied you can move the xen-platform device to some
> > > other place with a simple 'qemu -device xen-platform,addr=$slot'.
> > >
> > > Tested-by: Tiejun Chen 
> 
> Sorry, I'm not sure if this works well as Gerd expect.
> 
> I already reply something online, please take a look at that before apply. 
> 
> Thanks
> Tiejun

No worries, I'll wait.

> > > Signed-off-by: Gerd Hoffmann 
> > 
> > Applied, thanks!
> > 
> > > ---
> > >  hw/i386/pc_piix.c| 2 +-
> > >  include/hw/xen/xen.h | 1 +
> > >  vl.c | 3 +++
> > >  3 files changed, 5 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index
> > > eaf3e61..f987d03 100644
> > > --- a/hw/i386/pc_piix.c
> > > +++ b/hw/i386/pc_piix.c
> > > @@ -385,7 +385,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs
> > *args)
> > >  pc_init_pci(args);
> > >
> > >  bus = pci_find_primary_bus();
> > > -if (bus != NULL) {
> > > +if (bus != NULL && default_xenplatform) {
> > >  pci_create_simple(bus, -1, "xen-platform");
> > >  }
> > >  }
> > > diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index
> > > 85fda3d..b350413 100644
> > > --- a/include/hw/xen/xen.h
> > > +++ b/include/hw/xen/xen.h
> > > @@ -20,6 +20,7 @@ enum xen_mode {
> > >
> > >  extern uint32_t xen_domid;
> > >  extern enum xen_mode xen_mode;
> > > +extern int default_xenplatform;
> > >
> > >  extern bool xen_allowed;
> > >
> > > diff --git a/vl.c b/vl.c
> > > index 709d8cd..673148e 100644
> > > --- a/vl.c
> > > +++ b/vl.c
> > > @@ -226,6 +226,7 @@ static int default_floppy = 1;  static int
> > > default_cdrom = 1;  static int default_sdcard = 1;  static int
> > > default_vga = 1;
> > > +int default_xenplatform = 1;
> > >
> > >  static struct {
> > >  const char *driver;
> > > @@ -247,6 +248,7 @@ static struct {
> > >  { .driver = "isa-cirrus-vga",   .flag = &default_vga   },
> > >  { .driver = "vmware-svga",  .flag = &default_vga   },
> > >  { .driver = "qxl-vga",  .flag = &default_vga   },
> > > +{ .driver = "xen-platform", .flag = &default_xenplatform },
> > >  };
> > >
> > >  static QemuOptsList qemu_rtc_opts = { @@ -4101,6 +4103,7 @@ int
> > > main(int argc, char **argv, char **envp)
> > >  default_monitor = 0;
> > >  default_net = 0;
> > >  default_vga = 0;
> > > +default_xenplatform = 0;
> > >  }
> > >
> > >  if (is_daemonized()) {
> > > --
> > > 1.8.3.1

[Qemu-devel] [PATCH 3/3] console: add kbd_put_keysym_console

2014-05-22 Thread Gerd Hoffmann

So you can send keysyms to a specific (text terminal) console.

Signed-off-by: Gerd Hoffmann 
---
 include/ui/console.h | 1 +
 ui/console.c | 9 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index 8a86617..b513e20 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -81,6 +81,7 @@ void do_mouse_set(Monitor *mon, const QDict *qdict);
 #define QEMU_KEY_CTRL_PAGEUP 0xe406
 #define QEMU_KEY_CTRL_PAGEDOWN   0xe407
 
+void kbd_put_keysym_console(QemuConsole *s, int keysym);
 void kbd_put_keysym(int keysym);
 
 /* consoles */
diff --git a/ui/console.c b/ui/console.c
index f6ce0ef..75ec3af 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1056,13 +1056,11 @@ static void kbd_send_chars(void *opaque)
 }
 
 /* called when an ascii key is pressed */
-void kbd_put_keysym(int keysym)
+void kbd_put_keysym_console(QemuConsole *s, int keysym)
 {
-QemuConsole *s;
 uint8_t buf[16], *q;
 int c;
 
-s = active_console;
 if (!s || (s->console_type == GRAPHIC_CONSOLE))
 return;
 
@@ -,6 +1109,11 @@ void kbd_put_keysym(int keysym)
 }
 }
 
+void kbd_put_keysym(int keysym)
+{
+kbd_put_keysym_console(active_console, keysym);
+}
+
 static void text_console_invalidate(void *opaque)
 {
 QemuConsole *s = (QemuConsole *) opaque;
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH v2] spapr: Enable dynamic change of the supported hypercalls list

2014-05-22 Thread Alexander Graf



On 22.05.14 13:01, Alexey Kardashevskiy wrote:

On 05/22/2014 08:47 PM, Alexander Graf wrote:

On 21.05.14 17:21, Alexey Kardashevskiy wrote:

At the moment the "ibm,hypertas-functions" list is fixed. However some
calls should be listed there if they are supported by QEMU or the host
kernel.

This enables hyperrtas_prop to grow on stack by adding
a SPAPR_HYPERRTAS_ADD macro. "qemu,hypertas-functions" is converted as well.

The first user of this is going to be a "multi-tce" property.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* replaced alloca() with GString
---
   hw/ppc/spapr.c | 30 +++---
   1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0a61246..3b28211 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -293,6 +293,10 @@ static size_t create_page_sizes_prop(CPUPPCState
*env, uint32_t *prop,
   }  \
   } while (0)
   +static inline void add_str(GString *s, const gchar *s1)

Please remove the "inline" :). Otherwise this looks a lot nicer than before :)

Good :) What now? Have you finished with this set and I can repost it?


Yup


Alex

Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve 00:02.0 for INTEL IGD

2014-05-22 Thread Gonglei (Arei)


> -Original Message-
> From: qemu-devel-bounces+arei.gonglei=huawei@nongnu.org
> [mailto:qemu-devel-bounces+arei.gonglei=huawei@nongnu.org] On
> Behalf Of Chen, Tiejun
> Sent: Thursday, May 22, 2014 6:50 PM
> To: Gerd Hoffmann
> Cc: peter.mayd...@linaro.org; xen-de...@lists.xensource.com;
> stefano.stabell...@eu.citrix.com; m...@redhat.com; Kay, Allen M;
> kelly.zyta...@amd.com; qemu-devel@nongnu.org; Zhang, Yang Z;
> anth...@codemonkey.ws; Anthony PERARD
> Subject: Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve
> 00:02.0 for INTEL IGD
> 
> > -Original Message-
> > From: Gerd Hoffmann [mailto:kra...@redhat.com]
> > Sent: Thursday, May 22, 2014 2:45 PM
> > To: Chen, Tiejun
> > Cc: Anthony PERARD; Daniel P. Berrange; peter.mayd...@linaro.org;
> > xen-de...@lists.xensource.com; m...@redhat.com;
> > stefano.stabell...@eu.citrix.com; Kay, Allen M; kelly.zyta...@amd.com;
> > qemu-devel@nongnu.org; Zhang, Yang Z; anth...@codemonkey.ws
> > Subject: Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve
> > 00:02.0 for INTEL IGD
> >
> >   Hi,
> >
> > > > Another useful thing would be to not create the xen platform device
> > > > in case "-nodefaults" was specified on the command line (that switch
> > > > turns off a bunch of other devices present by default: vga, nic, cdrom, 
> > > > ...).
> > >
> > > Currently looks 'xen-platform' itself can't be created, not those devices
> > existed on that.
> >
> > The error message looks more like libxl tries to hot-unplug the xen platform
> > device.
> >
> > Attached patch (untested!) hooks up the xen platform device to the default
> > device code we have in qemu.  Two effects:
> >
> >   (1) As mentioned above the device will not be created in case
> >   -nodefaults  is specified on the command line.
> >   (2) Autocreating the device is also turned off in case xen-platform
> >   is added manually via -device.
> >
> > With the patch applied you should be able to move the xen-platform device to
> > some other place with a simple 'qemu -device xen-platform,addr=$slot'.
> >
> 
> Gerd,
> 
> Sorry, I may misunderstand what you mean previously then have a wrong test.
> 
> So this still doesn't work actually.
> 
> After applied your patch, 'xen-platform' is always disabled by default, 
> right? So
> 00:02.0 is left naturally to be assigned to IGD as we expect like this,
> 
> tchen0@tchen0-HVM-domU:~$ lspci
> 00:00.0 Host bridge: Intel Corporation 4th Gen Core Processor DRAM
> Controller (rev 06)
> 00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
> 00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
> 00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
> 00:02.0 VGA compatible controller: Intel Corporation Xeon E3-1200 v3/4th Gen
> Core Processor Integrated Graphics Controller (rev 06)
> 00:03.0 USB controller: Intel Corporation 8 Series/C220 Series Chipset Family
> USB EHCI #2 (rev 04)
> 00:1f.0 ISA bridge: Intel Corporation Q87 Express LPC Controller (rev 04)
> 
> Then this is fine but if you intend to add a 'qemu -device
> xen-platform,addr=$slot', this doesn't work well. In my case,
> 
> gfx_passthru=1
> pci=["00:02.0@2", "00:1a.0"]
> xen_platform_pci=0
> device_model_args_hvm = ['-device', 'xen-platform,addr=0x3']
> 
> tchen0@tchen0-linux:~/workspace$ sudo xl cr domu-cfg
> Parsing config from domu-cfg
> libxl: error: libxl_qmp.c:287:qmp_handle_error_response: received an error
> message from QMP server: Unsupported bus. Bus doesn't have property
> 'acpi-pcihp-bsel' set
> libxl: error: libxl_create.c:1277:domcreate_attach_pci: libxl_device_pci_add
> failed: -3
> 
Maybe you can assign explicitly the bus name, such as:
device_model_args_hvm = ['-device', 'xen-platform,bus=pci.0,addr=0x3']


Best regards,
-Gonglei

Re: [Qemu-devel] [PATCH 1/2] vmstate: Add helper to enable GHashTable migration

2014-05-22 Thread Alexey Kardashevskiy

On 05/22/2014 08:57 PM, Alexander Graf wrote:
> 
> On 22.05.14 12:53, Alexey Kardashevskiy wrote:
>> This adds a VMSTATE_HASH_V macro. This implements put/get callbacks for it.
>> This implements a qemu_hash_init() wrapper to save key/value sizes.
>>
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>>   include/migration/vmstate.h | 10 +
>>   include/qemu-common.h   | 13 +++
>>   vmstate.c   | 54
>> +
>>   3 files changed, 77 insertions(+)
>>
>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>> index 7e45048..6af599d 100644
>> --- a/include/migration/vmstate.h
>> +++ b/include/migration/vmstate.h
>> @@ -166,6 +166,7 @@ extern const VMStateInfo vmstate_info_timer;
>>   extern const VMStateInfo vmstate_info_buffer;
>>   extern const VMStateInfo vmstate_info_unused_buffer;
>>   extern const VMStateInfo vmstate_info_bitmap;
>> +static const VMStateInfo vmstate_info_hash;
>> #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
>>   #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
>> @@ -740,6 +741,15 @@ extern const VMStateInfo vmstate_info_bitmap;
>>   #define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size)\
>>   VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version,
>> vmstate_info_buffer, _size)
>>   +#define VMSTATE_HASH_V(_field, _state, _version) {  \
>> +.name   = (stringify(_field)),  \
>> +.version_id = (_version),   \
>> +.size   = sizeof(qemu_hash),\
>> +.info   = &vmstate_info_hash,   \
>> +.flags  = VMS_SINGLE,   \
>> +.offset = vmstate_offset_value(_state, _field, qemu_hash),  \
>> +}
>> +
>>   #define VMSTATE_UNUSED_V(_v, _size)   \
>>   VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
>>   diff --git a/include/qemu-common.h b/include/qemu-common.h
>> index 3f3fd60..ee973e7 100644
>> --- a/include/qemu-common.h
>> +++ b/include/qemu-common.h
>> @@ -462,4 +462,17 @@ int parse_debug_env(const char *name, int max, int
>> initial);
>> const char *qemu_ether_ntoa(const MACAddr *mac);
>>   +typedef struct qemu_hash {
>> +GHashTable *hash;
>> +int key_size;
>> +int value_size;
>> +} qemu_hash;
>> +
>> +static inline void qemu_hash_init(qemu_hash *h, int key_size, int
>> value_size)
>> +{
>> +h->key_size = key_size;
>> +h->value_size = value_size;
>> +h->hash = g_hash_table_new_full(g_int_hash, g_int_equal, g_free,
>> g_free);
>> +}
>> +
>>   #endif
>> diff --git a/vmstate.c b/vmstate.c
>> index b5882fa..2148e73 100644
>> --- a/vmstate.c
>> +++ b/vmstate.c
>> @@ -667,3 +667,57 @@ const VMStateInfo vmstate_info_bitmap = {
>>   .get = get_bitmap,
>>   .put = put_bitmap,
>>   };
>> +
>> +/* Save restore for qemu_hash which is a wrapper over GHashTable */
>> +static int get_hash(QEMUFile *f, void *pv, size_t size)
>> +{
>> +qemu_hash *h = pv;
>> +uint32_t num = g_hash_table_size(h->hash);
>> +gpointer key, value;
>> +
>> +num = qemu_get_be32(f);
>> +
>> +for ( ; num; --num) {
>> +int i;
>> +
>> +key = g_malloc0(h->key_size);
>> +for (i = 0; i < h->key_size; ++i) {
>> +((uint8_t *)key)[i] = qemu_get_byte(f);
>> +}
>> +value = g_malloc0(h->value_size);
>> +for (i = 0; i < h->value_size; ++i) {
>> +((uint8_t *)value)[i] = qemu_get_byte(f);
>> +}
>> +g_hash_table_insert(h->hash, key, value);
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +static void put_hash(QEMUFile *f, void *pv, size_t size)
>> +{
>> +qemu_hash *h = pv;
>> +uint32_t num = g_hash_table_size(h->hash);
>> +GHashTableIter iter;
>> +gpointer key, value;
>> +
>> +qemu_put_be32(f, num);
>> +
>> +g_hash_table_iter_init(&iter, h->hash);
>> +while (g_hash_table_iter_next (&iter, &key, &value)) {
>> +int i;
>> +
>> +for (i = 0; i < h->key_size; ++i) {
>> +qemu_put_byte(f, ((uint8_t *)key)[i]);
>> +}
>> +for (i = 0; i < h->value_size; ++i) {
>> +qemu_put_byte(f, ((uint8_t *)value)[i]);
> 
> I think the only way to do this safely would be to do a recursive
> introspective walk through the hash table's key and value properties. Every
> key and every property can be an object again, right?
>
> We would basically impose a limit onto ourselves that every member of the
> hash table would have to be a GObject again.

This is a bit too much for the task I am trying to solve :)

@key here is uin32_t and there is no point in making it an object...



-- 
Alexey

Re: [Qemu-devel] [PATCH 4/7] virtio-blk: use aliases instead of duplicate qdev properties

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 12:17, Stefan Hajnoczi ha scritto:

I took a quick look at net, rng, and serial.  This approach should
work because the VirtIODevice qdev properties need to be exposed
wholesale on the transport device.

If that's what you're hinting at, I'll try it in v2.


Yes, exactly.  scsi too.

Paolo

Re: [Qemu-devel] [PATCH 1/7] qom: add object_property_add_alias()

2014-05-22 Thread Paolo Bonzini


Il 22/05/2014 12:23, Stefan Hajnoczi ha scritto:

I had an intermediate version of this patch with a flag so you could
tell object_property_add_alias() whether or not to ref the target
object.  But in the end it seems like overengineering since the
refcount case is rare or non-existent.  The refcount is a convenience
feature just for the case where you want to alias to a random object
that you don't otherwise hold a refcount on (I couldn't think of an
example).

Do you see what I mean?


Yes, and it makes sense.  Perhaps add a comment for v2.

Paolo

[Qemu-devel] [PATCH V6 0/8] PSCI v0.2 support for KVM ARM/ARM64

2014-05-22 Thread Pranavkumar Sawargaonkar

This patchset adds the QEMU side changes for providing PSCI v0.2 to VM.

ChangeLog:

V6:
- Add psci_version field in ARMCPU struct.
- Misc cleanups suggested on RFC V5 of this patch.
 ( http://www.spinics.net/lists/kvm-arm/msg09400.html)
- Dropping RFC prefix from patch.

V5:
- Updated "scripts/update-linux-headers.sh" to include linux/psci.h
- Synced linux headers using update-linux-headers.sh with linux-next tree
- Added per cpu field for kvm init features.
- Set psci-0.2 compatible string in generated dtb sucn that it works for 
  guest kernel not having psci 0.2 support.
- Added QEMU_KVM_CAP_ARM_PSCI_0_2 define in target-arm/kvm-consts.h

V4:
- Rebase this patch against v11 patchset for in-kernel PSCI v0.2 emulation
 (http://www.spinics.net/lists/kvm-arm/msg09182.html)
- Used PSCI 0.2 DT bindings for linux kernel.
  (http://www.spinics.net/lists/arm-kernel/msg326044.html).

V3:
 - Rebase this patchset against v8 patchset for in-kernel PSCI v0.2 emulation
   (http://www.spinics.net/lists/kvm-arm/msg08780.html)
 - Added common kvm_arm_vcpu_init() function for kvm arm and kvm arm64

V2:
 - Rebase this patchset against v6 patchset for in-kernel PSCI v0.2 emulation
   (http://www.spinics.net/lists/arm-kernel/msg319037.html)
 - Handle KVM_EXIT_SYSTEM_EVENT in kvm-all.c:kvm_cpu_exec()
 - Drop change in kvm_arm_get_host_cpu_features()
 - Improve comments and description of kvm_arch_reset_vcpu() implementation

V1:
 - Initial RFC patchset

Pranavkumar Sawargaonkar (8):
  update-linux-headers.sh: Add psci.h to linux header sync-up script
  linux-headers: Update KVM headers from linux-next tag next-20140508
  kvm: Handle exit reason KVM_EXIT_SYSTEM_EVENT
  target-arm: Common kvm_arm_vcpu_init() for KVM ARM and KVM ARM64
  target-arm: Enable KVM_ARM_VCPU_PSCI_0_2 feature when possible
  target-arm: Implement kvm_arch_reset_vcpu() for KVM ARM64
  target-arm: Introduce per-CPU field for PSCI version
  hw/arm/virt: Use PSCI v0.2 compatible string when KVM or TCG provides
it

 hw/arm/virt.c   |   16 ++-
 kvm-all.c   |   16 +++
 linux-headers/asm-arm/kvm.h |   10 +++--
 linux-headers/asm-arm64/kvm.h   |   10 +++--
 linux-headers/asm-s390/kvm.h|   28 
 linux-headers/linux/kvm.h   |   14 +-
 linux-headers/linux/psci.h  |   90 +++
 scripts/update-linux-headers.sh |3 +-
 target-arm/cpu-qom.h|9 
 target-arm/cpu.c|1 +
 target-arm/kvm.c|   11 +
 target-arm/kvm32.c  |   16 ---
 target-arm/kvm64.c  |   26 ---
 target-arm/kvm_arm.h|   12 ++
 14 files changed, 239 insertions(+), 23 deletions(-)
 create mode 100644 linux-headers/linux/psci.h

-- 
1.7.9.5

[Qemu-devel] [PATCH V6 2/8] linux-headers: Update KVM headers from linux-next tag next-20140508

2014-05-22 Thread Pranavkumar Sawargaonkar

Syncup KVM related linux headers from linux-next tree using
scripts/update-linux-headers.sh.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
---
 linux-headers/asm-arm/kvm.h   |   10 +++--
 linux-headers/asm-arm64/kvm.h |   10 +++--
 linux-headers/asm-s390/kvm.h  |   28 +
 linux-headers/linux/kvm.h |   14 ++-
 linux-headers/linux/psci.h|   90 +
 5 files changed, 143 insertions(+), 9 deletions(-)
 create mode 100644 linux-headers/linux/psci.h

diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index ef0c878..e6ebdd3 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -20,6 +20,7 @@
 #define __ARM_KVM_H__
 
 #include 
+#include 
 #include 
 
 #define __KVM_HAVE_GUEST_DEBUG
@@ -83,6 +84,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE   0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2  1 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
__u32 target;
@@ -201,9 +203,9 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATEKVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS   0
-#define KVM_PSCI_RET_NI((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS   PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NIPSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIEDPSCI_RET_DENIED
 
 #endif /* __ARM_KVM_H__ */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index eaf54a3..e6471da 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -31,6 +31,7 @@
 #define KVM_NR_SPSR5
 
 #ifndef __ASSEMBLY__
+#include 
 #include 
 #include 
 
@@ -77,6 +78,7 @@ struct kvm_regs {
 
 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2  2 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
__u32 target;
@@ -186,10 +188,10 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATEKVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS   0
-#define KVM_PSCI_RET_NI((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS   PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NIPSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIEDPSCI_RET_DENIED
 
 #endif
 
diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h
index c003c6a..98bedf3 100644
--- a/linux-headers/asm-s390/kvm.h
+++ b/linux-headers/asm-s390/kvm.h
@@ -15,6 +15,7 @@
 #include 
 
 #define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS  1
@@ -54,6 +55,13 @@ struct kvm_s390_io_adapter_req {
__u64 addr;
 };
 
+/* kvm attr_group  on vm fd */
+#define KVM_S390_VM_MEM_CTRL   0
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA0
+#define KVM_S390_VM_MEM_CLR_CMMA   1
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
/* general purpose regs for s390 */
@@ -72,11 +80,31 @@ struct kvm_fpu {
__u64 fprs[16];
 };
 
+#define KVM_GUESTDBG_USE_HW_BP 0x0001
+
+#define KVM_HW_BP  1
+#define KVM_HW_WP_WRITE2
+#define KVM_SINGLESTEP 4
+
 struct kvm_debug_exit_arch {
+   __u64 addr;
+   __u8 type;
+   __u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+   __u64 addr;
+   __u64 phys_addr;
+   __u64 len;
+   __u8 type;
+   __u8 pad[7]; /* Should be set to 0 */
 };
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+   __u32 nr_hw_bp;
+   __u32 pad; /* Should be set to 0 */
+   struct kvm_hw_breakpoint *hw_bp;
 };
 
 #define KVM_SYNC_PREFIX (1UL << 0)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index b278ab3..6a9a0fc 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_WATCHDOG 21
 #define KVM_EXIT_S390_TSCH22
 #define KVM_EXIT_EPR  23
+#define KVM_EXIT_SYSTEM_EVENT 24
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
struct {
__u32 epr;

[Qemu-devel] [PATCH V6 3/8] kvm: Handle exit reason KVM_EXIT_SYSTEM_EVENT

2014-05-22 Thread Pranavkumar Sawargaonkar

In-kernel PSCI v0.2 emulation of KVM ARM/ARM64 forwards SYSTEM_OFF
and SYSTEM_RESET function calls to QEMU using KVM_EXIT_SYSTEM_EVENT
exit reason.

This patch updates kvm_cpu_exec() to handle KVM_SYSTEM_EVENT_SHUTDOWN
and KVM_SYSTEM_EVENT_RESET system-level events from QEMU-side.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
Reviewed-by: Peter Maydell 
---
 kvm-all.c |   16 
 1 file changed, 16 insertions(+)

diff --git a/kvm-all.c b/kvm-all.c
index a343ede..f87c1d5 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1713,6 +1713,22 @@ int kvm_cpu_exec(CPUState *cpu)
 case KVM_EXIT_INTERNAL_ERROR:
 ret = kvm_handle_internal_error(cpu, run);
 break;
+case KVM_EXIT_SYSTEM_EVENT:
+switch (run->system_event.type) {
+case KVM_SYSTEM_EVENT_SHUTDOWN:
+qemu_system_shutdown_request();
+ret = EXCP_INTERRUPT;
+break;
+case KVM_SYSTEM_EVENT_RESET:
+qemu_system_reset_request();
+ret = EXCP_INTERRUPT;
+break;
+default:
+DPRINTF("kvm_arch_handle_exit\n");
+ret = kvm_arch_handle_exit(cpu, run);
+break;
+}
+break;
 default:
 DPRINTF("kvm_arch_handle_exit\n");
 ret = kvm_arch_handle_exit(cpu, run);
-- 
1.7.9.5

[Qemu-devel] [PATCH V6 1/8] update-linux-headers.sh: Add psci.h to linux header sync-up script

2014-05-22 Thread Pranavkumar Sawargaonkar

We will be using linux/psci.h for KVM ARM/ARM64 hence add it to
linux header sync-up script.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
---
 scripts/update-linux-headers.sh |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 120a694..35d1960 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -61,7 +61,8 @@ done
 
 rm -rf "$output/linux-headers/linux"
 mkdir -p "$output/linux-headers/linux"
-for header in kvm.h kvm_para.h vfio.h vhost.h virtio_config.h virtio_ring.h; do
+for header in kvm.h kvm_para.h psci.h vfio.h vhost.h virtio_config.h \
+virtio_ring.h; do
 cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
 done
 rm -rf "$output/linux-headers/asm-generic"
-- 
1.7.9.5

[Qemu-devel] [PATCH V6 4/8] target-arm: Common kvm_arm_vcpu_init() for KVM ARM and KVM ARM64

2014-05-22 Thread Pranavkumar Sawargaonkar

Introduce a common kvm_arm_vcpu_init() for doing KVM_ARM_VCPU_INIT
ioctl in KVM ARM and KVM ARM64. This also helps us factor-out few
common code lines from kvm_arch_init_vcpu() for KVM ARM/ARM64.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
---
 target-arm/cpu-qom.h |3 +++
 target-arm/kvm.c |   11 +++
 target-arm/kvm32.c   |   12 +++-
 target-arm/kvm64.c   |   18 +++---
 target-arm/kvm_arm.h |   12 
 5 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index edc7f26..2bd7df8 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -102,6 +102,9 @@ typedef struct ARMCPU {
  */
 uint32_t kvm_target;
 
+/* KVM init features for this CPU */
+uint32_t kvm_init_features[7];
+
 /* The instance init functions for implementation-specific subclasses
  * set these fields to specify the implementation-dependent values of
  * various constant registers and reset values of non-constant
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 39202d7..319784d 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -27,6 +27,17 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 KVM_CAP_LAST_INFO
 };
 
+int kvm_arm_vcpu_init(CPUState *cs)
+{
+ARMCPU *cpu = ARM_CPU(cs);
+struct kvm_vcpu_init init;
+
+init.target = cpu->kvm_target;
+memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
+
+return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
+}
+
 bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
   int *fdarray,
   struct kvm_vcpu_init *init)
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index b79750c..b142e90 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -166,7 +166,6 @@ static int compare_u64(const void *a, const void *b)
 
 int kvm_arch_init_vcpu(CPUState *cs)
 {
-struct kvm_vcpu_init init;
 int i, ret, arraylen;
 uint64_t v;
 struct kvm_one_reg r;
@@ -179,15 +178,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
 return -EINVAL;
 }
 
-init.target = cpu->kvm_target;
-memset(init.features, 0, sizeof(init.features));
+/* Determine init features for this CPU */
+memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
 if (cpu->start_powered_off) {
-init.features[0] = 1 << KVM_ARM_VCPU_POWER_OFF;
+cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 }
-ret = kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
+
+/* Do KVM_ARM_VCPU_INIT ioctl */
+ret = kvm_arm_vcpu_init(cs);
 if (ret) {
 return ret;
 }
+
 /* Query the kernel to make sure it supports 32 VFP
  * registers: QEMU's "cortex-a15" CPU is always a
  * VFP-D32 core. The simplest way to do this is just
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index c729b9e..1980ddc 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -77,9 +77,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc)
 
 int kvm_arch_init_vcpu(CPUState *cs)
 {
-ARMCPU *cpu = ARM_CPU(cs);
-struct kvm_vcpu_init init;
 int ret;
+ARMCPU *cpu = ARM_CPU(cs);
 
 if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
 !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
@@ -87,16 +86,21 @@ int kvm_arch_init_vcpu(CPUState *cs)
 return -EINVAL;
 }
 
-init.target = cpu->kvm_target;
-memset(init.features, 0, sizeof(init.features));
+/* Determine init features for this CPU */
+memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
 if (cpu->start_powered_off) {
-init.features[0] = 1 << KVM_ARM_VCPU_POWER_OFF;
+cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
+}
+
+/* Do KVM_ARM_VCPU_INIT ioctl */
+ret = kvm_arm_vcpu_init(cs);
+if (ret) {
+return ret;
 }
-ret = kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
 
 /* TODO : support for save/restore/reset of system regs via tuple list */
 
-return ret;
+return 0;
 }
 
 #define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index dc4e233..af93105 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -15,6 +15,18 @@
 #include "exec/memory.h"
 
 /**
+ * kvm_arm_vcpu_init:
+ * @cs: CPUState
+ *
+ * Initialize (or reinitialize) the VCPU by invoking the
+ * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature
+ * bitmask specified in the CPUState.
+ *
+ * Returns: 0 if success else < 0 error code
+ */
+int kvm_arm_vcpu_init(CPUState *cs);
+
+/**
  * kvm_arm_register_device:
  * @mr: memory region for this device
  * @devid: the KVM device ID
-- 
1.7.9.5

[Qemu-devel] [PATCH v2 7/9] spapr_iommu: Get rid of window_size in sPAPRTCETable

2014-05-22 Thread Alexey Kardashevskiy

This removes window_size as it is basically a copy of nb_table
shifted by SPAPR_TCE_PAGE_SHIFT. As new dynamic DMA windows are
going to support windows as big as the entire RAM and this number
will be bigger that 32 capacity, we will have to do something
about @window_size anyway and removal seems to be the right way to go.

This removes dma_window_start/dma_window_size from sPAPRPHBState as
they are no longer used.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* bumped minimum version in VMState descriptor
---
 hw/ppc/spapr_iommu.c| 43 ---
 hw/ppc/spapr_pci.c  |  6 ++
 hw/ppc/spapr_vio.c  |  4 +++-
 include/hw/pci-host/spapr.h |  2 --
 include/hw/ppc/spapr.h  |  3 +--
 target-ppc/kvm.c|  4 ++--
 target-ppc/kvm_ppc.h|  2 +-
 7 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 0dd6509..99d1d6e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -70,7 +70,7 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion 
*iommu, hwaddr addr)
 
 if (tcet->bypass) {
 ret.perm = IOMMU_RW;
-} else if (addr < tcet->window_size) {
+} else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
 /* Check if we are in bound */
 tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
 ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
@@ -84,25 +84,15 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion 
*iommu, hwaddr addr)
 return ret;
 }
 
-static int spapr_tce_table_pre_load(void *opaque)
-{
-sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
-
-tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT;
-
-return 0;
-}
-
 static const VMStateDescription vmstate_spapr_tce_table = {
 .name = "spapr_iommu",
-.version_id = 1,
-.minimum_version_id = 1,
+.version_id = 2,
+.minimum_version_id = 2,
 .minimum_version_id_old = 1,
-.pre_load = spapr_tce_table_pre_load,
 .fields  = (VMStateField []) {
 /* Sanity check */
 VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
-VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
+VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable),
 
 /* IOMMU state */
 VMSTATE_BOOL(bypass, sPAPRTCETable),
@@ -122,16 +112,15 @@ static int spapr_tce_table_realize(DeviceState *dev)
 
 if (kvm_enabled()) {
 tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
-  tcet->window_size,
+  tcet->nb_table <<
+  SPAPR_TCE_PAGE_SHIFT,
   &tcet->fd);
 }
 
 if (!tcet->table) {
-size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
-* sizeof(uint64_t);
+size_t table_size = tcet->nb_table * sizeof(uint64_t);
 tcet->table = g_malloc0(table_size);
 }
-tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT;
 
 trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd);
 
@@ -143,7 +132,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
 return 0;
 }
 
-sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t 
window_size)
+sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+   uint32_t nb_table)
 {
 sPAPRTCETable *tcet;
 
@@ -153,13 +143,13 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn, size_t wi
 return NULL;
 }
 
-if (!window_size) {
+if (!nb_table) {
 return NULL;
 }
 
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
 tcet->liobn = liobn;
-tcet->window_size = window_size;
+tcet->nb_table = nb_table;
 
 object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
 
@@ -176,7 +166,7 @@ static void spapr_tce_table_finalize(Object *obj)
 
 if (!kvm_enabled() ||
 (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
- tcet->window_size) != 0)) {
+ tcet->nb_table) != 0)) {
 g_free(tcet->table);
 }
 }
@@ -194,8 +184,7 @@ void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
 static void spapr_tce_reset(DeviceState *dev)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
-size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
-* sizeof(uint64_t);
+size_t table_size = tcet->nb_table * sizeof(uint64_t);
 
 tcet->bypass = false;
 memset(tcet->table, 0, table_size);
@@ -206,7 +195,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, 
target_ulong ioba,
 {
 IOMMUTLBEntry entry;
 
-if (ioba >= tcet->window_size) {
+if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
 hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"

[Qemu-devel] [PATCH V6 7/8] target-arm: Introduce per-CPU field for PSCI version

2014-05-22 Thread Pranavkumar Sawargaonkar

We require to know the PSCI version available to given CPU at
potentially many places. Currently, we need to know PSCI version
when generating DTB for virt machine.

This patch introduce per-CPU 32bit field representing the PSCI
version available to the CPU. The encoding of this 32bit field
is same as described in PSCI v0.2 spec.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
---
 target-arm/cpu-qom.h |6 ++
 target-arm/cpu.c |1 +
 target-arm/kvm32.c   |1 +
 target-arm/kvm64.c   |1 +
 4 files changed, 9 insertions(+)

diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 2bd7df8..eaee944 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -94,6 +94,12 @@ typedef struct ARMCPU {
 /* 'compatible' string for this CPU for Linux device trees */
 const char *dtb_compatible;
 
+/* PSCI version for this CPU
+ * Bits[31:16] = Major Version
+ * Bits[15:0] = Minor Version
+ */
+uint32_t psci_version;
+
 /* Should CPU start in PSCI powered-off state? */
 bool start_powered_off;
 
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 6c6f2b3..589f34d 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -260,6 +260,7 @@ static void arm_cpu_initfn(Object *obj)
  * picky DTB consumer will also provide a helpful error message.
  */
 cpu->dtb_compatible = "qemu,unknown";
+cpu->psci_version = 1; /* By default assume PSCI v0.1 */
 cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
 
 if (tcg_enabled() && !inited) {
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index 52d626c..068af7d 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -184,6 +184,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 }
 if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
+cpu->psci_version = 2;
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
 }
 
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index feeac4a..2634baf 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -92,6 +92,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 }
 if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
+cpu->psci_version = 2;
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
 }
 
-- 
1.7.9.5

[Qemu-devel] [PATCH V6 5/8] target-arm: Enable KVM_ARM_VCPU_PSCI_0_2 feature when possible

2014-05-22 Thread Pranavkumar Sawargaonkar

Latest linux kernel supports in-kernel emulation of PSCI v0.2 but
to enable it we need to select KVM_ARM_VCPU_PSCI_0_2 feature using
KVM_ARM_VCPU_INIT ioctl.

Also, we can use KVM_ARM_VCPU_PSCI_0_2 feature for VCPU only when
linux kernel has KVM_CAP_ARM_PSCI_0_2 capability.

This patch updates kvm_arch_init_vcpu() to enable KVM_ARM_VCPU_PSCI_0_2
feature for VCPU when KVM ARM/ARM64 has KVM_CAP_ARM_PSCI_0_2 capability.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
---
 target-arm/kvm32.c |3 +++
 target-arm/kvm64.c |3 +++
 2 files changed, 6 insertions(+)

diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index b142e90..52d626c 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -183,6 +183,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
 if (cpu->start_powered_off) {
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 }
+if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
+cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
+}
 
 /* Do KVM_ARM_VCPU_INIT ioctl */
 ret = kvm_arm_vcpu_init(cs);
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 1980ddc..dcf621f 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -91,6 +91,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
 if (cpu->start_powered_off) {
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 }
+if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
+cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
+}
 
 /* Do KVM_ARM_VCPU_INIT ioctl */
 ret = kvm_arm_vcpu_init(cs);
-- 
1.7.9.5

[Qemu-devel] [PATCH V6 6/8] target-arm: Implement kvm_arch_reset_vcpu() for KVM ARM64

2014-05-22 Thread Pranavkumar Sawargaonkar

To implement kvm_arch_reset_vcpu(), we simply re-init the VCPU
using kvm_arm_vcpu_init() so that all registers of VCPU are set
to their reset values by in-kernel KVM code.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
Reviewed-by: Peter Maydell 
---
 target-arm/kvm64.c |4 
 1 file changed, 4 insertions(+)

diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index dcf621f..feeac4a 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -269,4 +269,8 @@ int kvm_arch_get_registers(CPUState *cs)
 
 void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
+/* Re-init VCPU so that all registers are set to
+ * their respective reset values.
+ */
+kvm_arm_vcpu_init(CPU(cpu));
 }
-- 
1.7.9.5

[Qemu-devel] [PATCH v2 0/9] spapr_pci: Prepare for VFIO

2014-05-22 Thread Alexey Kardashevskiy

This patchset prepares QEMU for VFIO support on SPAPR. It also does
preparations for Dynamic DMA window feature which allows to
create DMA windows with 16MB IOMMU pages which will allow to map
the entire guest RAM for DMA at almost no cost.

Changelogs are in the patches.

Alexey Kardashevskiy (9):
  spapr: Enable dynamic change of the supported hypercalls list
  spapr_iommu: Enable multiple TCE requests
  spapr_pci: Introduce a finish_realize() callback
  spapr_pci: spapr_iommu: Make DMA window a subregion
  spapr_pci: Allow multiple TCE tables per PHB
  spapr_iommu: Convert old qdev_init_nofail() to
object_property_set_bool
  spapr_iommu: Get rid of window_size in sPAPRTCETable
  spapr_iommu: Introduce page_shift in sPAPRTCETable
  spapr_iommu: Introduce bus_offset in sPAPRTCETable

 hw/ppc/spapr.c  |  33 +++--
 hw/ppc/spapr_iommu.c| 172 +---
 hw/ppc/spapr_pci.c  |  96 -
 hw/ppc/spapr_vio.c  |   6 +-
 include/hw/pci-host/spapr.h |  18 -
 include/hw/ppc/spapr.h  |   8 ++-
 target-ppc/kvm.c|  11 ++-
 target-ppc/kvm_ppc.h|   8 ++-
 trace-events|   2 +
 9 files changed, 278 insertions(+), 76 deletions(-)

-- 
1.9.rc0

[Qemu-devel] [PATCH v2 4/9] spapr_pci: spapr_iommu: Make DMA window a subregion

2014-05-22 Thread Alexey Kardashevskiy

Currently the default DMA window is represented by a single MemoryRegion.
However there can be more than just one window so we need
a "root" memory region to be separated from the actual DMA window(s).

This introduces a "root" IOMMU memory region and adds a subregion for
the default DMA 32bit window. Following patches will add other
subregion(s).

This initializes a default DMA window subregion size to the guest RAM
size as this window can be switched into "bypass" mode which implements
direct DMA mapping.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c|  2 +-
 hw/ppc/spapr_pci.c  | 20 ++--
 include/hw/pci-host/spapr.h |  1 +
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index ab5037c..241ceeb 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -136,7 +136,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
 trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd);
 
 memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
- "iommu-spapr", UINT64_MAX);
+ "iommu-spapr", ram_size);
 
 QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index b141e83..f1684c2 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -610,6 +610,20 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
 phb->bus = bus;
 
+/*
+ * Initialize PHB address space.
+ * By default there will be at least one subregion for default
+ * 32bit DMA window.
+ * Later the guest might want to create another DMA window
+ * which will become another memory subregion.
+ */
+sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);
+
+memory_region_init(&sphb->iommu_root, OBJECT(sphb),
+   namebuf, UINT64_MAX);
+address_space_init(&sphb->iommu_as, &sphb->iommu_root,
+   sphb->dtbusname);
+
 pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
 pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
@@ -648,8 +662,10 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, 
Error **errp)
sphb->dtbusname);
 return ;
 }
-address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
-   sphb->dtbusname);
+
+/* Register default 32bit DMA window */
+memory_region_add_subregion(&sphb->iommu_root, 0,
+spapr_tce_get_iommu(sphb->tcet));
 }
 
 static void spapr_phb_reset(DeviceState *qdev)
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index ab29281..c98ebdf 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -64,6 +64,7 @@ typedef struct sPAPRPHBState {
 uint64_t dma_window_size;
 sPAPRTCETable *tcet;
 AddressSpace iommu_as;
+MemoryRegion iommu_root;
 
 struct spapr_pci_lsi {
 uint32_t irq;
-- 
1.9.rc0

[Qemu-devel] [PATCH V6 8/8] hw/arm/virt: Use PSCI v0.2 compatible string when KVM or TCG provides it

2014-05-22 Thread Pranavkumar Sawargaonkar

If we have PSCI v0.2 emulation available for KVM ARM/ARM64 or TCG then
we need to provide PSCI v0.2 compatible string via generated DTB.

Signed-off-by: Pranavkumar Sawargaonkar 
Signed-off-by: Anup Patel 
---
 hw/arm/virt.c |   16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ea4f02d..442363c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -180,10 +180,23 @@ static void create_fdt(VirtBoardInfo *vbi)
 "clk24mhz");
 qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vbi->clock_phandle);
 
+}
+
+static void fdt_add_psci_node(const VirtBoardInfo *vbi)
+{
+void *fdt = vbi->fdt;
+ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
+
 /* No PSCI for TCG yet */
 if (kvm_enabled()) {
 qemu_fdt_add_subnode(fdt, "/psci");
-qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+if (armcpu->psci_version == 2) {
+const char comp[] = "arm,psci-0.2\0arm,psci";
+qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+} else {
+qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+}
+
 qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
 qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend",
   PSCI_FN_CPU_SUSPEND);
@@ -446,6 +459,7 @@ static void machvirt_init(QEMUMachineInitArgs *args)
 object_property_set_bool(cpuobj, true, "realized", NULL);
 }
 fdt_add_cpu_nodes(vbi);
+fdt_add_psci_node(vbi);
 
 memory_region_init_ram(ram, NULL, "mach-virt.ram", args->ram_size);
 vmstate_register_ram_global(ram);
-- 
1.7.9.5

[Qemu-devel] [PATCH v2 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable

2014-05-22 Thread Alexey Kardashevskiy

At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
spec allows other page sizes and we are going to implement them, we need
page size to be configrable.

This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
with it whereever it is possible.

This removes SPAPR_TCE_PAGE_MASK as it is no longer used.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* target_ulong for "mask" replaced with hwaddr
* added page_mask and page_size local variables where possible
---
 hw/ppc/spapr_iommu.c   | 66 +++---
 hw/ppc/spapr_pci.c |  1 +
 hw/ppc/spapr_vio.c |  1 +
 include/hw/ppc/spapr.h |  3 ++-
 4 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 99d1d6e..65f9a89 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -70,12 +70,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion 
*iommu, hwaddr addr)
 
 if (tcet->bypass) {
 ret.perm = IOMMU_RW;
-} else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
+} else if ((addr >> tcet->page_shift) < tcet->nb_table) {
 /* Check if we are in bound */
-tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
-ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
-ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
-ret.addr_mask = SPAPR_TCE_PAGE_MASK;
+hwaddr page_mask = ~((1 << tcet->page_shift) - 1);
+
+tce = tcet->table[addr >> tcet->page_shift];
+ret.iova = addr & page_mask;
+ret.translated_addr = tce & page_mask;
+ret.addr_mask = ~page_mask;
 ret.perm = tce;
 }
 trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
@@ -113,7 +115,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
 if (kvm_enabled()) {
 tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
   tcet->nb_table <<
-  SPAPR_TCE_PAGE_SHIFT,
+  tcet->page_shift,
   &tcet->fd);
 }
 
@@ -133,6 +135,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
 }
 
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+   uint32_t page_shift,
uint32_t nb_table)
 {
 sPAPRTCETable *tcet;
@@ -149,6 +152,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
 
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
 tcet->liobn = liobn;
+tcet->page_shift = page_shift;
 tcet->nb_table = nb_table;
 
 object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
@@ -194,19 +198,20 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, 
target_ulong ioba,
 target_ulong tce)
 {
 IOMMUTLBEntry entry;
+hwaddr page_mask = ~((1 << tcet->page_shift) - 1);
 
-if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
+if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
 hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
   TARGET_FMT_lx "\n", ioba);
 return H_PARAMETER;
 }
 
-tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
+tcet->table[ioba >> tcet->page_shift] = tce;
 
 entry.target_as = &address_space_memory,
-entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
-entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
-entry.addr_mask = SPAPR_TCE_PAGE_MASK;
+entry.iova = ioba & page_mask;
+entry.translated_addr = tce & page_mask;
+entry.addr_mask = ~page_mask;
 entry.perm = tce;
 memory_region_notify_iommu(&tcet->iommu, entry);
 
@@ -226,6 +231,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
 target_ulong ret = H_PARAMETER;
 sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
 CPUState *cs = CPU(cpu);
+hwaddr page_mask, page_size;
 
 if (!tcet) {
 return H_PARAMETER;
@@ -235,12 +241,15 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
 return H_PARAMETER;
 }
 
-ioba &= ~SPAPR_TCE_PAGE_MASK;
-tce_list &= ~SPAPR_TCE_PAGE_MASK;
+page_mask = ~((1 << tcet->page_shift) - 1);
+page_size = (1 << tcet->page_shift);
+ioba &= page_mask;
+
+for (i = 0; i < npages; ++i, ioba += page_size) {
+target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
+i * sizeof(target_ulong);
+target_ulong tce = ldq_phys(cs->as, off);
 
-for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
-target_ulong tce = ldq_phys(cs->as, tce_list +
-i * sizeof(target_ulong));
 ret = put_tce_emu(tcet, ioba, tce);
 if (ret) {
 break;
@@ -267,6 +276,7 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,

[Qemu-devel] [PATCH 2/3] console: rework text terminal cursor logic

2014-05-22 Thread Gerd Hoffmann

Have a global timer.  Update all visible terminal windows syncronously.
Right now this can be the active_console only, but that will change
soon.  The global timer will disable itself if not needed, so we only
have to care start it if needed.  Which might be at console switch time
or when a new displaychangelistener is registered.

Signed-off-by: Gerd Hoffmann 
---
 ui/console.c | 50 --
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/ui/console.c b/ui/console.c
index 85f46ae..f6ce0ef 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -143,8 +143,6 @@ struct QemuConsole {
 TextCell *cells;
 int text_x[2], text_y[2], cursor_invalidate;
 int echo;
-bool cursor_visible_phase;
-QEMUTimer *cursor_timer;
 
 int update_x0;
 int update_y0;
@@ -177,10 +175,14 @@ static DisplayState *display_state;
 static QemuConsole *active_console;
 static QemuConsole *consoles[MAX_CONSOLES];
 static int nb_consoles = 0;
+static bool cursor_visible_phase;
+static QEMUTimer *cursor_timer;
 
 static void text_console_do_init(CharDriverState *chr, DisplayState *ds);
 static void dpy_refresh(DisplayState *s);
 static DisplayState *get_alloc_displaystate(void);
+static void text_console_update_cursor_timer(void);
+static void text_console_update_cursor(void *opaque);
 
 static void gui_update(void *opaque)
 {
@@ -530,7 +532,7 @@ static void console_show_cursor(QemuConsole *s, int show)
 }
 if (y < s->height) {
 c = &s->cells[y1 * s->width + x];
-if (show && s->cursor_visible_phase) {
+if (show && cursor_visible_phase) {
 TextAttributes t_attrib = s->t_attrib_default;
 t_attrib.invers = !(t_attrib.invers); /* invert fg and bg */
 vga_putcharxy(s, x, y, c->ch, &t_attrib);
@@ -989,9 +991,6 @@ void console_select(unsigned int index)
 if (s) {
 DisplayState *ds = s->ds;
 
-if (active_console && active_console->cursor_timer) {
-timer_del(active_console->cursor_timer);
-}
 active_console = s;
 if (ds->have_gfx) {
 QLIST_FOREACH(dcl, &ds->listeners, next) {
@@ -1008,10 +1007,7 @@ void console_select(unsigned int index)
 if (ds->have_text) {
 dpy_text_resize(s, s->width, s->height);
 }
-if (s->cursor_timer) {
-timer_mod(s->cursor_timer,
-   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 
CONSOLE_CURSOR_PERIOD / 2);
-}
+text_console_update_cursor(NULL);
 }
 }
 
@@ -1314,6 +1310,7 @@ void register_displaychangelistener(DisplayChangeListener 
*dcl)
 dcl->ops->dpy_gfx_switch(dcl, dummy);
 }
 }
+text_console_update_cursor(NULL);
 }
 
 void update_displaychangelistener(DisplayChangeListener *dcl,
@@ -1537,6 +1534,8 @@ static DisplayState *get_alloc_displaystate(void)
 {
 if (!display_state) {
 display_state = g_new0(DisplayState, 1);
+cursor_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
+text_console_update_cursor, NULL);
 }
 return display_state;
 }
@@ -1696,14 +1695,32 @@ static void text_console_set_echo(CharDriverState *chr, 
bool echo)
 s->echo = echo;
 }
 
+static void text_console_update_cursor_timer(void)
+{
+timer_mod(cursor_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+  + CONSOLE_CURSOR_PERIOD / 2);
+}
+
 static void text_console_update_cursor(void *opaque)
 {
-QemuConsole *s = opaque;
+QemuConsole *s;
+int i, count = 0;
+
+cursor_visible_phase = !cursor_visible_phase;
 
-s->cursor_visible_phase = !s->cursor_visible_phase;
-graphic_hw_invalidate(s);
-timer_mod(s->cursor_timer,
-   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 
CONSOLE_CURSOR_PERIOD / 2);
+for (i = 0; i < nb_consoles; i++) {
+s = consoles[i];
+if (qemu_console_is_graphic(s) ||
+!qemu_console_is_visible(s)) {
+continue;
+}
+count++;
+graphic_hw_invalidate(s);
+}
+
+if (count) {
+text_console_update_cursor_timer();
+}
 }
 
 static const GraphicHwOps text_console_ops = {
@@ -1739,9 +1756,6 @@ static void text_console_do_init(CharDriverState *chr, 
DisplayState *ds)
 s->surface = qemu_create_displaysurface(g_width, g_height);
 }
 
-s->cursor_timer =
-timer_new_ms(QEMU_CLOCK_REALTIME, text_console_update_cursor, s);
-
 s->hw_ops = &text_console_ops;
 s->hw = s;
 
-- 
1.8.3.1

Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve 00:02.0 for INTEL IGD

2014-05-22 Thread Gerd Hoffmann

  Hi,

> After applied your patch, 'xen-platform' is always disabled by default, right?

Only in case -nodefaults is passed on the qemu command line (don't know
whenever libxl does that).

> gfx_passthru=1
> pci=["00:02.0@2", "00:1a.0"]
> xen_platform_pci=0
  ^^
That line isn't needed ...

> device_model_args_hvm = ['-device', 'xen-platform,addr=0x3']

That alone should be enough to move away the platform device.

cheers,
  Gerd

[Qemu-devel] [PATCH v2 3/9] spapr_pci: Introduce a finish_realize() callback

2014-05-22 Thread Alexey Kardashevskiy

The spapr-pci PHB initializes IOMMU for emulated devices only.
The upcoming VFIO support will do it different. However both emulated
and VFIO PHB types share most of the initialization code.
For the type specific things a new finish_realize() callback is
introduced.

This introduces sPAPRPHBClass derived from PCIHostBridgeClass and
adds the callback pointer.

This implements finish_realize() for emulated devices.

Signed-off-by: Alexey Kardashevskiy 
---

The difference to VFIO is that for VFIO we have to ask the kernel
about DMA window size before calling spapr_tce_new_table() and
then we have to tell VFIO KVM device about LIOBN <-> IOMMU link.
---
 hw/ppc/spapr_pci.c  | 38 ++
 include/hw/pci-host/spapr.h | 14 ++
 2 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 1db73f2..b141e83 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -528,6 +528,7 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 SysBusDevice *s = SYS_BUS_DEVICE(dev);
 sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
 PCIHostState *phb = PCI_HOST_BRIDGE(s);
+sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
 char *namebuf;
 int i;
 PCIBus *bus;
@@ -609,18 +610,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
 phb->bus = bus;
 
-sphb->dma_window_start = 0;
-sphb->dma_window_size = 0x4000;
-sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
- sphb->dma_window_size);
-if (!sphb->tcet) {
-error_setg(errp, "Unable to create TCE table for %s",
-   sphb->dtbusname);
-return;
-}
-address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
-   sphb->dtbusname);
-
 pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
 pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
@@ -639,6 +628,28 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 
 sphb->lsi_table[i].irq = irq;
 }
+
+if (!info->finish_realize) {
+error_setg(errp, "finish_realize not defined");
+return;
+}
+
+info->finish_realize(sphb, errp);
+}
+
+static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{
+sphb->dma_window_start = 0;
+sphb->dma_window_size = 0x4000;
+sphb->tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+ sphb->dma_window_size);
+if (!sphb->tcet) {
+error_setg(errp, "Unable to create TCE table for %s",
+   sphb->dtbusname);
+return ;
+}
+address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
+   sphb->dtbusname);
 }
 
 static void spapr_phb_reset(DeviceState *qdev)
@@ -722,6 +733,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void 
*data)
 {
 PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
 DeviceClass *dc = DEVICE_CLASS(klass);
+sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
 
 hc->root_bus_path = spapr_phb_root_bus_path;
 dc->realize = spapr_phb_realize;
@@ -730,6 +742,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void 
*data)
 dc->vmsd = &vmstate_spapr_pci;
 set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 dc->cannot_instantiate_with_device_add_yet = false;
+spc->finish_realize = spapr_phb_finish_realize;
 }
 
 static const TypeInfo spapr_phb_info = {
@@ -737,6 +750,7 @@ static const TypeInfo spapr_phb_info = {
 .parent= TYPE_PCI_HOST_BRIDGE,
 .instance_size = sizeof(sPAPRPHBState),
 .class_init= spapr_phb_class_init,
+.class_size= sizeof(sPAPRPHBClass),
 };
 
 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 970b4a9..ab29281 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -34,6 +34,20 @@
 #define SPAPR_PCI_HOST_BRIDGE(obj) \
 OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
 
+#define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
+#define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(sPAPRPHBClass, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
+
+typedef struct sPAPRPHBClass sPAPRPHBClass;
+typedef struct sPAPRPHBState sPAPRPHBState;
+
+struct sPAPRPHBClass {
+PCIHostBridgeClass parent_class;
+
+void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
+};
+
 typedef struct sPAPRPHBState {
 PCIHostState parent_obj;
 
-- 
1.9.rc0

[Qemu-devel] [PATCH v2 9/9] spapr_iommu: Introduce bus_offset in sPAPRTCETable

2014-05-22 Thread Alexey Kardashevskiy

This adds @bus_offset into sPAPRTCETable to tell where TCE table starts
from. It is set to 0 for emulated devices. Dynamic DMA windows will use
other offset.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c   | 13 +
 hw/ppc/spapr_pci.c |  5 +++--
 hw/ppc/spapr_vio.c |  1 +
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 65f9a89..1c69cf7 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -135,6 +135,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
 }
 
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+   uint64_t bus_offset,
uint32_t page_shift,
uint32_t nb_table)
 {
@@ -152,6 +153,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
 
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
 tcet->liobn = liobn;
+tcet->bus_offset = bus_offset;
 tcet->page_shift = page_shift;
 tcet->nb_table = nb_table;
 
@@ -199,14 +201,15 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, 
target_ulong ioba,
 {
 IOMMUTLBEntry entry;
 hwaddr page_mask = ~((1 << tcet->page_shift) - 1);
+unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
 
-if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
+if (index >= tcet->nb_table) {
 hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
   TARGET_FMT_lx "\n", ioba);
 return H_PARAMETER;
 }
 
-tcet->table[ioba >> tcet->page_shift] = tce;
+tcet->table[index] = tce;
 
 entry.target_as = &address_space_memory,
 entry.iova = ioba & page_mask;
@@ -325,13 +328,15 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
 target_ulong *tce)
 {
-if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
+unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
+
+if (index >= tcet->nb_table) {
 hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
   TARGET_FMT_lx "\n", ioba);
 return H_PARAMETER;
 }
 
-*tce = tcet->table[ioba >> tcet->page_shift];
+*tce = tcet->table[index];
 
 return H_SUCCESS;
 }
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index c9850d4..ddfd8bb 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, 
Error **errp)
 sPAPRTCETable *tcet;
 
 tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+   0,
SPAPR_TCE_PAGE_SHIFT,
0x4000 >> SPAPR_TCE_PAGE_SHIFT);
 if (!tcet) {
@@ -816,8 +817,8 @@ static int spapr_phb_children_dt(Object *child, void 
*opaque)
 }
 
 spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
- tcet->liobn, 0,
- tcet->nb_table << SPAPR_TCE_PAGE_SHIFT);
+ tcet->liobn, tcet->bus_offset,
+ tcet->nb_table << tcet->page_shift);
 /* Stop after the first window */
 
 return 1;
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index d7e9e6a..48b0125 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 if (pc->rtce_window_size) {
 uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 dev->tcet = spapr_tce_new_table(qdev, liobn,
+0,
 SPAPR_TCE_PAGE_SHIFT,
 pc->rtce_window_size >>
 SPAPR_TCE_PAGE_SHIFT);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 2e88e52..c848c50 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -391,6 +391,7 @@ struct sPAPRTCETable {
 DeviceState parent;
 uint32_t liobn;
 uint32_t nb_table;
+uint64_t bus_offset;
 uint32_t page_shift;
 uint64_t *table;
 bool bypass;
@@ -402,6 +403,7 @@ struct sPAPRTCETable {
 void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+   uint64_t bus_offset,
uint32_t page_shift,
uint32_t nb_table);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
-- 
1.9.rc0

[Qemu-devel] [PATCH v2 6/9] spapr_iommu: Convert old qdev_init_nofail() to object_property_set_bool

2014-05-22 Thread Alexey Kardashevskiy

qdev_init_nofail() was replaced by object_property_set_bool("realized")
all over the QEMU so do we.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 241ceeb..0dd6509 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -163,7 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn, size_t wi
 
 object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
 
-qdev_init_nofail(DEVICE(tcet));
+object_property_set_bool(OBJECT(tcet), true, "realized", NULL);
 
 return tcet;
 }
-- 
1.9.rc0

[Qemu-devel] [PATCH v2 2/9] spapr_iommu: Enable multiple TCE requests

2014-05-22 Thread Alexey Kardashevskiy

Currently only single TCE entry per request is supported (H_PUT_TCE).
However PAPR+ specification allows multiple entry requests such as
H_PUT_TCE_INDIRECT and H_STUFF_TCE. Having less transitions to the host
kernel via ioctls, support of these calls can accelerate IOMMU operations.

This implements H_STUFF_TCE and H_PUT_TCE_INDIRECT.

This advertises "multi-tce" capability to the guest if the host kernel
supports it (KVM_CAP_SPAPR_MULTITCE) or guest is running in TCG mode.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* multi-tce enabled explicitely for TCG, it was implicit
* kvmppc_spapr_use_multitce() does not handle TCG anymore

v1:
* removed checks for liobn as the check is performed already in
spapr_tce_find_by_liobn
* added hcall-multi-tce if the host kernel supports the capability
---
 hw/ppc/spapr.c   |  3 ++
 hw/ppc/spapr_iommu.c | 78 
 target-ppc/kvm.c |  7 +
 target-ppc/kvm_ppc.h |  6 
 trace-events |  2 ++
 5 files changed, 96 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a16945d..921fe5d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -498,6 +498,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 /* RTAS */
 _FDT((fdt_begin_node(fdt, "rtas")));
 
+if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
+add_str(hypertas, "hcall-multi-tce");
+}
 _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
hypertas->len)));
 g_string_free(hypertas, TRUE);
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 72493d8..ab5037c 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -224,6 +224,82 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, 
target_ulong ioba,
 return H_SUCCESS;
 }
 
+static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
+   sPAPREnvironment *spapr,
+   target_ulong opcode, target_ulong *args)
+{
+int i;
+target_ulong liobn = args[0];
+target_ulong ioba = args[1];
+target_ulong ioba1 = ioba;
+target_ulong tce_list = args[2];
+target_ulong npages = args[3];
+target_ulong ret = H_PARAMETER;
+sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+CPUState *cs = CPU(cpu);
+
+if (!tcet) {
+return H_PARAMETER;
+}
+
+if (npages > 512) {
+return H_PARAMETER;
+}
+
+ioba &= ~SPAPR_TCE_PAGE_MASK;
+tce_list &= ~SPAPR_TCE_PAGE_MASK;
+
+for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
+target_ulong tce = ldq_phys(cs->as, tce_list +
+i * sizeof(target_ulong));
+ret = put_tce_emu(tcet, ioba, tce);
+if (ret) {
+break;
+}
+}
+
+/* Trace last successful or the first problematic entry */
+i = i ? (i - 1) : 0;
+trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i,
+   ldq_phys(cs->as,
+   tce_list + i * sizeof(target_ulong)),
+   ret);
+
+return ret;
+}
+
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+  target_ulong opcode, target_ulong *args)
+{
+int i;
+target_ulong liobn = args[0];
+target_ulong ioba = args[1];
+target_ulong tce_value = args[2];
+target_ulong npages = args[3];
+target_ulong ret = H_PARAMETER;
+sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+
+if (!tcet) {
+return H_PARAMETER;
+}
+
+if (npages > tcet->nb_table) {
+return H_PARAMETER;
+}
+
+ioba &= ~SPAPR_TCE_PAGE_MASK;
+
+for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
+ret = put_tce_emu(tcet, ioba, tce_value);
+if (ret) {
+break;
+}
+}
+trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
+
+return ret;
+}
+
 static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
   target_ulong opcode, target_ulong *args)
 {
@@ -332,6 +408,8 @@ static void spapr_tce_table_class_init(ObjectClass *klass, 
void *data)
 /* hcall-tce */
 spapr_register_hypercall(H_PUT_TCE, h_put_tce);
 spapr_register_hypercall(H_GET_TCE, h_get_tce);
+spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
+spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
 }
 
 static TypeInfo spapr_tce_table_info = {
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ca31027..bcf2db8 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -62,6 +62,7 @@ static int cap_booke_sregs;
 static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
+static int cap_spapr_multitce;
 static int cap_hior;
 static int cap_one_reg;
 static int cap_epr;
@@ -98,6 +99,7 @@ int kvm_arch_init(KVMState *s)
 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);

[Qemu-devel] [PATCH v2 5/9] spapr_pci: Allow multiple TCE tables per PHB

2014-05-22 Thread Alexey Kardashevskiy

At the moment sPAPRPHBState contains a @tcet pointer to the only
TCE table. However sPAPR spec allows having more than one DMA window.

Since the TCE object is already a child of SPAPR PHB object, there is
no need to keep an additional pointer to it in sPAPRPHBState so remove it.

This changes the way sPAPRPHBState::reset performs reset of sPAPRTCETable
objects.

This changes the default DMA window properties calculation.

Signed-off-by: Alexey Kardashevskiy 
---

The only reason for sPAPRPHBState to keep a pointer to sPAPRTCETable is
to have a direct link to calculate default 32bit window properties.
So I decided to replace the link with a spapr_phb_children_dt() loop
and use first TCE table for default window. Is that ok or ugly?
---
 hw/ppc/spapr_pci.c  | 54 -
 include/hw/pci-host/spapr.h |  1 -
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index f1684c2..aa29116 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -653,11 +653,13 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 
 static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
 {
+sPAPRTCETable *tcet;
+
 sphb->dma_window_start = 0;
 sphb->dma_window_size = 0x4000;
-sphb->tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
- sphb->dma_window_size);
-if (!sphb->tcet) {
+tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+   sphb->dma_window_size);
+if (!tcet) {
 error_setg(errp, "Unable to create TCE table for %s",
sphb->dtbusname);
 return ;
@@ -665,16 +667,24 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, 
Error **errp)
 
 /* Register default 32bit DMA window */
 memory_region_add_subregion(&sphb->iommu_root, 0,
-spapr_tce_get_iommu(sphb->tcet));
+spapr_tce_get_iommu(tcet));
+}
+
+static int spapr_phb_children_reset(Object *child, void *opaque)
+{
+DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
+
+if (dev) {
+device_reset(dev);
+}
+
+return 0;
 }
 
 static void spapr_phb_reset(DeviceState *qdev)
 {
-SysBusDevice *s = SYS_BUS_DEVICE(qdev);
-sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
-
 /* Reset the IOMMU state */
-device_reset(DEVICE(sphb->tcet));
+object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
 }
 
 static Property spapr_phb_properties[] = {
@@ -791,6 +801,29 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, 
int index)
 #define b_fff(x)b_x((x), 8, 3)  /* function number */
 #define b_(x)   b_x((x), 0, 8)  /* register number */
 
+typedef struct sPAPRTCEDT {
+void *fdt;
+int node_off;
+} sPAPRTCEDT;
+
+static int spapr_phb_children_dt(Object *child, void *opaque)
+{
+sPAPRTCEDT *p = opaque;
+sPAPRTCETable *tcet;
+
+tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+if (!tcet) {
+return 0;
+}
+
+spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
+ tcet->liobn, 0,
+ tcet->window_size);
+/* Stop after the first window */
+
+return 1;
+}
+
 int spapr_populate_pci_dt(sPAPRPHBState *phb,
   uint32_t xics_phandle,
   void *fdt)
@@ -870,9 +903,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
 _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
  sizeof(interrupt_map)));
 
-spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
- phb->dma_liobn, phb->dma_window_start,
- phb->dma_window_size);
+object_child_foreach(OBJECT(phb), spapr_phb_children_dt,
+ &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off }));
 
 return 0;
 }
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index c98ebdf..5ea4745 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -62,7 +62,6 @@ typedef struct sPAPRPHBState {
 uint32_t dma_liobn;
 uint64_t dma_window_start;
 uint64_t dma_window_size;
-sPAPRTCETable *tcet;
 AddressSpace iommu_as;
 MemoryRegion iommu_root;
 
-- 
1.9.rc0

[Qemu-devel] [PATCH v2 1/9] spapr: Enable dynamic change of the supported hypercalls list

2014-05-22 Thread Alexey Kardashevskiy

At the moment the "ibm,hypertas-functions" list is fixed. However some
calls should be listed there if they are supported by QEMU or the host
kernel.

This enables hyperrtas_prop to grow on stack by adding
a SPAPR_HYPERRTAS_ADD macro. "qemu,hypertas-functions" is converted as well.

The first user of this is going to be a "multi-tce" property.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* replaced alloca() with GString
* removed "inline" from add_str() definition
---
 hw/ppc/spapr.c | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0a61246..a16945d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -293,6 +293,10 @@ static size_t create_page_sizes_prop(CPUPPCState *env, 
uint32_t *prop,
 }  \
 } while (0)
 
+static void add_str(GString *s, const gchar *s1)
+{
+g_string_append_len(s, s1, strlen(s1) + 1);
+}
 
 static void *spapr_create_fdt_skel(hwaddr initrd_base,
hwaddr initrd_size,
@@ -306,9 +310,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 CPUState *cs;
 uint32_t start_prop = cpu_to_be32(initrd_base);
 uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
-char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
-char qemu_hypertas_prop[] = "hcall-memop1";
+GString *hypertas = g_string_sized_new(256);
+GString *qemu_hypertas = g_string_sized_new(256);
 uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
 uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
 int i, smt = kvmppc_smt_threads();
@@ -317,6 +320,17 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
 uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
 
+add_str(hypertas, "hcall-pft");
+add_str(hypertas, "hcall-term");
+add_str(hypertas, "hcall-dabr");
+add_str(hypertas, "hcall-interrupt");
+add_str(hypertas, "hcall-tce");
+add_str(hypertas, "hcall-vio");
+add_str(hypertas, "hcall-splpar");
+add_str(hypertas, "hcall-bulk");
+add_str(hypertas, "hcall-set-mode");
+add_str(qemu_hypertas, "hcall-memop1");
+
 fdt = g_malloc0(FDT_MAX_SIZE);
 _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
 
@@ -484,10 +498,12 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 /* RTAS */
 _FDT((fdt_begin_node(fdt, "rtas")));
 
-_FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
-   sizeof(hypertas_prop;
-_FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
-   sizeof(qemu_hypertas_prop;
+_FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
+   hypertas->len)));
+g_string_free(hypertas, TRUE);
+_FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
+   qemu_hypertas->len)));
+g_string_free(qemu_hypertas, TRUE);
 
 _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
 refpoints, sizeof(refpoints;
-- 
1.9.rc0

Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve 00:02.0 for INTEL IGD

2014-05-22 Thread Michael S. Tsirkin

On Thu, May 22, 2014 at 10:50:10AM +, Chen, Tiejun wrote:
> > -Original Message-
> > From: Gerd Hoffmann [mailto:kra...@redhat.com]
> > Sent: Thursday, May 22, 2014 2:45 PM
> > To: Chen, Tiejun
> > Cc: Anthony PERARD; Daniel P. Berrange; peter.mayd...@linaro.org;
> > xen-de...@lists.xensource.com; m...@redhat.com;
> > stefano.stabell...@eu.citrix.com; Kay, Allen M; kelly.zyta...@amd.com;
> > qemu-devel@nongnu.org; Zhang, Yang Z; anth...@codemonkey.ws
> > Subject: Re: [Qemu-devel] [v2][PATCH 4/8] xen, gfx passthrough: reserve
> > 00:02.0 for INTEL IGD
> > 
> >   Hi,
> > 
> > > > Another useful thing would be to not create the xen platform device
> > > > in case "-nodefaults" was specified on the command line (that switch
> > > > turns off a bunch of other devices present by default: vga, nic, cdrom, 
> > > > ...).
> > >
> > > Currently looks 'xen-platform' itself can't be created, not those devices
> > existed on that.
> > 
> > The error message looks more like libxl tries to hot-unplug the xen platform
> > device.
> > 
> > Attached patch (untested!) hooks up the xen platform device to the default
> > device code we have in qemu.  Two effects:
> > 
> >   (1) As mentioned above the device will not be created in case
> >   -nodefaults  is specified on the command line.
> >   (2) Autocreating the device is also turned off in case xen-platform
> >   is added manually via -device.
> > 
> > With the patch applied you should be able to move the xen-platform device to
> > some other place with a simple 'qemu -device xen-platform,addr=$slot'.
> > 
> 
> Gerd,
> 
> Sorry, I may misunderstand what you mean previously then have a wrong test.
> 
> So this still doesn't work actually. 
> 
> After applied your patch, 'xen-platform' is always disabled by default, 
> right? So 00:02.0 is left naturally to be assigned to IGD as we expect like 
> this,
> 
> tchen0@tchen0-HVM-domU:~$ lspci
> 00:00.0 Host bridge: Intel Corporation 4th Gen Core Processor DRAM Controller 
> (rev 06)
> 00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
> 00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
> 00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
> 00:02.0 VGA compatible controller: Intel Corporation Xeon E3-1200 v3/4th Gen 
> Core Processor Integrated Graphics Controller (rev 06)
> 00:03.0 USB controller: Intel Corporation 8 Series/C220 Series Chipset Family 
> USB EHCI #2 (rev 04)
> 00:1f.0 ISA bridge: Intel Corporation Q87 Express LPC Controller (rev 04)
> 
> Then this is fine but if you intend to add a 'qemu -device 
> xen-platform,addr=$slot', this doesn't work well. In my case,
> 
> gfx_passthru=1
> pci=["00:02.0@2", "00:1a.0"]
> xen_platform_pci=0
> device_model_args_hvm = ['-device', 'xen-platform,addr=0x3']
> 
> tchen0@tchen0-linux:~/workspace$ sudo xl cr domu-cfg
> Parsing config from domu-cfg
> libxl: error: libxl_qmp.c:287:qmp_handle_error_response: received an error 
> message from QMP server: Unsupported bus. Bus doesn't have property 
> 'acpi-pcihp-bsel' set
> libxl: error: libxl_create.c:1277:domcreate_attach_pci: libxl_device_pci_add 
> failed: -3
> 
> Thanks
> Tiejun

Weird: this implies use_acpi_pci_hotplug got set somehow.

-- 
MST

Re: [Qemu-devel] [PATCH 0/6] Obtain dirty bitmap via VM logging

2014-05-22 Thread Sanidhya Kashyap

On Wed, May 21, 2014 at 12:15 PM, ChenLiang  wrote:
> On 2014/5/21 12:56, Sanidhya Kashyap wrote:
>
>> On Wed, May 21, 2014 at 9:43 AM, ChenLiang  wrote:
>>> Hi,
>>> Nice job. We should avoid running migration_thread and 
>>> bitmap_logging_thread simultaneously.
>>>
>> Any particular suggestion to avoid running simultaneous execution of
>> the threads?
>>
>
> We can do it like this:
> https://lists.gnu.org/archive/html/qemu-devel/2014-04/msg02185.html
>

Hi Chen,

I have some questions which I wanted  to get clarified before
implementing the above part of avoiding the simultaneous execution.

As the migration process is going on, the RUN_STATE_RUNNING state is
being used, which is the same case in bitmap dump process. In order to
use the concept of RUN_STATE_* states, should I create two new states
as RUN_STATE_LOGBITMAP AND RUN_STATE_OUTMIGRATE? The
RUN_STATE_OUTMIGRATE state will have all the transitions that
RUN_STATE_RUNNING supports except the ones being used by migration.
Similarly, RUN_STATE_MIGRATE will support all the transitions of
RUN_STATE_RUNNING except RUN_STATE_LOGBITMAP.

We might also have to update the runstate_is_running function, since
RUN_STATE_LOGBITMAP and RUN_STATE_MIGRATE are almost similar in nature
to the RUN_STATE_RUNNING. What is your opinion about that?

Is it the only approach to do it or are there any simple and efficient
approaches?

Thanks,
Sanidhya

[Qemu-devel] [PATCH] pci: move dereferencing of root only after verifying valid root pointer.

2014-05-22 Thread Saravanakumar

Move dereferencing of root only after verifying valid root pointer.


Signed-off-by: Saravanakumar 
---
 hw/pci/pci.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 22fe5ee..8d6a8d4 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -605,13 +605,13 @@ PCIBus *pci_get_bus_devfn(int *devfnp, PCIBus *root, 
const char *devaddr)
 int dom, bus;
 unsigned slot;
 
-assert(!root->parent_dev);
-
 if (!root) {
 fprintf(stderr, "No primary PCI bus\n");
 return NULL;
 }
 
+assert(!root->parent_dev);
+
 if (!devaddr) {
 *devfnp = -1;
 return pci_find_bus_nr(root, 0);
-- 
1.7.6.5

Re: [Qemu-devel] [PATCH 7/9] KVM: target-ppc: Enable transactional state migration

2014-05-22 Thread Tom Musta

On 5/21/2014 9:50 PM, Alexey Kardashevskiy wrote:
> On 05/22/2014 04:11 AM, Tom Musta wrote:
>> On 5/21/2014 1:20 AM, Alexey Kardashevskiy wrote:
>>> This adds migration support for registers saved before transaction started.
>>>

[ ... ]

>>
>>
> 
>> TM is not limited in the ISA to 64-bit implementations.  Why restrict
>> this to TARGET_PPC64?
> 
> TS/TM bits in MSR are in top 32bits which are unavailable for 32bit
> machine, and we are emulating a machine here, this is pretty much why.
> 
> 

OK.  This makes more sense to me now.  Thanks.

Re: [Qemu-devel] [PATCH] virtio-pci: report an error when disable msix

2014-05-22 Thread Amos Kong

CC: Alex

On Thu, May 22, 2014 at 12:11:47PM +0300, Michael S. Tsirkin wrote:
> On Thu, May 22, 2014 at 05:02:17PM +0800, Amos Kong wrote:
> > QEMU remains 4k memory for PCI BAR, each msix entry takes 16 bytes.
> > If user assigns more than 128 vectors, msix resource isn't enough,
> > so msix will be disabled.
> > 
> > This patch addes a note when fail to init exclusive bars for msix.
> > 
> >  qemu -device virtio-net-pci,netdev=h1,vectors=129,mq=on \
> >   -netdev tap,id=h1,queues=8
> > 
> > Signed-off-by: Amos Kong 
> 
> OK I guess, but how about removing the limit instead?

The limit of nentries had been 2048, it was changed to 128 by commit 53f94925.

If we remove this limit checking, it also can't pass sanity test,
ranges_overlap() fails.

/* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
if ((table_bar_nr == pba_bar_nr &&
 ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
 ... ) {
return -EINVAL;
}

Related commits:
==
|commit 53f949254ad2435bfd45cb0dee96f246a0bdd7e3
|Author: Alex Williamson 
|Date:   Thu Jun 14 12:15:51 2012 -0600
|
|msix: Add simple BAR allocation MSIX setup functions
|
|diff --git a/hw/msix.c b/hw/msix.c
|index b64f109..bafea94 100644
|--- a/hw/msix.c
|+++ b/hw/msix.c
|@@ -299,6 +299,45 @@ err_config:
|@@ -299,6 +299,45 @@ err_config:
| return ret;
| }
| 
|+int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
|+uint8_t bar_nr)
|+{
|+int ret;
|+char *name;
|+
|+/*
|+ * Migration compatibility dictates that this remains a 4k
|+ * BAR with the vector table in the lower half and PBA in
|+ * the upper half.  Do not use these elsewhere!
|+ */

^ the limit comes from here

|+#define MSIX_EXCLUSIVE_BAR_SIZE 4096
|+#define MSIX_EXCLUSIVE_BAR_PBA_OFFSET (MSIX_EXCLUSIVE_BAR_SIZE / 2)
|+
|+if (nentries * PCI_MSIX_ENTRY_SIZE >
|MSIX_EXCLUSIVE_BAR_PBA_OFFSET) {
|+return -EINVAL;
|+}

> > ---
> >  hw/virtio/virtio-pci.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index ce97514..ea5dcdf 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -976,6 +976,8 @@ static void virtio_pci_device_plugged(DeviceState *d)
> >  
> >  if (proxy->nvectors &&
> >  msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 1)) {
> > +error_report("%s: unable to init exclusive bars for msix, disable 
> > msix",
> > + __func__);
> >  proxy->nvectors = 0;
> >  }
> >  
> > -- 
> > 1.9.0

-- 
Amos.

Re: [Qemu-devel] [PATCH v2 14/23] target-arm: add banked coprocessor register type and macros

2014-05-22 Thread Aggeler Fabian

On 22 May 2014, at 09:41, Edgar E. Iglesias  wrote:

> On Tue, May 13, 2014 at 06:15:59PM +0200, Fabian Aggeler wrote:
>> Banked CP registers can be defined with a A32_BANKED_REG macro which defines
>> a non-secure instance of the register followed by an adjacent secure 
>> instance.
>> Using a union makes the code backwards-compatible since the non-secure
>> instance can normally be accessed by its existing name.
>> 
>> When translating a banked CP register access instruction in monitor mode,
>> the SCR.NS bit determines which instance is going to be accessed.
>> 
>> If EL3 is operating in Aarch64 state coprocessor registers are not
>> banked anymore but in some cases have its own _EL3 instance.
> 
> Hi
> 
> Regarding the sctlr regs and the banking of S/NS regs in general, I
> think the general pattern should be to arrayify the regs that need
> to be indexed by EL.
> 
> This is an example of a structure (indexed by EL) with the aarch32
> struct beeing here to help clarify.
> union {
>struct {
>uint64_t pad;
>uint64_t sctlr_ns;
>uint64_t hsctlr;
>uint64_t sctlr_s;
>} aarch32;
>uint64_t sctlr_el[4];
> }
> 
> I think we would naturally want to register this for AArch32 as banked
> with NS=sctlr_el[1] and S=sctlr_el[3].
> 
> Another register example is FAR. For FAR, things look like this
> (when EL2 is available and ignoring DFAR for clarity):
> union {
>struct {
>uint64_t pad;
>uint64_t ifar_ns;
>uint64_t ifar_s;
>} aarch32;
>uint64_t far_el[4];
> }
> 
> Preferably we need something that can handle both cases.
> An option could be to arrayify the .fieldoffset in reginfos?
> If we don't want hardcoded TZ knowledge in the generic cpreg accessors,
> maybe there could be a generic function that returns the .fieldoffset
> index based on CPUState (e.g NS=0, S=1 etc). Or maybe specialized
> read/write fns would be enough.
> 
> Just an idea to get the discussion going.
> 
> struct ARMCPRegInfo {
> 
>union {
>ptrdiff_t fieldoffset;
>ptrdiff_t fieldoffsets[2];
>};
> };
> 
> unsigned int arm_cpreg_tzbank_idx(CPUARMState *env)
> {
>return is_a64(env) ? 0 : arm_is_secure(env);
> }
> 
> Example:
>{ .name = "FAR_EL1", .state = ARM_CP_STATE_BOTH,
>  .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
>  .access = PL1_RW,
>  .fieldindex_fn = arm_cpreg_tzbank_idx,
>  .fieldoffsets[] = { offsetof(CPUARMState, cp15.far_el[1]),
>  offsetof(CPUARMState, cp15.far_el[2])},
>  .resetvalue = 0, },
> 
>  ARMCPRegInfo sctlr = {
>  .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
>  .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
>  .access = PL1_RW,
>  .fieldindex_fn = arm_cpreg_tzbank_idx,
>  .fieldoffsets[] = { offsetof(CPUARMState, cp15.sctlr_el[1]),
>  offsetof(CPUARMState, cp15.sctlr_el[3]),
>  },
>  /* Assuming raw_write and raw_read respect the indexing.  */
>  .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
>  .raw_writefn = raw_write,
>  };
> 
> Best regards,
> Edgar
> 

Hi Edgar

Thanks for joining the discussion. I like the idea of arrayifying the cp regs, 
also for banking. 
Since your patches are doing this anyways for EL registers I wanted to change 
the registers
which do not have EL3/EL2 equivalents (DACR, PAR,…) to use the same mechanism. 
These
registers are the third case which you haven’t mentioned in your examples 
above, where we only
have one reg in Aarch64 but two (s/ns) in Aarch32. So I in my eyes it didn’t 
make sense to make
the array bigger than needed, that’s why I went for 2 entries only. But if it 
allows us map it easier
or in a more consistent way I don’t see why we cannot do it.

union {
   struct {
   uint64_t par_ns;
   uint64_t par_s;
   } aarch32;
   uint64_t par_el[2];
}

We should probably also get rid of the macros I used to define the banked 
registers, to make the code 
look more uniform. Using your idea of arrayifying fieldset too, we could get 
rid of the additional cpreg 
definitions. Do we need to specify a .fieldindex_fn for every cpreg in this 
case? 
Isn’t it the same for all the cpregs which are banked (two fieldoffsets, the 
first one for non-secure and
the second entry for secure)? But then we still need to know whether this 
register is banked or common.

But what about accessing them not from within a cpreg read/write instruction? 
We will have at least 3 
cases of different indexes ({ns=1, s=2}, {ns=1, s=3}, {ns=0, s=1}). Although by 
padding the last case 
we could merge it with the first one so we only have 2 ways of accessing a 
banked register, which was
also the case in my patches, for which I introduced macros. Any ideas how to 
simplify that?

Thanks,
Fabian

> 
> 
> 
> 
>> 
>> Signed-off-by: Sergey Fedorov 
>> Signed-off-by: Fabian Aggeler 
>> ---
>> target-arm/cpu.h   | 121 
>>

Re: [Qemu-devel] [PATCH v4 0/3] qapi: fix coding style in generated code

2014-05-22 Thread Amos Kong

On Fri, May 16, 2014 at 10:19:02AM -0400, Luiz Capitulino wrote:
> On Thu,  8 May 2014 09:14:37 +0800
> Amos Kong  wrote:
> 
> > Not a serious issue, but it's helpful if we can fix it.
> > 
> > V2: split change of scripts/qapi-visit.py to a split patch,
> > eat space by using a special char as Markus suggested
> > V3: update commitlog, update special string, fix of adding
> > const replace string by pattern
> > V4: fix pattern to cleanup special string (Paolo)
> > 
> > Amos Kong (3):
> >   qapi: fix coding style in parameters list
> >   qapi: add const prefix to 'char *' insider c_type()
> >   qapi: Suppress unwanted space between type and identifier
> > 
> >  scripts/qapi-commands.py |  4 +---
> >  scripts/qapi-visit.py| 20 ++--
> >  scripts/qapi.py  | 18 --
> >  3 files changed, 23 insertions(+), 19 deletions(-)
> 
> Amos, it seems that this series breaks test-qmp-commands (trace below).

Thanks for the catch.
 
> I'm not sure what is causing this and at first I thought your series was
> only making an existing bug visible, but I took a quick look and it seems
> that your last patch changes the code generator to drop the initialization
> of generated types pointers in (generated) qmp commands.
> 
> Can you please investigate this? And also, please, carry Reviewed-bys
> of unmodified patches and address Eric's comments if you respin.

I found the root reason.

We added a suffix in c_type() for pointer types, and clean it
in mcgen(). But there is a place that we directly check return
value of c_type().

I will respin.

A quick fix:

diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py
index 8f8a258..980573e 100644
--- a/scripts/qapi-commands.py
+++ b/scripts/qapi-commands.py
@@ -90,7 +92,7 @@ def gen_visitor_input_vars_decl(args):
 bool has_%(argname)s = false;
 ''',
  argname=c_var(argname))
-if c_type(argtype).endswith("*"):
+if c_type(argtype).endswith("*\033EATSPACE."):
 ret += mcgen('''
 %(argtype)s %(argname)s = NULL;
 ''',

Thanks, Amos
 
> $ make check
> [...]
> GTESTER tests/test-qmp-commands
> *** Error in `tests/test-qmp-commands': free(): invalid pointer: 
> 0x7f8a2fef3030 ***
> === Backtrace: =
> /lib64/libc.so.6(+0x3926875cff)[0x7f8a2d421cff]
> /lib64/libc.so.6(+0x392687cff8)[0x7f8a2d428ff8]
> /lib64/libglib-2.0.so.0(g_free+0xf)[0x7f8a2e616f7f]
> tests/test-qmp-commands(+0x2b267)[0x7f8a2ef4b267]
> tests/test-qmp-commands(+0x2a3fb)[0x7f8a2ef4a3fb]
> tests/test-qmp-commands(+0x262ca)[0x7f8a2ef462ca]
> tests/test-qmp-commands(+0x2633b)[0x7f8a2ef4633b]
> tests/test-qmp-commands(+0x26494)[0x7f8a2ef46494]
> tests/test-qmp-commands(+0x29f73)[0x7f8a2ef49f73]
> tests/test-qmp-commands(+0x2d2ac)[0x7f8a2ef4d2ac]
> tests/test-qmp-commands(+0x2d3de)[0x7f8a2ef4d3de]
> tests/test-qmp-commands(+0x291ee)[0x7f8a2ef491ee]
> /lib64/libglib-2.0.so.0(+0x392946d5e1)[0x7f8a2e6355e1]
> /lib64/libglib-2.0.so.0(+0x392946d7a6)[0x7f8a2e6357a6]
> /lib64/libglib-2.0.so.0(g_test_run_suite+0x17b)[0x7f8a2e635b1b]
> tests/test-qmp-commands(main+0x9a)[0x7f8a2ef49bac]
> /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f8a2d3cdd65]
> tests/test-qmp-commands(+0x4499)[0x7f8a2ef24499]
> === Memory map: 
> 7f8a2d3ac000-7f8a2d56 r-xp  fd:00 551385 
> /usr/lib64/libc-2.18.so
> 7f8a2d56-7f8a2d76 ---p 001b4000 fd:00 551385 
> /usr/lib64/libc-2.18.so
> 7f8a2d76-7f8a2d764000 r--p 001b4000 fd:00 551385 
> /usr/lib64/libc-2.18.so
> 7f8a2d764000-7f8a2d766000 rw-p 001b8000 fd:00 551385 
> /usr/lib64/libc-2.18.so
> 7f8a2d766000-7f8a2d76b000 rw-p  00:00 0 
> 7f8a2d76b000-7f8a2d783000 r-xp  fd:00 530647 
> /usr/lib64/libpthread-2.18.so
> 7f8a2d783000-7f8a2d982000 ---p 00018000 fd:00 530647 
> /usr/lib64/libpthread-2.18.so
> 7f8a2d982000-7f8a2d983000 r--p 00017000 fd:00 530647 
> /usr/lib64/libpthread-2.18.so
> 7f8a2d983000-7f8a2d984000 rw-p 00018000 fd:00 530647 
> /usr/lib64/libpthread-2.18.so
> 7f8a2d984000-7f8a2d988000 rw-p  00:00 0 
> 7f8a2d988000-7f8a2d99d000 r-xp  fd:00 551415 
> /usr/lib64/libgcc_s-4.8.2-20131212.so.1
> 7f8a2d99d000-7f8a2db9c000 ---p 00015000 fd:00 551415 
> /usr/lib64/libgcc_s-4.8.2-20131212.so.1
> 7f8a2db9c000-7f8a2db9d000 r--p 00014000 fd:00 551415 
> /usr/lib64/libgcc_s-4.8.2-20131212.so.1
> 7f8a2db9d000-7f8a2db9e000 rw-p 00015000 fd:00 551415 
> /usr/lib64/libgcc_s-4.8.2-20131212.so.1
> 7f8a2db9e000-7f8a2dca3000 r-xp  fd:00 569400 
> /usr/lib64/libm-2.18.so
> 7f8a2dca3000-7f8a2dea3000 ---p 00105000 fd:00 569400 
> /usr/lib64/libm-2.18.so
> 7f8a2dea3000-7f8a2dea4000 r--p 00105000 fd:00 569400 
> /usr/lib64/libm-2.18.so
> 7f8a

Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device

2014-05-22 Thread Stefano Stabellini

On Thu, 22 May 2014, Gerd Hoffmann wrote:
> Patch hooks up the xen platform device to the default device code we
> have in qemu.  Two effects:
> 
>   (1) The device will not be created in case -nodefaults is specified
>   on the command line.
>   (2) Autocreating the device is also turned off in case xen-platform
>   is added manually via -device.
> 
> With the patch applied you can move the xen-platform device to some
> other place with a simple 'qemu -device xen-platform,addr=$slot'.
> 
> Tested-by: Tiejun Chen 
> Signed-off-by: Gerd Hoffmann 

Given that libxl always passes -nodefaults to QEMU, this patch is going
to effectively disable xen_platform_pci for all Xen users. It is not a
good idea. With the patch applied a Xen user would have no way to enable
xen_platform_pci except for passing some magic command line runes via
device_model_args_hvm.


>  hw/i386/pc_piix.c| 2 +-
>  include/hw/xen/xen.h | 1 +
>  vl.c | 3 +++
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index eaf3e61..f987d03 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -385,7 +385,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
>  pc_init_pci(args);
>  
>  bus = pci_find_primary_bus();
> -if (bus != NULL) {
> +if (bus != NULL && default_xenplatform) {
>  pci_create_simple(bus, -1, "xen-platform");
>  }
>  }
> diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
> index 85fda3d..b350413 100644
> --- a/include/hw/xen/xen.h
> +++ b/include/hw/xen/xen.h
> @@ -20,6 +20,7 @@ enum xen_mode {
>  
>  extern uint32_t xen_domid;
>  extern enum xen_mode xen_mode;
> +extern int default_xenplatform;
>  
>  extern bool xen_allowed;
>  
> diff --git a/vl.c b/vl.c
> index 709d8cd..673148e 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -226,6 +226,7 @@ static int default_floppy = 1;
>  static int default_cdrom = 1;
>  static int default_sdcard = 1;
>  static int default_vga = 1;
> +int default_xenplatform = 1;
>  
>  static struct {
>  const char *driver;
> @@ -247,6 +248,7 @@ static struct {
>  { .driver = "isa-cirrus-vga",   .flag = &default_vga   },
>  { .driver = "vmware-svga",  .flag = &default_vga   },
>  { .driver = "qxl-vga",  .flag = &default_vga   },
> +{ .driver = "xen-platform", .flag = &default_xenplatform },
>  };
>  
>  static QemuOptsList qemu_rtc_opts = {
> @@ -4101,6 +4103,7 @@ int main(int argc, char **argv, char **envp)
>  default_monitor = 0;
>  default_net = 0;
>  default_vga = 0;
> +default_xenplatform = 0;
>  }
>  
>  if (is_daemonized()) {
> -- 
> 1.8.3.1
> 
>

Re: [Qemu-devel] [PATCH v2 14/23] target-arm: add banked coprocessor register type and macros

2014-05-22 Thread Sergey Fedorov


On 22.05.2014 15:49, Aggeler Fabian wrote:
> On 22 May 2014, at 09:41, Edgar E. Iglesias  wrote:
>
>> On Tue, May 13, 2014 at 06:15:59PM +0200, Fabian Aggeler wrote:
>>> Banked CP registers can be defined with a A32_BANKED_REG macro which defines
>>> a non-secure instance of the register followed by an adjacent secure 
>>> instance.
>>> Using a union makes the code backwards-compatible since the non-secure
>>> instance can normally be accessed by its existing name.
>>>
>>> When translating a banked CP register access instruction in monitor mode,
>>> the SCR.NS bit determines which instance is going to be accessed.
>>>
>>> If EL3 is operating in Aarch64 state coprocessor registers are not
>>> banked anymore but in some cases have its own _EL3 instance.
>> Hi
>>
>> Regarding the sctlr regs and the banking of S/NS regs in general, I
>> think the general pattern should be to arrayify the regs that need
>> to be indexed by EL.
>>
>> This is an example of a structure (indexed by EL) with the aarch32
>> struct beeing here to help clarify.
>> union {
>>struct {
>>uint64_t pad;
>>uint64_t sctlr_ns;
>>uint64_t hsctlr;
>>uint64_t sctlr_s;
>>} aarch32;
>>uint64_t sctlr_el[4];
>> }
>>
>> I think we would naturally want to register this for AArch32 as banked
>> with NS=sctlr_el[1] and S=sctlr_el[3].
>>
>> Another register example is FAR. For FAR, things look like this
>> (when EL2 is available and ignoring DFAR for clarity):
>> union {
>>struct {
>>uint64_t pad;
>>uint64_t ifar_ns;
>>uint64_t ifar_s;
>>} aarch32;
>>uint64_t far_el[4];
>> }
>>
>> Preferably we need something that can handle both cases.
>> An option could be to arrayify the .fieldoffset in reginfos?
>> If we don't want hardcoded TZ knowledge in the generic cpreg accessors,
>> maybe there could be a generic function that returns the .fieldoffset
>> index based on CPUState (e.g NS=0, S=1 etc). Or maybe specialized
>> read/write fns would be enough.
>>
>> Just an idea to get the discussion going.
>>
>> struct ARMCPRegInfo {
>> 
>>union {
>>ptrdiff_t fieldoffset;
>>ptrdiff_t fieldoffsets[2];
>>};
>> };
>>
>> unsigned int arm_cpreg_tzbank_idx(CPUARMState *env)
>> {
>>return is_a64(env) ? 0 : arm_is_secure(env);
>> }
>>
>> Example:
>>{ .name = "FAR_EL1", .state = ARM_CP_STATE_BOTH,
>>  .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
>>  .access = PL1_RW,
>>  .fieldindex_fn = arm_cpreg_tzbank_idx,
>>  .fieldoffsets[] = { offsetof(CPUARMState, cp15.far_el[1]),
>>  offsetof(CPUARMState, cp15.far_el[2])},
>>  .resetvalue = 0, },
>>
>>  ARMCPRegInfo sctlr = {
>>  .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
>>  .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
>>  .access = PL1_RW,
>>  .fieldindex_fn = arm_cpreg_tzbank_idx,
>>  .fieldoffsets[] = { offsetof(CPUARMState, cp15.sctlr_el[1]),
>>  offsetof(CPUARMState, cp15.sctlr_el[3]),
>>  },
>>  /* Assuming raw_write and raw_read respect the indexing.  */
>>  .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
>>  .raw_writefn = raw_write,
>>  };
>>
>> Best regards,
>> Edgar
>>
> Hi Edgar
>
> Thanks for joining the discussion. I like the idea of arrayifying the cp 
> regs, also for banking. 
> Since your patches are doing this anyways for EL registers I wanted to change 
> the registers
> which do not have EL3/EL2 equivalents (DACR, PAR,…) to use the same 
> mechanism. These
> registers are the third case which you haven’t mentioned in your examples 
> above, where we only
> have one reg in Aarch64 but two (s/ns) in Aarch32. So I in my eyes it didn’t 
> make sense to make
> the array bigger than needed, that’s why I went for 2 entries only. But if it 
> allows us map it easier
> or in a more consistent way I don’t see why we cannot do it.
>
> union {
>struct {
>uint64_t par_ns;
>uint64_t par_s;
>} aarch32;
>uint64_t par_el[2];
> }
>
> We should probably also get rid of the macros I used to define the banked 
> registers, to make the code 
> look more uniform. Using your idea of arrayifying fieldset too, we could get 
> rid of the additional cpreg 
> definitions. Do we need to specify a .fieldindex_fn for every cpreg in this 
> case? 
> Isn’t it the same for all the cpregs which are banked (two fieldoffsets, the 
> first one for non-secure and
> the second entry for secure)? But then we still need to know whether this 
> register is banked or common.
>
> But what about accessing them not from within a cpreg read/write instruction? 
> We will have at least 3 
> cases of different indexes ({ns=1, s=2}, {ns=1, s=3}, {ns=0, s=1}). Although 
> by padding the last case 
> we could merge it with the first one so we only have 2 ways of accessing a 
> banked register, which was
> also the case in my patches, for which I intro

Re: [Qemu-devel] [PATCH v2 0/2] dataplane: Enable config-wce

2014-05-22 Thread Stefan Hajnoczi

On Thu, May 15, 2014 at 07:22:04PM +0800, Fam Zheng wrote:
> This applies on top of Stefan's dataplane series.
> 
> v2:
> 
> Patch 1 moves the declaration of bdrv_get_aio_context to block.h.
> Patch 2 is unchanged except for the dropped #include.
> 
> 
> Fam Zheng (2):
>   block: Move declaration of bdrv_get_aio_context to block.h
>   virtio-blk: Allow config-wce in dataplane
> 
>  hw/block/dataplane/virtio-blk.c | 6 --
>  hw/block/virtio-blk.c   | 8 +++-
>  include/block/block.h   | 7 +++
>  include/block/block_int.h   | 7 ---
>  4 files changed, 14 insertions(+), 14 deletions(-)
> 
> -- 
> 1.9.2
> 

Reviewed-by: Stefan Hajnoczi

Re: [Qemu-devel] [PATCH v1 RFC 6/6] KVM: s390: add cpu model support

2014-05-22 Thread Michael Mueller

On Thu, 22 May 2014 10:53:38 +0200
Paolo Bonzini  wrote:

> Il 22/05/2014 10:23, Michael Mueller ha scritto:
> > On Wed, 21 May 2014 15:22:35 +0200
> > Alexander Graf  wrote:
> >
> > I have seen the slides from Eduardo which he presented during this years
> > DevConf in Brno and made my comments according the s390x implementation
> > on that. Is you will see, this is mostly overlapping except for the model
> > definition authority that I clearly see on qemu's side.
> >
> > See pdf attachment.
> 
> More comments:
> 
> - "Only one machine type in s390 case which is -machine s390-virtio-ccw"
> 
> This probably should change sooner or later, as soon as the 
> implementation becomes stable enough.  Versioning is necessary for live 
> migration across different QEMU version.  Perhaps start versioning in 
> 2.2, i.e. start making s390-virtio-ccw-2.1 an alias for s390-virtio-ccw now?

Absolutely right, we did not do it yet.

> 
> Note that new virtio device features can appear at any time outside the 
> s390 code, and will take part in versioning as well.

These changes are then as well included in the machine version I guess. 

> 
> - "No enforce option"
> 
> Strongly suggest making enforce the only possible behavior.

Right that's the plan.

> 
> - "Not in the s390x case, because the KVM facility mask limits the cpu 
> model specific facilities"
> 
> What if the KVM facility mask changes?  For x86, nowadays new CPUID bits 
> are only introduced in KVM when a new processors comes out.  But if we 
> introduced an older CPUID bit, it would be a huge complication for 
> backwards compatibility.  Is it different for s390?
> 

I'm thinking about this one currently. The ABI itself will not change
because all facilities of the architecture will be defined in QEMU for the
respective CPU model even though some of them are masked out upon request
by KVM, but the mask is part of the interface itself and can be applied by
QEMU already.  

Michael 

> Paolo
>

Re: [Qemu-devel] [PATCH] xen: make xen-platform a default device

2014-05-22 Thread Michael S. Tsirkin

On Thu, May 22, 2014 at 01:11:28PM +0100, Stefano Stabellini wrote:
> On Thu, 22 May 2014, Gerd Hoffmann wrote:
> > Patch hooks up the xen platform device to the default device code we
> > have in qemu.  Two effects:
> > 
> >   (1) The device will not be created in case -nodefaults is specified
> >   on the command line.
> >   (2) Autocreating the device is also turned off in case xen-platform
> >   is added manually via -device.
> > 
> > With the patch applied you can move the xen-platform device to some
> > other place with a simple 'qemu -device xen-platform,addr=$slot'.
> > 
> > Tested-by: Tiejun Chen 
> > Signed-off-by: Gerd Hoffmann 
> 
> Given that libxl always passes -nodefaults to QEMU, this patch is going
> to effectively disable xen_platform_pci for all Xen users. It is not a
> good idea. With the patch applied a Xen user would have no way to enable
> xen_platform_pci except for passing some magic command line runes via
> device_model_args_hvm.
> 

Yes, it's an unfortunate use of the interface.
How about a new machine type for xenfv - that's the only one
that's affected, right?
It's time xen started versioning qemu hardware anyway.

> >  hw/i386/pc_piix.c| 2 +-
> >  include/hw/xen/xen.h | 1 +
> >  vl.c | 3 +++
> >  3 files changed, 5 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > index eaf3e61..f987d03 100644
> > --- a/hw/i386/pc_piix.c
> > +++ b/hw/i386/pc_piix.c
> > @@ -385,7 +385,7 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
> >  pc_init_pci(args);
> >  
> >  bus = pci_find_primary_bus();
> > -if (bus != NULL) {
> > +if (bus != NULL && default_xenplatform) {
> >  pci_create_simple(bus, -1, "xen-platform");
> >  }
> >  }
> > diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
> > index 85fda3d..b350413 100644
> > --- a/include/hw/xen/xen.h
> > +++ b/include/hw/xen/xen.h
> > @@ -20,6 +20,7 @@ enum xen_mode {
> >  
> >  extern uint32_t xen_domid;
> >  extern enum xen_mode xen_mode;
> > +extern int default_xenplatform;
> >  
> >  extern bool xen_allowed;
> >  
> > diff --git a/vl.c b/vl.c
> > index 709d8cd..673148e 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -226,6 +226,7 @@ static int default_floppy = 1;
> >  static int default_cdrom = 1;
> >  static int default_sdcard = 1;
> >  static int default_vga = 1;
> > +int default_xenplatform = 1;
> >  
> >  static struct {
> >  const char *driver;
> > @@ -247,6 +248,7 @@ static struct {
> >  { .driver = "isa-cirrus-vga",   .flag = &default_vga   },
> >  { .driver = "vmware-svga",  .flag = &default_vga   },
> >  { .driver = "qxl-vga",  .flag = &default_vga   },
> > +{ .driver = "xen-platform", .flag = &default_xenplatform },
> >  };
> >  
> >  static QemuOptsList qemu_rtc_opts = {
> > @@ -4101,6 +4103,7 @@ int main(int argc, char **argv, char **envp)
> >  default_monitor = 0;
> >  default_net = 0;
> >  default_vga = 0;
> > +default_xenplatform = 0;
> >  }
> >  
> >  if (is_daemonized()) {
> > -- 
> > 1.8.3.1
> > 
> >

[Qemu-devel] [PATCH v5 0/3] qapi: fix coding style in generated code

2014-05-22 Thread Amos Kong

Not a serious issue, but it's helpful if we can fix it.

V2: split change of scripts/qapi-visit.py to a split patch,
eat space by using a special char as Markus suggested
V3: update commitlog, update special string, fix of adding
const replace string by pattern
V4: fix pattern to cleanup special string (Paolo)
V5: fix string checking bug (Luiz), update commitlog (Eric)
add comments for c_type() function

Amos Kong (3):
  qapi: fix coding style in parameters list
  qapi: add const prefix to 'char *' insider c_type()
  qapi: Suppress unwanted space between type and identifier

 scripts/qapi-commands.py |  6 ++
 scripts/qapi-visit.py| 20 ++--
 scripts/qapi.py  | 21 +++--
 3 files changed, 27 insertions(+), 20 deletions(-)

-- 
1.9.0

[Qemu-devel] [PATCH v5 1/3] qapi: fix coding style in parameters list

2014-05-22 Thread Amos Kong

A space after * when declaring a pointer type is redundant.

Signed-off-by: Amos Kong 
Reviewed-by: Eric Blake 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Markus Armbruster 
---
 scripts/qapi-visit.py | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index 06a79f1..c129697 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -77,7 +77,7 @@ static void visit_type_%(full_name)s_field_%(c_name)s(Visitor 
*m, %(name)s **obj
 
 ret += mcgen('''
 
-static void visit_type_%(full_name)s_fields(Visitor *m, %(name)s ** obj, Error 
**errp)
+static void visit_type_%(full_name)s_fields(Visitor *m, %(name)s **obj, Error 
**errp)
 {
 Error *err = NULL;
 ''',
@@ -186,7 +186,7 @@ def generate_visit_struct(expr):
 
 ret += mcgen('''
 
-void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error 
**errp)
+void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error 
**errp)
 {
 ''',
 name=name)
@@ -201,7 +201,7 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const 
char *name, Error **
 def generate_visit_list(name, members):
 return mcgen('''
 
-void visit_type_%(name)sList(Visitor *m, %(name)sList ** obj, const char 
*name, Error **errp)
+void visit_type_%(name)sList(Visitor *m, %(name)sList **obj, const char *name, 
Error **errp)
 {
 Error *err = NULL;
 GenericList *i, **prev;
@@ -230,7 +230,7 @@ out:
 def generate_visit_enum(name, members):
 return mcgen('''
 
-void visit_type_%(name)s(Visitor *m, %(name)s * obj, const char *name, Error 
**errp)
+void visit_type_%(name)s(Visitor *m, %(name)s *obj, const char *name, Error 
**errp)
 {
 visit_type_enum(m, (int *)obj, %(name)s_lookup, "%(name)s", name, errp);
 }
@@ -240,7 +240,7 @@ void visit_type_%(name)s(Visitor *m, %(name)s * obj, const 
char *name, Error **e
 def generate_visit_anon_union(name, members):
 ret = mcgen('''
 
-void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error 
**errp)
+void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error 
**errp)
 {
 Error *err = NULL;
 
@@ -327,7 +327,7 @@ def generate_visit_union(expr):
 
 ret += mcgen('''
 
-void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error 
**errp)
+void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error 
**errp)
 {
 Error *err = NULL;
 
@@ -399,13 +399,13 @@ def generate_declaration(name, members, genlist=True, 
builtin_type=False):
 if not builtin_type:
 ret += mcgen('''
 
-void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error 
**errp);
+void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error 
**errp);
 ''',
 name=name)
 
 if genlist:
 ret += mcgen('''
-void visit_type_%(name)sList(Visitor *m, %(name)sList ** obj, const char 
*name, Error **errp);
+void visit_type_%(name)sList(Visitor *m, %(name)sList **obj, const char *name, 
Error **errp);
 ''',
  name=name)
 
@@ -415,7 +415,7 @@ def generate_enum_declaration(name, members, genlist=True):
 ret = ""
 if genlist:
 ret += mcgen('''
-void visit_type_%(name)sList(Visitor *m, %(name)sList ** obj, const char 
*name, Error **errp);
+void visit_type_%(name)sList(Visitor *m, %(name)sList **obj, const char *name, 
Error **errp);
 ''',
  name=name)
 
@@ -424,7 +424,7 @@ void visit_type_%(name)sList(Visitor *m, %(name)sList ** 
obj, const char *name,
 def generate_decl_enum(name, members, genlist=True):
 return mcgen('''
 
-void visit_type_%(name)s(Visitor *m, %(name)s * obj, const char *name, Error 
**errp);
+void visit_type_%(name)s(Visitor *m, %(name)s *obj, const char *name, Error 
**errp);
 ''',
 name=name)
 
-- 
1.9.0

1 2 3 >

1 - 100 of 281 matches

Mail list logo