[Qemu-devel] [PATCH 0/2 v3] Release usb devices on shutdown and usb_del command

2010-06-12 Thread Shahar Havivi
v3:
separate usb hot-unplug and host terminate handling
remove empty methods from bsd and stub
added usb-linux atexit method to reset usb devices on termination

Shahar Havivi (2):
  Return usb device to host on usb_del command
  Return usb device to host on exit

 usb-linux.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)





[Qemu-devel] [PATCH 1/2] Return usb device to host on usb_del command

2010-06-12 Thread Shahar Havivi

Signed-off-by: Shahar Havivi 
---
 usb-linux.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/usb-linux.c b/usb-linux.c
index 88273ff..22a85e3 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -991,6 +991,7 @@ static int usb_host_close(USBHostDevice *dev)
 async_complete(dev);
 dev->closing = 0;
 usb_device_detach(&dev->dev);
+ioctl(dev->fd, USBDEVFS_RESET);
 close(dev->fd);
 dev->fd = -1;
 return 0;
-- 
1.7.0.4




[Qemu-devel] [PATCH 2/2] Return usb device to host on exit

2010-06-12 Thread Shahar Havivi

Signed-off-by: Shahar Havivi 
---
 usb-linux.c |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/usb-linux.c b/usb-linux.c
index 22a85e3..2a595f0 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -286,6 +286,17 @@ static void async_cancel(USBPacket *unused, void *opaque)
 }
 }
 
+static void usb_host_cleanup(void)
+{
+struct USBHostDevice *s;
+
+QTAILQ_FOREACH(s, &hostdevs, next) {
+if (s->fd != -1) {
+ioctl(s->fd, USBDEVFS_RESET);
+}
+}
+}
+
 static int usb_host_claim_interfaces(USBHostDevice *dev, int configuration)
 {
 int dev_descr_len, config_descr_len;
@@ -1066,6 +1077,7 @@ USBDevice *usb_host_device_open(const char *devname)
 qdev_prop_set_uint32(&dev->qdev, "vendorid",  filter.vendor_id);
 qdev_prop_set_uint32(&dev->qdev, "productid", filter.product_id);
 qdev_init_nofail(&dev->qdev);
+atexit(usb_host_cleanup);
 return dev;
 
 fail:
-- 
1.7.0.4




[Qemu-devel] [PATCH v3 16/16] hpet: Add MSI support

2010-06-12 Thread Jan Kiszka
Paul Brook wrote:
>> +} else if (timer_fsb_route(timer)) {
>> +apic_send_msi(timer->fsb >> 32, timer->fsb & 0x);
> 
> This should use a regular memory write, like the PCI MSI-X code does.
> 

Indeed, here we go:

-->

From: Jan Kiszka 

This implements the HPET capability of routing IRQs to the front-side
bus, aka MSI support. This feature can be enabled via the qdev property
"msi" and is off by default.

Note that switching it on can cause guests (at least Linux) to use the
HPET as timer instead of the LAPIC. KVM users should recall that only
the latter is currently available as fast in-kernel model.

Signed-off-by: Jan Kiszka 
---
 hw/hpet.c  |   38 ++
 hw/hpet_emul.h |4 +++-
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index 7219967..117c598 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -39,6 +39,8 @@
 #define DPRINTF(...)
 #endif
 
+#define HPET_MSI_SUPPORT0
+
 struct HPETState;
 typedef struct HPETTimer {  /* timers */
 uint8_t tn; /*timer number*/
@@ -47,7 +49,7 @@ typedef struct HPETTimer {  /* timers */
 /* Memory-mapped, software visible timer registers */
 uint64_t config;/* configuration/cap */
 uint64_t cmp;   /* comparator */
-uint64_t fsb;   /* FSB route, not supported now */
+uint64_t fsb;   /* FSB route */
 /* Hidden register state */
 uint64_t period;/* Last value written to comparator */
 uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
@@ -59,6 +61,7 @@ typedef struct HPETState {
 SysBusDevice busdev;
 uint64_t hpet_offset;
 qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
+uint32_t flags;
 uint8_t rtc_irq_level;
 uint8_t num_timers;
 HPETTimer timer[HPET_MAX_TIMERS];
@@ -80,6 +83,11 @@ static uint32_t timer_int_route(struct HPETTimer *timer)
 return (timer->config & HPET_TN_INT_ROUTE_MASK) >> HPET_TN_INT_ROUTE_SHIFT;
 }
 
+static uint32_t timer_fsb_route(HPETTimer *t)
+{
+return t->config & HPET_TN_FSB_ENABLE;
+}
+
 static uint32_t hpet_enabled(HPETState *s)
 {
 return s->config & HPET_CFG_ENABLE;
@@ -179,7 +187,11 @@ static void update_irq(struct HPETTimer *timer, int set)
 mask = 1 << timer->tn;
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer->state)) {
 s->isr &= ~mask;
-qemu_irq_lower(s->irqs[route]);
+if (!timer_fsb_route(timer)) {
+qemu_irq_lower(s->irqs[route]);
+}
+} else if (timer_fsb_route(timer)) {
+stl_phys(timer->fsb >> 32, timer->fsb & 0x);
 } else if (timer->config & HPET_TN_TYPE_LEVEL) {
 s->isr |= mask;
 qemu_irq_raise(s->irqs[route]);
@@ -216,6 +228,12 @@ static int hpet_post_load(void *opaque, int version_id)
 /* Push number of timers into capability returned via HPET_ID */
 s->capability &= ~HPET_ID_NUM_TIM_MASK;
 s->capability |= (s->num_timers - 1) << HPET_ID_NUM_TIM_SHIFT;
+
+/* Derive HPET_MSI_SUPPORT from the capability of the first timer. */
+s->flags &= ~(1 << HPET_MSI_SUPPORT);
+if (s->timer[0].config & HPET_TN_FSB_CAP) {
+s->flags |= 1 << HPET_MSI_SUPPORT;
+}
 return 0;
 }
 
@@ -361,6 +379,8 @@ static uint32_t hpet_ram_readl(void *opaque, 
target_phys_addr_t addr)
 case HPET_TN_CMP + 4:
 return timer->cmp >> 32;
 case HPET_TN_ROUTE:
+return timer->fsb;
+case HPET_TN_ROUTE + 4:
 return timer->fsb >> 32;
 default:
 DPRINTF("qemu: invalid hpet_ram_readl\n");
@@ -444,6 +464,9 @@ static void hpet_ram_writel(void *opaque, 
target_phys_addr_t addr,
 switch ((addr - 0x100) % 0x20) {
 case HPET_TN_CFG:
 DPRINTF("qemu: hpet_ram_writel HPET_TN_CFG\n");
+if (activating_bit(old_val, new_val, HPET_TN_FSB_ENABLE)) {
+update_irq(timer, 0);
+}
 val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
 timer->config = (timer->config & 0xULL) | val;
 if (new_val & HPET_TN_32BIT) {
@@ -501,8 +524,11 @@ static void hpet_ram_writel(void *opaque, 
target_phys_addr_t addr,
 hpet_set_timer(timer);
 }
 break;
+case HPET_TN_ROUTE:
+timer->fsb = (timer->fsb & 0xULL) | new_val;
+break;
 case HPET_TN_ROUTE + 4:
-DPRINTF("qemu: hpet_ram_writel HPET_TN_ROUTE + 4\n");
+timer->fsb = (new_val << 32) | (timer->fsb & 0x);
 break;
 default:
 DPRINTF("qemu: invalid hpet_ram_writel\n");
@@ -610,7 +636,10 @@ static void hpet_reset(DeviceState *d)
 
 hpet_del_timer(timer);
 timer->cmp = ~0ULL;
-timer->config =  HPET_TN_PERIODIC_CAP | HPET_TN_SIZE_CAP;
+timer->config = HPET_TN_PERIODIC_CAP | HPET_TN_SIZ

[Qemu-devel] Re: [PATCH v3 0/5] Add QMP migration events

2010-06-12 Thread Juan Quintela
Luiz Capitulino  wrote:
> On Thu, 10 Jun 2010 12:44:55 +0200
> Juan Quintela  wrote:
>
>> Luiz Capitulino  wrote:
>> > On Wed,  9 Jun 2010 14:10:53 +0200
>> > Juan Quintela  wrote:
>> >
>> >> This is a resent with what we agreed on yesterday call.
>> >> Migration events would be there for 0.13 until we get proper
>> >> async command support.
>> >
>> >  Something which is not clear to me is the set of events we'd have if 
>> > migrate
>> > was an async command.
>> >
>> >  Ie, do we really need MIGRATION_FAILED in this case? Don't we expect to 
>> > get
>> > this information from the async response?
>> >
>> 
>> I am not able to define simpler semantics for this events:
>
>  Ok, I must be missing something here.
>
>  First, let's talk about how async commands work today, better yet, how they
> should work in 0.14.
>
>  I see two possible interfaces (off the top of my head):
>
>  1. QMP only returns the response when the command is finished, eg:
>
> C: { "execute": "migrate", "id": "foo" ... }
> /* nothing is returned, other commands are issued, after several hours... 
> */
> S: { "return": ... "id": "foo" }
>
>  2. QMP returns a response right away just to signal that the command
> has been accepted:
>
> C: { "execute": "migrate", "id": "foo" ... }
> S: { "return": {}, "id": "foo" ... }
>
> However, the actual response is emitted as an event:
>
> S: { "event": "ASYNC_RESPONSE", "return": ..., "id": "foo" }
>
>
>  That's what I have in mind, that's why I'm confused about what we're
> trying to accomplish here.

You continue forgeting the case that you have more than one monitor
conected.  The other monitor will not receive _ethire_ of that
responses.  Will not know what is happening.


>> - MIGRATION_STARTED:  somebody started a migration, it is emited on
>>   source and target, all monitors receive this event.
>
>  The client has started the migration, it knows it. Why is the event needed?

The monitor that did it knows it, nobody else knows it.  At destination
time, I guess you agree this is important, i.e. the management app knows
that migration has started.

I have been needinng this for audit, knowing when guest
start/stop/migrates.  And just now the only way to get that information
is to "hack" qemu source code.  With migration_events it will be
"trivial" to know when that happens.

In libvirt case.  First thing that I would do if I receive a
MIGRATION_START command that I didn't started: I release control of that
VM, something fishy happened.  At this point, it is imposible to know
what happens.

>> - MIGRATION_ENDED: migration ended with sucess, all needed data is in
>>   target machine.  Also emitted in all monitors on source and target.
>>
>> - MIGRATION_CANCELED: in one of the source monitors somebody typed:
>>   migrate_cancel.  It is only emmited on the source monitors, target
>>   monitors will receive a MIGRATION_FAILED event.
>> 
>> - MIGRATION_FAILED (with this error).  At this point we don't have
>>   neither the QMP infraestructure for sending (with this error) nor
>>   migration infrastructure to put there anything different than -1.
>
>  Aren't all the three events above duplicating the async response?

Again, no.  Think that you have more than one monitor.
And indeed in the case of a single monitor.  We are delaying the
information to the target management app.

MIGRATION_ENDED on target machine: We can do whatever is needed when
migration has ended.  Async (or sync) answer to the source management
app, it needs to receive that information + send that information to
destination machine + receive information in destination machine + do
whatever is needed on destination vm.

Just because we refuse to give Information that we have, ready.
I am not asking for something that is difficult to do in qemu, it is
information that qemu knows (when migration has started/ended).  And we
are telling management apps that they need to guess when things happens
and use polling to know it.

>>   This event is emmited on all source and target monitors.
>>   - For 0.13: Event don't have a QError.
>>   - For 0.14: It will gain a QError.
>> 
>>   About migration becoming an async command.  Really it is independent
>>   of what events we emit.  If migration becomes async command, only
>>   difference is for the monitor that emitted the command, rest of
>>   monitors see nothing.  If we want to be able to see that informantion
>>   in the other monitors, we need the events anyways.
>
>  Somewhere else in this discussion someone has said that we should assume
> that the client talking to the source is the same one which is going to
> talk to the target, in this case all the communication is done through
> the source qemu instance.

That is another problem, that we don't have a monitor in the destination
target during migration.

Later, Juan.



[Qemu-devel] Re: [PATCH v3 0/5] Add QMP migration events

2010-06-12 Thread Juan Quintela
Anthony Liguori  wrote:
> On 06/11/2010 09:30 AM, Luiz Capitulino wrote:
>> On Thu, 10 Jun 2010 12:44:55 +0200
>> Juan Quintela  wrote:

>
> I think we've more or less agreed that MIGRATION_CONNECTED is really
> the event we want.

This had the problem of migrating to a file/whatever.

MIGRATION_CONECTED only makes sense when you have TCP or similar.
MIGRATION_STARTED it just means that migration has started,
independently of whatever method you have.  For TCP it is possible that
we want another event each time that somebody connected to a port (not
only for migration), but that is something different.

>>> - MIGRATION_ENDED: migration ended with sucess, all needed data is in
>>>target machine.  Also emitted in all monitors on source and target.
>>>
>>> - MIGRATION_CANCELED: in one of the source monitors somebody typed:
>>>migrate_cancel.  It is only emmited on the source monitors, target
>>>monitors will receive a MIGRATION_FAILED event.
>>>
>>> - MIGRATION_FAILED (with this error).  At this point we don't have
>>>neither the QMP infraestructure for sending (with this error) nor
>>>migration infrastructure to put there anything different than -1.
>>>  
>>   Aren't all the three events above duplicating the async response?
>>
>
> Yes.  Today, we should just generate a MIGRATION_DONE event and let a
> client poll for failure status.

I disagree completely.  It just defeat the reason for this.

MIGRATION_ENDED on destination machine: go ahead, everything is ok.
MIGRATION_FAILED: Uh, oh, something got wrong, we are in the slow path.

With MIGRATION_DONE + polling, we are delaying the "success" case just
to avoid having a new event.  I don't buy it.

>>>This event is emmited on all source and target monitors.
>>>- For 0.13: Event don't have a QError.
>>>- For 0.14: It will gain a QError.
>>>
>>>About migration becoming an async command.  Really it is independent
>>>of what events we emit.  If migration becomes async command, only
>>>difference is for the monitor that emitted the command, rest of
>>>monitors see nothing.  If we want to be able to see that informantion
>>>in the other monitors, we need the events anyways.
>>>  
>>   Somewhere else in this discussion someone has said that we should assume
>> that the client talking to the source is the same one which is going to
>> talk to the target, in this case all the communication is done through
>> the source qemu instance.
>>
>
> MIGRATION_DONE gets deprecated for 0.14.

I still think that we want the 4 events that I described.  My
understanding is that libvirt people also would love to have that 4
events.

Answer here is that: you can do this workaround and this other
workaround and you can get that information.

About semantics of messages, I don't see anytime soon that migration are
going to change from:

Start migration and then end with success or failure.

The only one that we could change/remove is MIGRATION_CANCEL to
MIGRATION_FAILURE(User canceled) it.  But that is it.

Why have to do a polling when none is needed?
If you preffer to change the MIGRATION_ENDED + MIGRATION_FAILURE(error)
to MIGRATION_ENDED(result code), and you have to check the error code, I
can also live with that.  But that is it.

Later, Juan.



[Qemu-devel] Re: [PATCH v3 0/5] Add QMP migration events

2010-06-12 Thread Juan Quintela
Luiz Capitulino  wrote:
> On Fri, 11 Jun 2010 09:38:42 -0500
> Anthony Liguori  wrote:
>
>> >   1. QMP only returns the response when the command is finished, eg:
>> >
>> >  C: { "execute": "migrate", "id": "foo" ... }
>> >  /* nothing is returned, other commands are issued, after several 
>> > hours... */
>> >  S: { "return": ... "id": "foo" }
>> >
>> 
>> This is how just about every RPC mechanism works...
>
>  Let's go for it then.
>
>> >> - MIGRATION_STARTED:  somebody started a migration, it is emited on
>> >>source and target, all monitors receive this event.
>> >>  
>> >   The client has started the migration, it knows it. Why is the event 
>> > needed?
>> >
>> 
>> I think we've more or less agreed that MIGRATION_CONNECTED is really the 
>> event we want.
>
>  Is it useful in the source or in the target?

Both.

>> >> - MIGRATION_ENDED: migration ended with sucess, all needed data is in
>> >>target machine.  Also emitted in all monitors on source and target.
>> >>
>> >> - MIGRATION_CANCELED: in one of the source monitors somebody typed:
>> >>migrate_cancel.  It is only emmited on the source monitors, target
>> >>monitors will receive a MIGRATION_FAILED event.
>> >>
>> >> - MIGRATION_FAILED (with this error).  At this point we don't have
>> >>neither the QMP infraestructure for sending (with this error) nor
>> >>migration infrastructure to put there anything different than -1.
>> >>  
>> >   Aren't all the three events above duplicating the async response?
>> >
>> 
>> Yes.  Today, we should just generate a MIGRATION_DONE event and let a 
>> client poll for failure status.
>
> [...]
>
>> MIGRATION_DONE gets deprecated for 0.14.
>
>  Yeah, this removes the need for polling in 0.13, but I was wondering if
> it's worth it. If I'm not mistaken, libvirt does the polling when working
> with the text Monitor and I believe it's not a big deal to keep it until 0.14.

It makes things slower for no good reason.

This reasoning is sneaky at least: 
- qemu didn't give interfaces to libvirt for do what libvirt wanted
- libvirt uses workarounds
- qemu tells libvirt that they are using workarounds that they shouldn't
- libvirt tells qemu why they need the new interface
- qemu tells libvirt that they could continue to use its workarounds.

I am losing something?  The whole point of live migration is that they
need to be as fast as possible.  For some scenaries (shared storage with
funny locking) libvirt needs to move from shared locks to normal locks
as far as migration ends on target.  We are telling them to do
workarounds becauese qemu don't want to tell libvirt on destination when
migration has ended.

Why can't we just tell them that migration has ended with success as
fast as possible?

I can't understand what I am missing here.  I can't believe that
libvirt(management app in general) could came with a simple request that
would make its live better.  And to make things worse, it is _trivial_
to implement, semantics are clear, has other uses, .

Later, Juan.




Re: [Qemu-devel] [PATCH] configure: Fix evaluation of config-host.mak in create_config

2010-06-12 Thread Aurelien Jarno
On Fri, Jun 11, 2010 at 10:58:29PM +0200, Jan Kiszka wrote:
> From: Jan Kiszka 
> 
> Only match on true dir variable assignments, avoid generating garbage
> due to the "# Configured with: ..." line which may contain "*dir=" as
> well.

Wouldn't it be better to skip all lines starting with '#', as this
problem might happen again after some more changes?

Or maybe we should do both.

> Signed-off-by: Jan Kiszka 
> ---
>  create_config |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/create_config b/create_config
> index 23c0cd5..0098e68 100755
> --- a/create_config
> +++ b/create_config
> @@ -13,7 +13,7 @@ case $line in
>  pkgversion=${line#*=}
>  echo "#define QEMU_PKGVERSION \"$pkgversion\""
>  ;;
> - prefix=* | *dir=*) # directory configuration
> + prefix=* | [a-z]*dir=*) # directory configuration
>  name=${line%=*}
>  value=${line#*=}
>  define_name=`echo $name | tr '[:lower:]' '[:upper:]'`
> 
> 

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Paul Brook
> This patch allows to optionally attach a message to an IRQ event. The
> message can contain a payload reference and a callback that the IRQ
> handler may invoke to report the delivery result. The former can be used
> to model message signaling interrupts, the latter to cleanly implement
> IRQ de-coalescing logics.

I don't like this. qemu_irq is a level triggered interface. Redundant calls to 
qemu_set_irq should (in principle) be a no-op.  If you want message passing 
then IMO you should be using something else.

Paul



Re: [Qemu-devel] [PATCH] configure: Fix evaluation of config-host.mak in create_config

2010-06-12 Thread Jan Kiszka
Aurelien Jarno wrote:
> On Fri, Jun 11, 2010 at 10:58:29PM +0200, Jan Kiszka wrote:
>> From: Jan Kiszka 
>>
>> Only match on true dir variable assignments, avoid generating garbage
>> due to the "# Configured with: ..." line which may contain "*dir=" as
>> well.
> 
> Wouldn't it be better to skip all lines starting with '#', as this
> problem might happen again after some more changes?
> 
> Or maybe we should do both.

This is what the patch (implicitly) does.

Jan

> 
>> Signed-off-by: Jan Kiszka 
>> ---
>>  create_config |2 +-
>>  1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/create_config b/create_config
>> index 23c0cd5..0098e68 100755
>> --- a/create_config
>> +++ b/create_config
>> @@ -13,7 +13,7 @@ case $line in
>>  pkgversion=${line#*=}
>>  echo "#define QEMU_PKGVERSION \"$pkgversion\""
>>  ;;
>> - prefix=* | *dir=*) # directory configuration
>> + prefix=* | [a-z]*dir=*) # directory configuration
>>  name=${line%=*}
>>  value=${line#*=}
>>  define_name=`echo $name | tr '[:lower:]' '[:upper:]'`
>>
>>
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 15/35] tcg-s390: Query instruction extensions that are installed.

2010-06-12 Thread Aurelien Jarno
On Fri, Jun 11, 2010 at 06:07:52AM -0700, Richard Henderson wrote:
> On 06/11/2010 01:06 AM, Aurelien Jarno wrote:
> > What's the difference between FACILITY_ZARCH and FACILITY_ZARCH_ACTIVE,
> > as both are actually flagged together. My guess is that
> > FACILITY_ZARCH_ACTIVE is needed in 64-bit mode, why FACILITY_ZARCH is
> > only needed for a possible future 32-bit mode. Is it correct?
> 
> Loosely,
> 
> ZARCH is set when the system is 64-bit capable, whether or not it is active.
> The OS would check this bit at startup if it wanted to change modes.  This
> bit isn't really interesting to us in userspace.
> 
> ZARCH_ACTIVE is set when the system is in 64-bit mode, i.e. you've booted
> with a 64-bit kernel.  Note that this says nothing about the address 
> decoding mode -- this bit can be set while the PSW is set for 31-bit
> address translation, e.g. running a 32-bit program on a 64-bit kernel.
> 

So in short we never use ZARCH in QEMU, so we probably don't want to
have this #define, nor add it at the same time as FACILITY_ZARCH_ACTIVE.


-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Jan Kiszka
Paul Brook wrote:
>> This patch allows to optionally attach a message to an IRQ event. The
>> message can contain a payload reference and a callback that the IRQ
>> handler may invoke to report the delivery result. The former can be used
>> to model message signaling interrupts, the latter to cleanly implement
>> IRQ de-coalescing logics.
> 
> I don't like this. qemu_irq is a level triggered interface. Redundant calls 
> to 
> qemu_set_irq should (in principle) be a no-op.  If you want message passing 
> then IMO you should be using something else.

I think we all now agree that we need some extension to the qemu_irq
interface to realize clean irq de-coalescing.

Returning a delivery code was proposed several times but always
rejected. Blueswirl brought up that message passing can model this as
well and that it has additional use for sparc. So I followed this
suggestion, specifically due to the added value and the absence of
concerns by other parties. Now, after spending yet another "few" hours
on this series, you raise a new concern. Wouldn't it be possible to do
this during the design discussion?

OK, what is your proposal now for an IRQ de-coalescing framework?

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Blue Swirl
On Sat, Jun 12, 2010 at 12:21 PM, Paul Brook  wrote:
>> This patch allows to optionally attach a message to an IRQ event. The
>> message can contain a payload reference and a callback that the IRQ
>> handler may invoke to report the delivery result. The former can be used
>> to model message signaling interrupts, the latter to cleanly implement
>> IRQ de-coalescing logics.
>
> I don't like this. qemu_irq is a level triggered interface. Redundant calls to
> qemu_set_irq should (in principle) be a no-op.  If you want message passing
> then IMO you should be using something else.

Keeping the optional message and qemu_irq together means that we can
reuse the existing IRQ subsystem. I'd guess something more separated
would need duplicate allocation and delivery support and maybe even
SysBus etc. would need lots of work to support a new class of IRQs.

Also detection of coalesced interrupts depends on reliably detecting
sort of redundant calls to qemu_set_irq. There we don't model real
level triggered hardware exactly but add some extra magic.



[Qemu-devel] [PATCH] win32: Add missing function ffs

2010-06-12 Thread Stefan Weil
mingw32 does not include function ffs.

Commit c6d29ad6e24533cc3762e1d654275607e1d03058 added a
declaration for ffs, but an implementation was missing.

For compilations with optimization, the compiler creates
inline code, so the implementation is not always needed.

Without optimization, linking fails without this patch.

v2: Use __builtin_ffs as suggested by Richard Henderson

Cc: Richard Henderson 
Signed-off-by: Stefan Weil 
---
 osdep.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/osdep.c b/osdep.c
index abbc8a2..dbf872a 100644
--- a/osdep.c
+++ b/osdep.c
@@ -167,6 +167,13 @@ int qemu_create_pidfile(const char *filename)
 
 #ifdef _WIN32
 
+/* mingw32 needs ffs for compilations without optimization. */
+int ffs(int i)
+{
+/* Use gcc's builtin ffs. */
+return __builtin_ffs(i);
+}
+
 /* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
 #define _W32_FT_OFFSET (1164447360ULL)
 
-- 
1.7.1




Re: [Qemu-devel] [PATCH 20/35] tcg-s390: Use LOAD COMPLIMENT for negate.

2010-06-12 Thread Aurelien Jarno
On Fri, Jun 04, 2010 at 12:14:28PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/s390/tcg-target.c |   10 ++
>  1 files changed, 2 insertions(+), 8 deletions(-)

This patch looks fine.

> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index f53038b..826a2c8 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -1134,16 +1134,10 @@ static inline void tcg_out_op(TCGContext *s, 
> TCGOpcode opc,
>  break;
>  
>  case INDEX_op_neg_i32:
> -/* FIXME: optimize args[0] != args[1] case */
> -tcg_out_insn(s, RR, LR, 13, args[1]);
> -tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
> -tcg_out_insn(s, RR, SR, args[0], 13);
> +tcg_out_insn(s, RR, LCR, args[0], args[1]);
>  break;
>  case INDEX_op_neg_i64:
> -/* FIXME: optimize args[0] != args[1] case */
> -tcg_out_mov(s, TCG_TMP0, args[1]);
> -tcg_out_movi(s, TCG_TYPE_I64, args[0], 0);
> -tcg_out_insn(s, RRE, SGR, args[0], TCG_TMP0);
> +tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
>  break;
>  
>  case INDEX_op_mul_i32:
> -- 
> 1.7.0.1
> 
> 
> 

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 18/35] tcg-s390: Implement bswap operations.

2010-06-12 Thread Aurelien Jarno
On Fri, Jun 04, 2010 at 12:14:26PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/s390/tcg-target.c |   24 
>  tcg/s390/tcg-target.h |   10 +-
>  2 files changed, 29 insertions(+), 5 deletions(-)

This patch looks fine.

> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index 42e3224..3a98ca3 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -86,6 +86,8 @@ typedef enum S390Opcode {
>  RRE_LLGCR   = 0xb984,
>  RRE_LLGFR   = 0xb916,
>  RRE_LLGHR   = 0xb985,
> +RRE_LRVR= 0xb91f,
> +RRE_LRVGR   = 0xb90f,
>  RRE_MSGR= 0xb90c,
>  RRE_MSR = 0xb252,
>  RRE_NGR = 0xb980,
> @@ -1231,6 +1233,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
> opc,
>  tgen_ext32u(s, args[0], args[1]);
>  break;
>  
> +case INDEX_op_bswap16_i32:
> +case INDEX_op_bswap16_i64:
> +/* The TCG bswap definition requires bits 0-47 already be zero.
> +   Thus we don't need the G-type insns to implement bswap16_i64.  */
> +tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
> +tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16);
> +break;
> +case INDEX_op_bswap32_i32:
> +case INDEX_op_bswap32_i64:
> +tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
> +break;
> +case INDEX_op_bswap64_i64:
> +tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
> +break;
> +
>  case INDEX_op_br:
>  tgen_branch(s, S390_CC_ALWAYS, args[0]);
>  break;
> @@ -1353,6 +1370,9 @@ static const TCGTargetOpDef s390_op_defs[] = {
>  { INDEX_op_ext16s_i32, { "r", "r" } },
>  { INDEX_op_ext16u_i32, { "r", "r" } },
>  
> +{ INDEX_op_bswap16_i32, { "r", "r" } },
> +{ INDEX_op_bswap32_i32, { "r", "r" } },
> +
>  { INDEX_op_brcond_i32, { "r", "r" } },
>  { INDEX_op_setcond_i32, { "r", "r", "r" } },
>  
> @@ -1410,6 +1430,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
>  { INDEX_op_ext32s_i64, { "r", "r" } },
>  { INDEX_op_ext32u_i64, { "r", "r" } },
>  
> +{ INDEX_op_bswap16_i64, { "r", "r" } },
> +{ INDEX_op_bswap32_i64, { "r", "r" } },
> +{ INDEX_op_bswap64_i64, { "r", "r" } },
> +
>  { INDEX_op_brcond_i64, { "r", "r" } },
>  { INDEX_op_setcond_i64, { "r", "r", "r" } },
>  #endif
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 570c832..dcb9bc3 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -54,8 +54,8 @@ typedef enum TCGReg {
>  #define TCG_TARGET_HAS_ext16s_i32
>  #define TCG_TARGET_HAS_ext8u_i32
>  #define TCG_TARGET_HAS_ext16u_i32
> -// #define TCG_TARGET_HAS_bswap16_i32
> -// #define TCG_TARGET_HAS_bswap32_i32
> +#define TCG_TARGET_HAS_bswap16_i32
> +#define TCG_TARGET_HAS_bswap32_i32
>  // #define TCG_TARGET_HAS_not_i32
>  #define TCG_TARGET_HAS_neg_i32
>  // #define TCG_TARGET_HAS_andc_i32
> @@ -72,9 +72,9 @@ typedef enum TCGReg {
>  #define TCG_TARGET_HAS_ext8u_i64
>  #define TCG_TARGET_HAS_ext16u_i64
>  #define TCG_TARGET_HAS_ext32u_i64
> -// #define TCG_TARGET_HAS_bswap16_i64
> -// #define TCG_TARGET_HAS_bswap32_i64
> -// #define TCG_TARGET_HAS_bswap64_i64
> +#define TCG_TARGET_HAS_bswap16_i64
> +#define TCG_TARGET_HAS_bswap32_i64
> +#define TCG_TARGET_HAS_bswap64_i64
>  // #define TCG_TARGET_HAS_not_i64
>  #define TCG_TARGET_HAS_neg_i64
>  // #define TCG_TARGET_HAS_andc_i64
> -- 
> 1.7.0.1
> 
> 
> 

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Paul Brook
> On Sat, Jun 12, 2010 at 12:21 PM, Paul Brook  wrote:
> >> This patch allows to optionally attach a message to an IRQ event. The
> >> message can contain a payload reference and a callback that the IRQ
> >> handler may invoke to report the delivery result. The former can be used
> >> to model message signaling interrupts, the latter to cleanly implement
> >> IRQ de-coalescing logics.
> > 
> > I don't like this. qemu_irq is a level triggered interface. Redundant
> > calls to qemu_set_irq should (in principle) be a no-op.  If you want
> > message passing then IMO you should be using something else.
> 
> Keeping the optional message and qemu_irq together means that we can
> reuse the existing IRQ subsystem. I'd guess something more separated
> would need duplicate allocation and delivery support and maybe even
> SysBus etc. would need lots of work to support a new class of IRQs.

How do you propose message passing is handled when you have nested multi-layer 
interrupt trees? How long is the message data valid for? Who owns it? How is a 
receiver meant to know for format of the message being delivered, and who it's 
intended for?

IMO message triggered systems are fundamentally different to level states. 
qemu_irq represents a level state, and I'd really like for it to stay that 
way.  

If we need/want a generic message passing interface, then that's a different 
problem, and needs to be done in such a way that the devices always agree on 
the type of message being passed.

TBH I preferred the original system whereby the source can query the state of 
the sink (i.e "are you ignoring this line?").  Note that conceptually this 
should be *querying* state, not responding to an event.

Paul



[Qemu-devel] [Bug 538908] Re: qemu-system-cris crashes after a few seconds

2010-06-12 Thread Ken Sharp
** Changed in: qemu
   Status: New => Fix Committed

-- 
qemu-system-cris crashes after a few seconds
https://bugs.launchpad.net/bugs/538908
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: Fix Committed
Status in “qemu” package in Ubuntu: New

Bug description:
qemu-system-cris crashes after a few seconds.

Running the binary without any options loads the qemu window and it sits there 
waiting for me to do something.  About a minute later it crashes.  Dump 
attached.

Running Linux 2.6.27-17-generic on Ubuntu 8.10.
Athlon XP 3000+ 2GB RAM





Re: [Qemu-devel] [PATCH 19/35] tcg-s390: Implement rotates.

2010-06-12 Thread Aurelien Jarno
On Fri, Jun 04, 2010 at 12:14:27PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/s390/tcg-target.c |   46 ++
>  tcg/s390/tcg-target.h |4 ++--
>  2 files changed, 48 insertions(+), 2 deletions(-)

This patch looks fine.

> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index 3a98ca3..f53038b 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -108,6 +108,8 @@ typedef enum S390Opcode {
>  RR_SR   = 0x1b,
>  RR_XR   = 0x17,
>  
> +RSY_RLL = 0xeb1d,
> +RSY_RLLG= 0xeb1c,
>  RSY_SLLG= 0xeb0d,
>  RSY_SRAG= 0xeb0a,
>  RSY_SRLG= 0xeb0c,
> @@ -1201,6 +1203,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
> opc,
>  op = RSY_SRAG;
>  goto do_shift64;
>  
> +case INDEX_op_rotl_i32:
> +/* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  
> */
> +if (const_args[2]) {
> +tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, 
> args[2]);
> +} else {
> +tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
> +}
> +break;
> +case INDEX_op_rotr_i32:
> +if (const_args[2]) {
> +tcg_out_sh64(s, RSY_RLL, args[0], args[1],
> + TCG_REG_NONE, (32 - args[2]) & 31);
> +} else {
> +tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
> +tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
> +}
> +break;
> +
> +case INDEX_op_rotl_i64:
> +if (const_args[2]) {
> +tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
> + TCG_REG_NONE, args[2]);
> +} else {
> +tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
> +}
> +break;
> +case INDEX_op_rotr_i64:
> +if (const_args[2]) {
> +tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
> + TCG_REG_NONE, (64 - args[2]) & 63);
> +} else {
> +/* We can use the smaller 32-bit negate because only the
> +   low 6 bits are examined for the rotate.  */
> +tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
> +tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
> +}
> +break;
> +
>  case INDEX_op_ext8s_i32:
>  tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
>  break;
> @@ -1365,6 +1405,9 @@ static const TCGTargetOpDef s390_op_defs[] = {
>  { INDEX_op_shr_i32, { "r", "0", "Ri" } },
>  { INDEX_op_sar_i32, { "r", "0", "Ri" } },
>  
> +{ INDEX_op_rotl_i32, { "r", "r", "Ri" } },
> +{ INDEX_op_rotr_i32, { "r", "r", "Ri" } },
> +
>  { INDEX_op_ext8s_i32, { "r", "r" } },
>  { INDEX_op_ext8u_i32, { "r", "r" } },
>  { INDEX_op_ext16s_i32, { "r", "r" } },
> @@ -1423,6 +1466,9 @@ static const TCGTargetOpDef s390_op_defs[] = {
>  { INDEX_op_shr_i64, { "r", "r", "Ri" } },
>  { INDEX_op_sar_i64, { "r", "r", "Ri" } },
>  
> +{ INDEX_op_rotl_i64, { "r", "r", "Ri" } },
> +{ INDEX_op_rotr_i64, { "r", "r", "Ri" } },
> +
>  { INDEX_op_ext8s_i64, { "r", "r" } },
>  { INDEX_op_ext8u_i64, { "r", "r" } },
>  { INDEX_op_ext16s_i64, { "r", "r" } },
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index dcb9bc3..9135c7a 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -49,7 +49,7 @@ typedef enum TCGReg {
>  
>  /* optional instructions */
>  #define TCG_TARGET_HAS_div2_i32
> -// #define TCG_TARGET_HAS_rot_i32
> +#define TCG_TARGET_HAS_rot_i32
>  #define TCG_TARGET_HAS_ext8s_i32
>  #define TCG_TARGET_HAS_ext16s_i32
>  #define TCG_TARGET_HAS_ext8u_i32
> @@ -65,7 +65,7 @@ typedef enum TCGReg {
>  // #define TCG_TARGET_HAS_nor_i32
>  
>  #define TCG_TARGET_HAS_div2_i64
> -// #define TCG_TARGET_HAS_rot_i64
> +#define TCG_TARGET_HAS_rot_i64
>  #define TCG_TARGET_HAS_ext8s_i64
>  #define TCG_TARGET_HAS_ext16s_i64
>  #define TCG_TARGET_HAS_ext32s_i64
> -- 
> 1.7.0.1
> 
> 
> 

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Blue Swirl
On Sat, Jun 12, 2010 at 2:15 PM, Paul Brook  wrote:
>> On Sat, Jun 12, 2010 at 12:21 PM, Paul Brook  wrote:
>> >> This patch allows to optionally attach a message to an IRQ event. The
>> >> message can contain a payload reference and a callback that the IRQ
>> >> handler may invoke to report the delivery result. The former can be used
>> >> to model message signaling interrupts, the latter to cleanly implement
>> >> IRQ de-coalescing logics.
>> >
>> > I don't like this. qemu_irq is a level triggered interface. Redundant
>> > calls to qemu_set_irq should (in principle) be a no-op.  If you want
>> > message passing then IMO you should be using something else.
>>
>> Keeping the optional message and qemu_irq together means that we can
>> reuse the existing IRQ subsystem. I'd guess something more separated
>> would need duplicate allocation and delivery support and maybe even
>> SysBus etc. would need lots of work to support a new class of IRQs.
>
> How do you propose message passing is handled when you have nested multi-layer
> interrupt trees?

Do we have such trees somewhere? I think message passing interrupts
are only used in bus based systems, like PCI or UPA/JBUS etc. I don't
know how LAPIC/IOAPIC bus works, it could be similar.

> How long is the message data valid for?

Currently it's valid for the duration of the call. It may make sense
to extend that later in order to handle EOIs.

> Who owns it?

The source.

> How is a
> receiver meant to know for format of the message being delivered, and who it's
> intended for?

This depends on the architecture. On Sparc64 the message is specified
by the guest or OF.

> IMO message triggered systems are fundamentally different to level states.
> qemu_irq represents a level state, and I'd really like for it to stay that
> way.

In real HW (MSI or Sparc64 mondo interrupts) a bus delivers the
message. But our buses only handle address and data lines.

> If we need/want a generic message passing interface, then that's a different
> problem, and needs to be done in such a way that the devices always agree on
> the type of message being passed.
>
> TBH I preferred the original system whereby the source can query the state of
> the sink (i.e "are you ignoring this line?").  Note that conceptually this
> should be *querying* state, not responding to an event.

It's simple, but I think too simple. It would work for the coalescing
interrupt hack but useless for other things.



Re: [Qemu-devel] [PATCH 17/35] tcg-s390: Implement sign and zero-extension operations.

2010-06-12 Thread Aurelien Jarno
On Fri, Jun 04, 2010 at 12:14:25PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/s390/tcg-target.c |  164 
> -
>  tcg/s390/tcg-target.h |   20 +++---
>  2 files changed, 158 insertions(+), 26 deletions(-)

This patch looks fine.

> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index 71e017a..42e3224 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -78,10 +78,14 @@ typedef enum S390Opcode {
>  RRE_DLR = 0xb997,
>  RRE_DSGFR   = 0xb91d,
>  RRE_DSGR= 0xb90d,
> +RRE_LGBR= 0xb906,
>  RRE_LCGR= 0xb903,
>  RRE_LGFR= 0xb914,
> +RRE_LGHR= 0xb907,
>  RRE_LGR = 0xb904,
> +RRE_LLGCR   = 0xb984,
>  RRE_LLGFR   = 0xb916,
> +RRE_LLGHR   = 0xb985,
>  RRE_MSGR= 0xb90c,
>  RRE_MSR = 0xb252,
>  RRE_NGR = 0xb980,
> @@ -117,11 +121,9 @@ typedef enum S390Opcode {
>  RXY_LGF = 0xe314,
>  RXY_LGH = 0xe315,
>  RXY_LHY = 0xe378,
> -RXY_LLC = 0xe394,
>  RXY_LLGC= 0xe390,
>  RXY_LLGF= 0xe316,
>  RXY_LLGH= 0xe391,
> -RXY_LLH = 0xe395,
>  RXY_LMG = 0xeb04,
>  RXY_LRV = 0xe31e,
>  RXY_LRVG= 0xe30f,
> @@ -553,6 +555,96 @@ static inline void tcg_out_st(TCGContext *s, TCGType 
> type, TCGReg data,
>  }
>  }
>  
> +static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
> +{
> +if (facilities & FACILITY_EXT_IMM) {
> +tcg_out_insn(s, RRE, LGBR, dest, src);
> +return;
> +}
> +
> +if (type == TCG_TYPE_I32) {
> +if (dest == src) {
> +tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
> +} else {
> +tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
> +}
> +tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
> +} else {
> +tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
> +tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
> +}
> +}
> +
> +static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
> +{
> +if (facilities & FACILITY_EXT_IMM) {
> +tcg_out_insn(s, RRE, LLGCR, dest, src);
> +return;
> +}
> +
> +if (dest == src) {
> +tcg_out_movi(s, type, TCG_TMP0, 0xff);
> +src = TCG_TMP0;
> +} else {
> +tcg_out_movi(s, type, dest, 0xff);
> +}
> +if (type == TCG_TYPE_I32) {
> +tcg_out_insn(s, RR, NR, dest, src);
> +} else {
> +tcg_out_insn(s, RRE, NGR, dest, src);
> +}
> +}
> +
> +static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
> +{
> +if (facilities & FACILITY_EXT_IMM) {
> +tcg_out_insn(s, RRE, LGHR, dest, src);
> +return;
> +}
> +
> +if (type == TCG_TYPE_I32) {
> +if (dest == src) {
> +tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
> +} else {
> +tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
> +}
> +tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
> +} else {
> +tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
> +tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
> +}
> +}
> +
> +static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
> +{
> +if (facilities & FACILITY_EXT_IMM) {
> +tcg_out_insn(s, RRE, LLGHR, dest, src);
> +return;
> +}
> +
> +if (dest == src) {
> +tcg_out_movi(s, type, TCG_TMP0, 0x);
> +src = TCG_TMP0;
> +} else {
> +tcg_out_movi(s, type, dest, 0x);
> +}
> +if (type == TCG_TYPE_I32) {
> +tcg_out_insn(s, RR, NR, dest, src);
> +} else {
> +tcg_out_insn(s, RRE, NGR, dest, src);
> +}
> +}
> +
> +static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
> +{
> +tcg_out_insn(s, RRE, LGFR, dest, src);
> +}
> +
> +static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
> +{
> +tcg_out_insn(s, RRE, LLGFR, dest, src);
> +}
> +
>  static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2)
>  {
>  if (c > TCG_COND_GT) {
> @@ -643,8 +735,8 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, int 
> data_reg, int addr_reg,
>  }
>  
>  #if TARGET_LONG_BITS == 32
> -tcg_out_insn(s, RRE, LLGFR, arg1, addr_reg);
> -tcg_out_insn(s, RRE, LLGFR, arg0, addr_reg);
> +tgen_ext32u(s, arg1, addr_reg);
> +tgen_ext32u(s, arg0, addr_reg);
>  #else
>  tcg_out_mov(s, arg1, addr_reg);
>  tcg_out_mov(s, arg0, addr_reg);
> @@ -681,7 +773,7 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, int 
> data_reg, int addr_reg,
>  
>  /* call load/store helper */
>  #if TARGET_LONG_BITS == 32
> -tcg_out_insn(s, RRE, LLGFR, arg0, addr_reg);
> +tgen_ext32u(s, arg0, addr_reg);
>  #else
>  tcg_out_mov(s, arg0, addr_reg);
>  #endif
> @@ -697,15 +789,13 @@ stat

Re: [Qemu-devel] [PATCH] configure: Fix evaluation of config-host.mak in create_config

2010-06-12 Thread Aurelien Jarno
On Sat, Jun 12, 2010 at 02:25:10PM +0200, Jan Kiszka wrote:
> Aurelien Jarno wrote:
> > On Fri, Jun 11, 2010 at 10:58:29PM +0200, Jan Kiszka wrote:
> >> From: Jan Kiszka 
> >>
> >> Only match on true dir variable assignments, avoid generating garbage
> >> due to the "# Configured with: ..." line which may contain "*dir=" as
> >> well.
> > 
> > Wouldn't it be better to skip all lines starting with '#', as this
> > problem might happen again after some more changes?
> > 
> > Or maybe we should do both.
> 
> This is what the patch (implicitly) does.

Yes, but only for this line. What I mean is something more generic
like:

"#"*) # comment
continue
;;

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] Re: [PATCH] win32: Add missing function ffs

2010-06-12 Thread Richard Henderson
On 06/12/2010 07:07 AM, Stefan Weil wrote:
> v2: Use __builtin_ffs as suggested by Richard Henderson
> 
> Cc: Richard Henderson 
> Signed-off-by: Stefan Weil 

Acked-by: Richard Henderson 


r~



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Paul Brook
> I think message passing interrupts
> are only used in bus based systems, like PCI or UPA/JBUS etc. I don't
> know how LAPIC/IOAPIC bus works, it could be similar.

PCI Message Signalled Interrupts use a regular data write, and we model it 
exactly that way. Under normal circumstances you program the device to write 
to memory mapped APIC control registers, but there's no real reason why you 
couldn't write to RAM.

LAPIC/IOAPIC have their own bus. On some hardware this is multiplexed over the 
main system bus, but logically it is a separate interconnect.

The fact that these a bus based systems suggests against using qemu_irq, which 
is an inherently point-point system.

> > How is a
> > receiver meant to know for format of the message being delivered, and who
> > it's intended for?
> 
> This depends on the architecture. On Sparc64 the message is specified
> by the guest or OF.
>...
> In real HW (MSI or Sparc64 mondo interrupts) a bus delivers the
> message. But our buses only handle address and data lines.

IIUC you're trying to use qemu_irq as a generic interconnect between devices. 
I think that's going to cause more problems than it solves.  If a bus has 
additional message passing capabilities then this can be part of the bus 
interface.

If two devices need a private communication channel then we probably want some 
implementation agnostic way of defining this link.  Connecting LAPIC to IOAPIC 
is a potential example of this. However this needs to be done in a type-safe 
manner.  DMA channels may be another potential use of this linking. We'd want 
to avoid connecting a DMA channel to an APIC.

[*] A simple unidirectional dma request line is suitable for qmu_irq. A DMA 
system that transfers data outside of memory read/write transactions is not. 
e.g. ISA effectively defines a regular memory bus plus 8 bidirectional data 
streams (aka DMA channels). These are multiplexed over the same physical pins, 
but logically distinct. The DMA channels could either be bundled up into the 
ISA bus interface, or modelled as links between devices and the DMA 
controller.

> > TBH I preferred the original system whereby the source can query the
> > state of the sink (i.e "are you ignoring this line?").  Note that
> > conceptually this should be querying state, not responding to an event.
> 
> It's simple, but I think too simple. It would work for the coalescing
> interrupt hack but useless for other things.

I'm not convinced making qemu_irq do "other things" is a good idea.

Paul



Re: [Qemu-devel] [PATCH] configure: Fix evaluation of config-host.mak in create_config

2010-06-12 Thread Jan Kiszka
Aurelien Jarno wrote:
> On Sat, Jun 12, 2010 at 02:25:10PM +0200, Jan Kiszka wrote:
>> Aurelien Jarno wrote:
>>> On Fri, Jun 11, 2010 at 10:58:29PM +0200, Jan Kiszka wrote:
 From: Jan Kiszka 

 Only match on true dir variable assignments, avoid generating garbage
 due to the "# Configured with: ..." line which may contain "*dir=" as
 well.
>>> Wouldn't it be better to skip all lines starting with '#', as this
>>> problem might happen again after some more changes?
>>>
>>> Or maybe we should do both.
>> This is what the patch (implicitly) does.
> 
> Yes, but only for this line. What I mean is something more generic
> like:
> 
> "#"*) # comment
> continue
> ;;
> 

IMO, that would only paper over suboptimally expressed patterns for the
other line evaluations.

Jan



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] Re: [PATCH] configure: Fix evaluation of config-host.mak in create_config

2010-06-12 Thread Paolo Bonzini

On 06/12/2010 06:44 PM, Jan Kiszka wrote:

Yes, but only for this line. What I mean is something more generic
like:

"#"*) # comment
 continue
 ;;



IMO, that would only paper over suboptimally expressed patterns for the
other line evaluations.


Agreed.  I like Jan's patch more.

Paolo



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Blue Swirl
On Sat, Jun 12, 2010 at 3:58 PM, Paul Brook  wrote:
>> I think message passing interrupts
>> are only used in bus based systems, like PCI or UPA/JBUS etc. I don't
>> know how LAPIC/IOAPIC bus works, it could be similar.
>
> PCI Message Signalled Interrupts use a regular data write, and we model it
> exactly that way. Under normal circumstances you program the device to write
> to memory mapped APIC control registers, but there's no real reason why you
> couldn't write to RAM.
>
> LAPIC/IOAPIC have their own bus. On some hardware this is multiplexed over the
> main system bus, but logically it is a separate interconnect.
>
> The fact that these a bus based systems suggests against using qemu_irq, which
> is an inherently point-point system.
>
>> > How is a
>> > receiver meant to know for format of the message being delivered, and who
>> > it's intended for?
>>
>> This depends on the architecture. On Sparc64 the message is specified
>> by the guest or OF.
>>...
>> In real HW (MSI or Sparc64 mondo interrupts) a bus delivers the
>> message. But our buses only handle address and data lines.
>
> IIUC you're trying to use qemu_irq as a generic interconnect between devices.
> I think that's going to cause more problems than it solves.  If a bus has
> additional message passing capabilities then this can be part of the bus
> interface.
>
> If two devices need a private communication channel then we probably want some
> implementation agnostic way of defining this link.  Connecting LAPIC to IOAPIC
> is a potential example of this. However this needs to be done in a type-safe
> manner.  DMA channels may be another potential use of this linking. We'd want
> to avoid connecting a DMA channel to an APIC.
>
> [*] A simple unidirectional dma request line is suitable for qmu_irq. A DMA
> system that transfers data outside of memory read/write transactions is not.
> e.g. ISA effectively defines a regular memory bus plus 8 bidirectional data
> streams (aka DMA channels). These are multiplexed over the same physical pins,
> but logically distinct. The DMA channels could either be bundled up into the
> ISA bus interface, or modelled as links between devices and the DMA
> controller.

Very very interesting. There's some out of band data related to DMA
(bus errors, IOMMU faults, ISA DREQ) which hasn't been covered in
previous Generic DMA proposals. Maybe all we need is a generic side
band link, which would be usable for point to point (qemu_irq,
coalescing, messages) and bus channels?

>> > TBH I preferred the original system whereby the source can query the
>> > state of the sink (i.e "are you ignoring this line?").  Note that
>> > conceptually this should be querying state, not responding to an event.
>>
>> It's simple, but I think too simple. It would work for the coalescing
>> interrupt hack but useless for other things.
>
> I'm not convinced making qemu_irq do "other things" is a good idea.



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Paul Brook
> > [*] A simple unidirectional dma request line is suitable for qmu_irq. A
> > DMA system that transfers data outside of memory read/write transactions
> > is not. e.g. ISA effectively defines a regular memory bus plus 8
> > bidirectional data streams (aka DMA channels). These are multiplexed
> > over the same physical pins, but logically distinct. The DMA channels
> > could either be bundled up into the ISA bus interface, or modelled as
> > links between devices and the DMA controller.
> 
> Very very interesting. There's some out of band data related to DMA
> (bus errors, IOMMU faults, ISA DREQ) which hasn't been covered in
> previous Generic DMA proposals.

I suspect you're confusing two different concepts - bus-master DMA (memory 
accesses generated by devices) v.s. IO channel DMA (out of band data stream 
between a device and a third party DMA engine). Despite both being called 
"DMA", they significantly different beasts. The IO channel DMA master is 
effectively a bridge between the memory bus (accessed via bus-master DMA) and 
the device IO channel.

Note that actual IO channel DMA is relatively rare. While many embedded 
devices still have a DMA engine this tends to generate regular memory 
accesses, which are then programmed to access a memory mapped FIFO. This 
shouldn't need any special magic to implement, typically just a simple 
qemu_irq control line.

> Maybe all we need is a generic side
> band link, which would be usable for point to point (qemu_irq,
> coalescing, messages) and bus channels?

We may want to consider a common mechanism for mapping links between devices. 
i.e some way of saying "Connect device X port N to device Y port M". In the 
case of traditional interrupts each IRQ source or sink would be a "port". For 
a bus system you'd probably want to connect them all to a [virtual] bus 
arbiter object.

This should allow the mapping core to verify that e.g. you're not connecting a 
qemu_irq to something expecting an apic_message, preferably without knowing or 
caring how either of those work.

In theory I guess you could also use the same mechanism to connect network 
devices to their host interface, etc.  I haven't thought this bit through 
fully, so this may be excessively clever.

Paul



Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Blue Swirl
On Sat, Jun 12, 2010 at 7:33 PM, Blue Swirl  wrote:
> On Sat, Jun 12, 2010 at 3:58 PM, Paul Brook  wrote:
>>> I think message passing interrupts
>>> are only used in bus based systems, like PCI or UPA/JBUS etc. I don't
>>> know how LAPIC/IOAPIC bus works, it could be similar.
>>
>> PCI Message Signalled Interrupts use a regular data write, and we model it
>> exactly that way. Under normal circumstances you program the device to write
>> to memory mapped APIC control registers, but there's no real reason why you
>> couldn't write to RAM.
>>
>> LAPIC/IOAPIC have their own bus. On some hardware this is multiplexed over 
>> the
>> main system bus, but logically it is a separate interconnect.
>>
>> The fact that these a bus based systems suggests against using qemu_irq, 
>> which
>> is an inherently point-point system.
>>
>>> > How is a
>>> > receiver meant to know for format of the message being delivered, and who
>>> > it's intended for?
>>>
>>> This depends on the architecture. On Sparc64 the message is specified
>>> by the guest or OF.
>>>...
>>> In real HW (MSI or Sparc64 mondo interrupts) a bus delivers the
>>> message. But our buses only handle address and data lines.
>>
>> IIUC you're trying to use qemu_irq as a generic interconnect between devices.
>> I think that's going to cause more problems than it solves.  If a bus has
>> additional message passing capabilities then this can be part of the bus
>> interface.
>>
>> If two devices need a private communication channel then we probably want 
>> some
>> implementation agnostic way of defining this link.  Connecting LAPIC to 
>> IOAPIC
>> is a potential example of this. However this needs to be done in a type-safe
>> manner.  DMA channels may be another potential use of this linking. We'd want
>> to avoid connecting a DMA channel to an APIC.
>>
>> [*] A simple unidirectional dma request line is suitable for qmu_irq. A DMA
>> system that transfers data outside of memory read/write transactions is not.
>> e.g. ISA effectively defines a regular memory bus plus 8 bidirectional data
>> streams (aka DMA channels). These are multiplexed over the same physical 
>> pins,
>> but logically distinct. The DMA channels could either be bundled up into the
>> ISA bus interface, or modelled as links between devices and the DMA
>> controller.
>
> Very very interesting. There's some out of band data related to DMA
> (bus errors, IOMMU faults, ISA DREQ) which hasn't been covered in
> previous Generic DMA proposals. Maybe all we need is a generic side
> band link, which would be usable for point to point (qemu_irq,
> coalescing, messages) and bus channels?

I think we could solve all problems (well, maybe not world peace, yet)
by switching to message based system for all of DMA and IRQs.

Each device would have a message input port and way to output messages.

Examples:

Zero copy memory access from device D1 to D2 to host memory (D3) with
access broken to page length units and errors occurring on the last
byte:
D1 send_msg(ID, MSG_MEM_WRITE, DMA address, length) -> D2
D2 send_msg(ID, MSG_MEM_WRITE, DMA address2, length) -> D3
D3 send_replymsg(ID, MSG_MEM_PTR, host address, 4096) -> D2
D2 send_replymsg(ID, MSG_MEM_PTR, host address, 4096) -> D1
D3 send_replymsg(ID, MSG_MEM_PTR, host address, length - 4096 - 1) -> D2
D2 send_replymsg(ID, MSG_MEM_PTR, host address, length - 4096 - 1) -> D1
D3 send_replymsg(ID, MSG_MEM_ERROR, DMA address2 + length - 1, 1, status) -> D2
D2 send_replymsg(ID, MSG_MEM_ERROR, DMA address + length - 1, 1, status) -> D1

IRQ delivery chain D1->D2->D3 with coalescing, messages, delivery
reporting and EOI:
D1 send_msg(ID, MSG_IRQ_RAISE, payload) -> D2
D2 send_msg(ID, MSG_IRQ_RAISE, payload) -> D3
D3 send_replymsg(ID, MSG_IRQ_STATUS, MI_COALESCED) -> D2
D2 send_replymsg(ID, MSG_IRQ_STATUS, MI_COALESCED) -> D1
D3 send_replymsg(ID, MSG_IRQ_STATUS, MI_DELIVERED) -> D2
D2 send_replymsg(ID, MSG_IRQ_STATUS, MI_DELIVERED) -> D1
D3 send_replymsg(ID, MSG_IRQ_STATUS, MI_EOI) -> D2
D2 send_replymsg(ID, MSG_IRQ_STATUS, MI_EOI) -> D1

What else do we want? :-)



[Qemu-devel] [PATCH v2 0/7] APIC/IOAPIC cleanup

2010-06-12 Thread Blue Swirl
Clean up APIC and IOAPIC. Convert both devices to qdev.

v1->v2:
Remove apic.h reorganization.
Add IOAPIC and APIC qdev conversions.
Use CPUState also in 5/7. However on 6/7 we have to again use void *
because of VMState limitations. VMState gurus, please comment.

Blue Swirl (7):
  ioapic: unexport ioapic_set_irq
  ioapic: convert to qdev
  apic: avoid passing CPUState from devices
  apic: avoid passing CPUState from CPU code
  apic: avoid using CPUState internals
  apic: convert to qdev
  apic: qdev conversion cleanup

 hw/apic.c   |  174 +++---
 hw/apic.h   |   21 --
 hw/ioapic.c |   47 
 hw/pc.c |   74 ++--
 hw/pc.h |4 +-
 hw/pc_piix.c|   19 +-
 qemu-common.h   |2 +-
 target-i386/cpu.h   |   28 +---
 target-i386/cpuid.c |6 ++
 target-i386/helper.c|4 +-
 target-i386/kvm.c   |   14 ++--
 target-i386/op_helper.c |8 +-
 12 files changed, 258 insertions(+), 143 deletions(-)



[Qemu-devel] [PATCH v2 3/7] apic: avoid passing CPUState from devices

2010-06-12 Thread Blue Swirl
Pass only APICState from pc.c.

Signed-off-by: Blue Swirl 
---
 hw/apic.c |   32 ++--
 hw/apic.h |8 +---
 hw/pc.c   |   10 ++
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 7fbd79b..c4dc52c 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -94,7 +94,7 @@
 #define MSI_ADDR_BASE   0xfee0
 #define MSI_ADDR_SIZE   0x10

-typedef struct APICState {
+struct APICState {
 CPUState *cpu_env;
 uint32_t apicbase;
 uint8_t id;
@@ -118,7 +118,7 @@ typedef struct APICState {
 QEMUTimer *timer;
 int sipi_vector;
 int wait_for_sipi;
-} APICState;
+};

 static int apic_io_memory;
 static APICState *local_apics[MAX_APICS + 1];
@@ -167,9 +167,8 @@ static inline int get_bit(uint32_t *tab, int index)
 return !!(tab[i] & mask);
 }

-static void apic_local_deliver(CPUState *env, int vector)
+static void apic_local_deliver(APICState *s, int vector)
 {
-APICState *s = env->apic_state;
 uint32_t lvt = s->lvt[vector];
 int trigger_mode;

@@ -180,15 +179,15 @@ static void apic_local_deliver(CPUState *env, int vector)

 switch ((lvt >> 8) & 7) {
 case APIC_DM_SMI:
-cpu_interrupt(env, CPU_INTERRUPT_SMI);
+cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SMI);
 break;

 case APIC_DM_NMI:
-cpu_interrupt(env, CPU_INTERRUPT_NMI);
+cpu_interrupt(s->cpu_env, CPU_INTERRUPT_NMI);
 break;

 case APIC_DM_EXTINT:
-cpu_interrupt(env, CPU_INTERRUPT_HARD);
+cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
 break;

 case APIC_DM_FIXED:
@@ -200,12 +199,11 @@ static void apic_local_deliver(CPUState *env, int vector)
 }
 }

-void apic_deliver_pic_intr(CPUState *env, int level)
+void apic_deliver_pic_intr(APICState *s, int level)
 {
-if (level)
-apic_local_deliver(env, APIC_LVT_LINT0);
-else {
-APICState *s = env->apic_state;
+if (level) {
+apic_local_deliver(s, APIC_LVT_LINT0);
+} else {
 uint32_t lvt = s->lvt[APIC_LVT_LINT0];

 switch ((lvt >> 8) & 7) {
@@ -215,7 +213,7 @@ void apic_deliver_pic_intr(CPUState *env, int level)
 reset_bit(s->irr, lvt & 0xff);
 /* fall through */
 case APIC_DM_EXTINT:
-cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
 break;
 }
 }
@@ -591,9 +589,8 @@ static void apic_deliver(APICState *s, uint8_t
dest, uint8_t dest_mode,
  trigger_mode);
 }

-int apic_get_interrupt(CPUState *env)
+int apic_get_interrupt(APICState *s)
 {
-APICState *s = env->apic_state;
 int intno;

 /* if the APIC is installed or enabled, we let the 8259 handle the
@@ -615,9 +612,8 @@ int apic_get_interrupt(CPUState *env)
 return intno;
 }

-int apic_accept_pic_intr(CPUState *env)
+int apic_accept_pic_intr(APICState *s)
 {
-APICState *s = env->apic_state;
 uint32_t lvt0;

 if (!s)
@@ -679,7 +675,7 @@ static void apic_timer(void *opaque)
 {
 APICState *s = opaque;

-apic_local_deliver(s->cpu_env, APIC_LVT_TIMER);
+apic_local_deliver(s, APIC_LVT_TIMER);
 apic_timer_update(s, s->next_time);
 }

diff --git a/hw/apic.h b/hw/apic.h
index dc41400..e6bce1e 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -1,14 +1,16 @@
 #ifndef APIC_H
 #define APIC_H

+/* apic.c */
+typedef struct APICState APICState;
 void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
  uint8_t trigger_mode);
 int apic_init(CPUState *env);
-int apic_accept_pic_intr(CPUState *env);
-void apic_deliver_pic_intr(CPUState *env, int level);
-int apic_get_interrupt(CPUState *env);
+int apic_accept_pic_intr(APICState *s);
+void apic_deliver_pic_intr(APICState *s, int level);
+int apic_get_interrupt(APICState *s);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

diff --git a/hw/pc.c b/hw/pc.c
index 9b85c42..fe4ebbe 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -142,7 +142,7 @@ int cpu_get_pic_interrupt(CPUState *env)
 {
 int intno;

-intno = apic_get_interrupt(env);
+intno = apic_get_interrupt(env->apic_state);
 if (intno >= 0) {
 /* set irq request if a PIC irq is still pending */
 /* XXX: improve that */
@@ -150,8 +150,9 @@ int cpu_get_pic_interrupt(CPUState *env)
 return intno;
 }
 /* read the irq from the PIC */
-if (!apic_accept_pic_intr(env))
+if (!apic_accept_pic_intr(env->apic_state)) {
 return -1;
+}

 intno = pic_read_irq(isa_pic);
 return intno;
@@ -164,8 +165,9 @@ static void pic_irq_request(void *opaque, int irq,
int level)
 DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
 if (env->apic_state) {
 while (env) {
-if (apic_acce

[Qemu-devel] [PATCH v2 5/7] apic: avoid using CPUState internals

2010-06-12 Thread Blue Swirl
Move the actual CPUState contents handling to cpu.h and cpuid.c.

Handle CPU reset and set env->halted in pc.c.

Add a function to get the local APIC state of the current
CPU for the MMIO.

Signed-off-by: Blue Swirl 
---
 hw/apic.c   |   39 ++-
 hw/apic.h   |   10 +-
 hw/pc.c |   34 --
 target-i386/cpu.h   |   27 ---
 target-i386/cpuid.c |6 ++
 5 files changed, 77 insertions(+), 39 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 91c8d93..632d6eb 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -320,7 +320,7 @@ void cpu_set_apic_base(APICState *s, uint64_t val)
 /* if disabled, cannot be enabled again */
 if (!(val & MSR_IA32_APICBASE_ENABLE)) {
 s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
-s->cpu_env->cpuid_features &= ~CPUID_APIC;
+cpu_clear_apic_feature(s->cpu_env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
 }
@@ -508,8 +508,6 @@ void apic_init_reset(APICState *s)
 s->initial_count_load_time = 0;
 s->next_time = 0;
 s->wait_for_sipi = 1;
-
-s->cpu_env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
 }

 static void apic_startup(APICState *s, int vector_num)
@@ -524,13 +522,7 @@ void apic_sipi(APICState *s)

 if (!s->wait_for_sipi)
 return;
-
-s->cpu_env->eip = 0;
-cpu_x86_load_seg_cache(s->cpu_env, R_CS, s->sipi_vector << 8,
-   s->sipi_vector << 12,
-   s->cpu_env->segs[R_CS].limit,
-   s->cpu_env->segs[R_CS].flags);
-s->cpu_env->halted = 0;
+cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector);
 s->wait_for_sipi = 0;
 }

@@ -692,15 +684,14 @@ static void apic_mem_writew(void *opaque,
target_phys_addr_t addr, uint32_t val)

 static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-CPUState *env;
 APICState *s;
 uint32_t val;
 int index;

-env = cpu_single_env;
-if (!env)
+s = cpu_get_current_apic();
+if (!s) {
 return 0;
-s = env->apic_state;
+}

 index = (addr >> 4) & 0xff;
 switch(index) {
@@ -782,7 +773,6 @@ static void apic_send_msi(target_phys_addr_t addr,
uint32 data)

 static void apic_mem_writel(void *opaque, target_phys_addr_t addr,
uint32_t val)
 {
-CPUState *env;
 APICState *s;
 int index = (addr >> 4) & 0xff;
 if (addr > 0xfff || !index) {
@@ -795,10 +785,10 @@ static void apic_mem_writel(void *opaque,
target_phys_addr_t addr, uint32_t val)
 return;
 }

-env = cpu_single_env;
-if (!env)
+s = cpu_get_current_apic();
+if (!s) {
 return;
-s = env->apic_state;
+}

 DPRINTF("write: " TARGET_FMT_plx " = %08x\n", addr, val);

@@ -949,7 +939,6 @@ static void apic_reset(void *opaque)
 s->apicbase = 0xfee0 |
 (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

-cpu_reset(s->cpu_env);
 apic_init_reset(s);

 if (bsp) {
@@ -974,16 +963,16 @@ static CPUWriteMemoryFunc * const apic_mem_write[3] = {
 apic_mem_writel,
 };

-int apic_init(CPUState *env)
+APICState *apic_init(CPUState *env, uint32_t apic_id)
 {
 APICState *s;

-if (last_apic_idx >= MAX_APICS)
-return -1;
+if (last_apic_idx >= MAX_APICS) {
+return NULL;
+}
 s = qemu_mallocz(sizeof(APICState));
-env->apic_state = s;
 s->idx = last_apic_idx++;
-s->id = env->cpuid_apic_id;
+s->id = apic_id;
 s->cpu_env = env;

 msix_supported = 1;
@@ -1004,5 +993,5 @@ int apic_init(CPUState *env)
 qemu_register_reset(apic_reset, s);

 local_apics[s->idx] = s;
-return 0;
+return s;
 }
diff --git a/hw/apic.h b/hw/apic.h
index e6bce1e..fb4a9cf 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -7,13 +7,21 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
  uint8_t trigger_mode);
-int apic_init(CPUState *env);
+APICState *apic_init(CPUState *env, uint32_t apic_id);
 int apic_accept_pic_intr(APICState *s);
 void apic_deliver_pic_intr(APICState *s, int level);
 int apic_get_interrupt(APICState *s);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);
+void cpu_set_apic_base(APICState *s, uint64_t val);
+uint64_t cpu_get_apic_base(APICState *s);
+void cpu_set_apic_tpr(APICState *s, uint8_t val);
+uint8_t cpu_get_apic_tpr(APICState *s);
+void apic_init_reset(APICState *s);
+void apic_sipi(APICState *s);

+/* pc.c */
 int cpu_is_bsp(CPUState *env);
+APICState *cpu_get_current_apic(void);

 #endif
diff --git a/hw/pc.c b/hw/pc.c
index fe4ebbe..422e273 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -749,6 +749,15 @@ int cpu_is_bsp(CPUState *env)
 return env->cpu_index == 0;
 }

+APICState *cpu_get_current_apic(void)
+{
+if (cpu_single_env) {
+return cpu_sing

[Qemu-devel] [PATCH v2 6/7] apic: convert to qdev

2010-06-12 Thread Blue Swirl
Convert to qdev.

Use an opaque CPUState pointer because of missing VMState
implementation for CPUState.

Signed-off-by: Blue Swirl 
---
 hw/apic.c |   88 +---
 hw/apic.h |2 +-
 2 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 632d6eb..d0cdddb 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -17,13 +17,11 @@
  * License along with this library; if not, see 
  */
 #include "hw.h"
-#include "pc.h"
 #include "apic.h"
-#include "pci.h"
 #include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
-#include "kvm.h"
+#include "sysbus.h"

 //#define DEBUG_APIC
 //#define DEBUG_COALESCING
@@ -95,7 +93,8 @@
 #define MSI_ADDR_SIZE   0x10

 struct APICState {
-CPUState *cpu_env;
+SysBusDevice busdev;
+void *cpu_env;
 uint32_t apicbase;
 uint8_t id;
 uint8_t arb_id;
@@ -120,12 +119,9 @@ struct APICState {
 int wait_for_sipi;
 };

-static int apic_io_memory;
 static APICState *local_apics[MAX_APICS + 1];
-static int last_apic_idx = 0;
 static int apic_irq_delivered;

-
 static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
@@ -930,9 +926,9 @@ static const VMStateDescription vmstate_apic = {
 }
 };

-static void apic_reset(void *opaque)
+static void apic_reset(DeviceState *d)
 {
-APICState *s = opaque;
+APICState *s = container_of(d, APICState, busdev.qdev);
 int bsp;

 bsp = cpu_is_bsp(s->cpu_env);
@@ -963,35 +959,71 @@ static CPUWriteMemoryFunc * const apic_mem_write[3] = {
 apic_mem_writel,
 };

-APICState *apic_init(CPUState *env, uint32_t apic_id)
+APICState *apic_init(void *env, uint8_t apic_id)
 {
+DeviceState *dev;
+SysBusDevice *d;
 APICState *s;
+static int apic_mapped;

-if (last_apic_idx >= MAX_APICS) {
-return NULL;
-}
-s = qemu_mallocz(sizeof(APICState));
-s->idx = last_apic_idx++;
-s->id = apic_id;
-s->cpu_env = env;
-
-msix_supported = 1;
+dev = qdev_create(NULL, "apic");
+qdev_prop_set_uint8(dev, "id", apic_id);
+qdev_prop_set_ptr(dev, "cpu_env", env);
+qdev_init_nofail(dev);
+d = sysbus_from_qdev(dev);

 /* XXX: mapping more APICs at the same memory location */
-if (apic_io_memory == 0) {
+if (apic_mapped == 0) {
 /* NOTE: the APIC is directly connected to the CPU - it is not
on the global memory bus. */
-apic_io_memory = cpu_register_io_memory(apic_mem_read,
-apic_mem_write, NULL);
 /* XXX: what if the base changes? */
-cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
- apic_io_memory);
+sysbus_mmio_map(d, 0, MSI_ADDR_BASE);
+apic_mapped = 1;
 }
-s->timer = qemu_new_timer(vm_clock, apic_timer, s);

-vmstate_register(s->idx, &vmstate_apic, s);
-qemu_register_reset(apic_reset, s);
+msix_supported = 1;
+
+s = container_of(dev, APICState, busdev.qdev);

-local_apics[s->idx] = s;
 return s;
 }
+
+static int apic_init1(SysBusDevice *dev)
+{
+APICState *s = FROM_SYSBUS(APICState, dev);
+int apic_io_memory;
+static int last_apic_idx;
+
+if (last_apic_idx >= MAX_APICS) {
+return -1;
+}
+apic_io_memory = cpu_register_io_memory(apic_mem_read,
+apic_mem_write, NULL);
+sysbus_init_mmio(dev, MSI_ADDR_SIZE, apic_io_memory);
+
+s->timer = qemu_new_timer(vm_clock, apic_timer, s);
+s->idx = last_apic_idx++;
+local_apics[s->idx] = s;
+return 0;
+}
+
+static SysBusDeviceInfo apic_info = {
+.init = apic_init1,
+.qdev.name = "apic",
+.qdev.size = sizeof(APICState),
+.qdev.vmsd = &vmstate_apic,
+.qdev.reset = apic_reset,
+.qdev.no_user = 1,
+.qdev.props = (Property[]) {
+DEFINE_PROP_UINT8("id", APICState, id, -1),
+DEFINE_PROP_PTR("cpu_env", APICState, cpu_env),
+DEFINE_PROP_END_OF_LIST(),
+}
+};
+
+static void apic_register_devices(void)
+{
+sysbus_register_withprop(&apic_info);
+}
+
+device_init(apic_register_devices)
diff --git a/hw/apic.h b/hw/apic.h
index fb4a9cf..77078ca 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -7,7 +7,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
  uint8_t trigger_mode);
-APICState *apic_init(CPUState *env, uint32_t apic_id);
+APICState *apic_init(void *env, uint8_t apic_id);
 int apic_accept_pic_intr(APICState *s);
 void apic_deliver_pic_intr(APICState *s, int level);
 int apic_get_interrupt(APICState *s);
-- 
1.7.1



[Qemu-devel] [PATCH v2 1/7] ioapic: unexport ioapic_set_irq

2010-06-12 Thread Blue Swirl
There's no need to use ioapic_set_irq() outside of ioapic.c, so
make it static.

Signed-off-by: Blue Swirl 
---
 hw/apic.h   |1 -
 hw/ioapic.c |2 +-
 2 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/hw/apic.h b/hw/apic.h
index 132fcab..e1954f4 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -11,7 +11,6 @@ int apic_accept_pic_intr(CPUState *env);
 void apic_deliver_pic_intr(CPUState *env, int level);
 int apic_get_interrupt(CPUState *env);
 qemu_irq *ioapic_init(void);
-void ioapic_set_irq(void *opaque, int vector, int level);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

diff --git a/hw/ioapic.c b/hw/ioapic.c
index 335da6e..e3f8a46 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -94,7 +94,7 @@ static void ioapic_service(IOAPICState *s)
 }
 }

-void ioapic_set_irq(void *opaque, int vector, int level)
+static void ioapic_set_irq(void *opaque, int vector, int level)
 {
 IOAPICState *s = opaque;

-- 
1.7.1



[Qemu-devel] [PATCH v2 4/7] apic: avoid passing CPUState from CPU code

2010-06-12 Thread Blue Swirl
Pass only APICState when accessing APIC from CPU code.

Signed-off-by: Blue Swirl 
---
 hw/apic.c   |   39 ---
 target-i386/cpu.h   |   13 +++--
 target-i386/helper.c|4 ++--
 target-i386/kvm.c   |   14 +++---
 target-i386/op_helper.c |8 
 5 files changed, 36 insertions(+), 42 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index c4dc52c..91c8d93 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -310,10 +310,8 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  trigger_mode);
 }

-void cpu_set_apic_base(CPUState *env, uint64_t val)
+void cpu_set_apic_base(APICState *s, uint64_t val)
 {
-APICState *s = env->apic_state;
-
 DPRINTF("cpu_set_apic_base: %016" PRIx64 "\n", val);
 if (!s)
 return;
@@ -322,32 +320,28 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
 /* if disabled, cannot be enabled again */
 if (!(val & MSR_IA32_APICBASE_ENABLE)) {
 s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
-env->cpuid_features &= ~CPUID_APIC;
+s->cpu_env->cpuid_features &= ~CPUID_APIC;
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
 }

-uint64_t cpu_get_apic_base(CPUState *env)
+uint64_t cpu_get_apic_base(APICState *s)
 {
-APICState *s = env->apic_state;
-
 DPRINTF("cpu_get_apic_base: %016" PRIx64 "\n",
 s ? (uint64_t)s->apicbase: 0);
 return s ? s->apicbase : 0;
 }

-void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
+void cpu_set_apic_tpr(APICState *s, uint8_t val)
 {
-APICState *s = env->apic_state;
 if (!s)
 return;
 s->tpr = (val & 0x0f) << 4;
 apic_update_irq(s);
 }

-uint8_t cpu_get_apic_tpr(CPUX86State *env)
+uint8_t cpu_get_apic_tpr(APICState *s)
 {
-APICState *s = env->apic_state;
 return s ? s->tpr >> 4 : 0;
 }

@@ -490,9 +484,8 @@ static void apic_get_delivery_bitmask(uint32_t
*deliver_bitmask,
 }


-void apic_init_reset(CPUState *env)
+void apic_init_reset(APICState *s)
 {
-APICState *s = env->apic_state;
 int i;

 if (!s)
@@ -516,7 +509,7 @@ void apic_init_reset(CPUState *env)
 s->next_time = 0;
 s->wait_for_sipi = 1;

-env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
+s->cpu_env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
 }

 static void apic_startup(APICState *s, int vector_num)
@@ -525,19 +518,19 @@ static void apic_startup(APICState *s, int vector_num)
 cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
 }

-void apic_sipi(CPUState *env)
+void apic_sipi(APICState *s)
 {
-APICState *s = env->apic_state;
-
-cpu_reset_interrupt(env, CPU_INTERRUPT_SIPI);
+cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);

 if (!s->wait_for_sipi)
 return;

-env->eip = 0;
-cpu_x86_load_seg_cache(env, R_CS, s->sipi_vector << 8,
s->sipi_vector << 12,
-   env->segs[R_CS].limit, env->segs[R_CS].flags);
-env->halted = 0;
+s->cpu_env->eip = 0;
+cpu_x86_load_seg_cache(s->cpu_env, R_CS, s->sipi_vector << 8,
+   s->sipi_vector << 12,
+   s->cpu_env->segs[R_CS].limit,
+   s->cpu_env->segs[R_CS].flags);
+s->cpu_env->halted = 0;
 s->wait_for_sipi = 0;
 }

@@ -957,7 +950,7 @@ static void apic_reset(void *opaque)
 (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

 cpu_reset(s->cpu_env);
-apic_init_reset(s->cpu_env);
+apic_init_reset(s);

 if (bsp) {
 /*
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 548ab80..0b19fe3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -860,11 +860,12 @@ void cpu_x86_update_cr3(CPUX86State *env,
target_ulong new_cr3);
 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);

 /* hw/apic.c */
-void cpu_set_apic_base(CPUX86State *env, uint64_t val);
-uint64_t cpu_get_apic_base(CPUX86State *env);
-void cpu_set_apic_tpr(CPUX86State *env, uint8_t val);
+typedef struct APICState APICState;
+void cpu_set_apic_base(APICState *s, uint64_t val);
+uint64_t cpu_get_apic_base(APICState *s);
+void cpu_set_apic_tpr(APICState *s, uint8_t val);
 #ifndef NO_CPU_IO_DEFS
-uint8_t cpu_get_apic_tpr(CPUX86State *env);
+uint8_t cpu_get_apic_tpr(APICState *s);
 #endif

 /* hw/pc.c */
@@ -942,8 +943,8 @@ static inline void cpu_get_tb_cpu_state(CPUState
*env, target_ulong *pc,
 (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
 }

-void apic_init_reset(CPUState *env);
-void apic_sipi(CPUState *env);
+void apic_init_reset(APICState *s);
+void apic_sipi(APICState *s);
 void do_cpu_init(CPUState *env);
 void do_cpu_sipi(CPUState *env);
 #endif /* CPU_I386_H */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index c9508a8..718394c 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1150,12 +1150,12 @@ void do_cpu_init(CPUState *env)
 int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI;
 cpu_reset(env);
 env->inte

[Qemu-devel] [PATCH v2 2/7] ioapic: convert to qdev

2010-06-12 Thread Blue Swirl
Convert to qdev.

Signed-off-by: Blue Swirl 
---
 hw/apic.h|2 --
 hw/ioapic.c  |   45 ++---
 hw/pc.h  |4 +++-
 hw/pc_piix.c |   19 ++-
 4 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/hw/apic.h b/hw/apic.h
index e1954f4..dc41400 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -1,7 +1,6 @@
 #ifndef APIC_H
 #define APIC_H

-typedef struct IOAPICState IOAPICState;
 void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
@@ -10,7 +9,6 @@ int apic_init(CPUState *env);
 int apic_accept_pic_intr(CPUState *env);
 void apic_deliver_pic_intr(CPUState *env, int level);
 int apic_get_interrupt(CPUState *env);
-qemu_irq *ioapic_init(void);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

diff --git a/hw/ioapic.c b/hw/ioapic.c
index e3f8a46..0336dbd 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -25,6 +25,7 @@
 #include "apic.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
+#include "sysbus.h"

 //#define DEBUG_IOAPIC

@@ -35,7 +36,6 @@
 #define DPRINTF(fmt, ...)
 #endif

-#define IOAPIC_NUM_PINS0x18
 #define IOAPIC_LVT_MASKED  (1<<16)

 #define IOAPIC_TRIGGER_EDGE0
@@ -50,7 +50,10 @@
 #define IOAPIC_DM_SIPI 0x5
 #define IOAPIC_DM_EXTINT   0x7

+typedef struct IOAPICState IOAPICState;
+
 struct IOAPICState {
+SysBusDevice busdev;
 uint8_t id;
 uint8_t ioregsel;

@@ -209,12 +212,14 @@ static const VMStateDescription vmstate_ioapic = {
 }
 };

-static void ioapic_reset(void *opaque)
+static void ioapic_reset(DeviceState *d)
 {
-IOAPICState *s = opaque;
+IOAPICState *s = container_of(d, IOAPICState, busdev.qdev);
 int i;

-memset(s, 0, sizeof(*s));
+s->id = 0;
+s->ioregsel = 0;
+s->irr = 0;
 for(i = 0; i < IOAPIC_NUM_PINS; i++)
 s->ioredtbl[i] = 1 << 16; /* mask LVT */
 }
@@ -231,22 +236,32 @@ static CPUWriteMemoryFunc * const ioapic_mem_write[3] = {
 ioapic_mem_writel,
 };

-qemu_irq *ioapic_init(void)
+static int ioapic_init1(SysBusDevice *dev)
 {
-IOAPICState *s;
-qemu_irq *irq;
+IOAPICState *s = FROM_SYSBUS(IOAPICState, dev);
 int io_memory;

-s = qemu_mallocz(sizeof(IOAPICState));
-ioapic_reset(s);
-
 io_memory = cpu_register_io_memory(ioapic_mem_read,
ioapic_mem_write, s);
-cpu_register_physical_memory(0xfec0, 0x1000, io_memory);
+sysbus_init_mmio(dev, 0x1000, io_memory);

-vmstate_register(0, &vmstate_ioapic, s);
-qemu_register_reset(ioapic_reset, s);
-irq = qemu_allocate_irqs(ioapic_set_irq, s, IOAPIC_NUM_PINS);
+qdev_init_gpio_in(&dev->qdev, ioapic_set_irq, IOAPIC_NUM_PINS);

-return irq;
+return 0;
 }
+
+static SysBusDeviceInfo ioapic_info = {
+.init = ioapic_init1,
+.qdev.name = "ioapic",
+.qdev.size = sizeof(IOAPICState),
+.qdev.vmsd = &vmstate_ioapic,
+.qdev.reset = ioapic_reset,
+.qdev.no_user = 1,
+};
+
+static void ioapic_register_devices(void)
+{
+sysbus_register_withprop(&ioapic_info);
+}
+
+device_init(ioapic_register_devices)
diff --git a/hw/pc.h b/hw/pc.h
index 0e52933..ccfd7ad 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -39,9 +39,11 @@ void pic_info(Monitor *mon);
 void irq_info(Monitor *mon);

 /* ISA */
+#define IOAPIC_NUM_PINS 0x18
+
 typedef struct isa_irq_state {
 qemu_irq *i8259;
-qemu_irq *ioapic;
+qemu_irq ioapic[IOAPIC_NUM_PINS];
 } IsaIrqState;

 void isa_irq_handler(void *opaque, int n, int level);
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 70f563a..852727e 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -32,6 +32,7 @@
 #include "boards.h"
 #include "ide.h"
 #include "kvm.h"
+#include "sysbus.h"

 #define MAX_IDE_BUS 2

@@ -39,6 +40,22 @@ static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
 static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
 static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };

+static void ioapic_init(IsaIrqState *isa_irq_state)
+{
+DeviceState *dev;
+SysBusDevice *d;
+unsigned int i;
+
+dev = qdev_create(NULL, "ioapic");
+qdev_init_nofail(dev);
+d = sysbus_from_qdev(dev);
+sysbus_mmio_map(d, 0, 0xfec0);
+
+for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+isa_irq_state->ioapic[i] = qdev_get_gpio_in(dev, i);
+}
+}
+
 /* PC hardware initialisation */
 static void pc_init1(ram_addr_t ram_size,
  const char *boot_device,
@@ -76,7 +93,7 @@ static void pc_init1(ram_addr_t ram_size,
 isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state));
 isa_irq_state->i8259 = i8259;
 if (pci_enabled) {
-isa_irq_state->ioapic = ioapic_init();
+ioapic_init(isa_irq_state);
 }
 isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);

-- 
1.7.1



[Qemu-devel] [PATCH v2 7/7] apic: qdev conversion cleanup

2010-06-12 Thread Blue Swirl
Make APICState completely private to apic.c by using DeviceState
in external APIs.

Move apic_init() to pc.c.

Signed-off-by: Blue Swirl 
---
 hw/apic.c |   86 +++-
 hw/apic.h |   24 +++---
 hw/pc.c   |   32 +++-
 qemu-common.h |2 +-
 target-i386/cpu.h |2 +-
 5 files changed, 83 insertions(+), 63 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index d0cdddb..89374fe 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -18,7 +18,6 @@
  */
 #include "hw.h"
 #include "apic.h"
-#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 #include "sysbus.h"
@@ -89,9 +88,10 @@
 #define MSI_ADDR_DEST_ID_SHIFT 12
 #defineMSI_ADDR_DEST_ID_MASK   0x000

-#define MSI_ADDR_BASE   0xfee0
 #define MSI_ADDR_SIZE   0x10

+typedef struct APICState APICState;
+
 struct APICState {
 SysBusDevice busdev;
 void *cpu_env;
@@ -195,8 +195,10 @@ static void apic_local_deliver(APICState *s, int vector)
 }
 }

-void apic_deliver_pic_intr(APICState *s, int level)
+void apic_deliver_pic_intr(DeviceState *d, int level)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
+
 if (level) {
 apic_local_deliver(s, APIC_LVT_LINT0);
 } else {
@@ -306,8 +308,10 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  trigger_mode);
 }

-void cpu_set_apic_base(APICState *s, uint64_t val)
+void cpu_set_apic_base(DeviceState *d, uint64_t val)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
+
 DPRINTF("cpu_set_apic_base: %016" PRIx64 "\n", val);
 if (!s)
 return;
@@ -321,23 +325,29 @@ void cpu_set_apic_base(APICState *s, uint64_t val)
 }
 }

-uint64_t cpu_get_apic_base(APICState *s)
+uint64_t cpu_get_apic_base(DeviceState *d)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
+
 DPRINTF("cpu_get_apic_base: %016" PRIx64 "\n",
 s ? (uint64_t)s->apicbase: 0);
 return s ? s->apicbase : 0;
 }

-void cpu_set_apic_tpr(APICState *s, uint8_t val)
+void cpu_set_apic_tpr(DeviceState *d, uint8_t val)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
+
 if (!s)
 return;
 s->tpr = (val & 0x0f) << 4;
 apic_update_irq(s);
 }

-uint8_t cpu_get_apic_tpr(APICState *s)
+uint8_t cpu_get_apic_tpr(DeviceState *d)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
+
 return s ? s->tpr >> 4 : 0;
 }

@@ -479,9 +489,9 @@ static void apic_get_delivery_bitmask(uint32_t
*deliver_bitmask,
 }
 }

-
-void apic_init_reset(APICState *s)
+void apic_init_reset(DeviceState *d)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
 int i;

 if (!s)
@@ -512,8 +522,10 @@ static void apic_startup(APICState *s, int vector_num)
 cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
 }

-void apic_sipi(APICState *s)
+void apic_sipi(DeviceState *d)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
+
 cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);

 if (!s->wait_for_sipi)
@@ -522,10 +534,11 @@ void apic_sipi(APICState *s)
 s->wait_for_sipi = 0;
 }

-static void apic_deliver(APICState *s, uint8_t dest, uint8_t dest_mode,
+static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode, uint8_t vector_num,
  uint8_t polarity, uint8_t trigger_mode)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
 uint32_t deliver_bitmask[MAX_APIC_WORDS];
 int dest_shorthand = (s->icr[0] >> 18) & 3;
 APICState *apic_iter;
@@ -570,8 +583,9 @@ static void apic_deliver(APICState *s, uint8_t
dest, uint8_t dest_mode,
  trigger_mode);
 }

-int apic_get_interrupt(APICState *s)
+int apic_get_interrupt(DeviceState *d)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
 int intno;

 /* if the APIC is installed or enabled, we let the 8259 handle the
@@ -593,8 +607,9 @@ int apic_get_interrupt(APICState *s)
 return intno;
 }

-int apic_accept_pic_intr(APICState *s)
+int apic_accept_pic_intr(DeviceState *d)
 {
+APICState *s = container_of(d, APICState, busdev.qdev);
 uint32_t lvt0;

 if (!s)
@@ -680,14 +695,16 @@ static void apic_mem_writew(void *opaque,
target_phys_addr_t addr, uint32_t val)

 static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
 {
+DeviceState *d;
 APICState *s;
 uint32_t val;
 int index;

-s = cpu_get_current_apic();
-if (!s) {
+d = cpu_get_current_apic();
+if (!d) {
 return 0;
 }
+s = container_of(d, APICState, busdev.qdev);

 index = (addr >> 4) & 0xff;
 switch(index) {
@@ -769,6 +786,7 @@ static void apic_send_msi(target_phys_addr_t addr,
uint32 data)

 static void apic_mem_writel(void *opaque, target_phys_addr_t addr,
uint32_t val)
 {
+DeviceState *d;
 APICStat

[Qemu-devel] [Bug 581737] Re: Can't read e1000 NIC EEPROM on NetBSD guest

2010-06-12 Thread Izumi Tsutsui
> Please email the patch to qemu-devel@nongnu.org via git-send-email.
Isn't the following post enough? What's incomplete on this?
http://lists.nongnu.org/archive/html/qemu-devel/2010-06/msg00449.html
(sorry I'm not familiar with git)

-- 
Can't read e1000 NIC EEPROM on NetBSD guest
https://bugs.launchpad.net/bugs/581737
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: Incomplete

Bug description:
QEMU Version: qemu-0.12.4
Host OS: NetBSD/i386 5.0.2
Guest OS: NetBSD/i386 5.1_RC1

On this environment, guest NetBSD tries to attach e1000 NIC using its own wm(4) 
driver but fails to read EEPROM as the following:
---
NetBSD 5.1_RC1 (GENERIC) #0: Sat Apr 24 23:26:09 UTC 2010

bui...@b7.netbsd.org:/home/builds/ab/netbsd-5-1-RC1/i386/201004250032Z-obj/home/builds/ab/
netbsd-5-1-RC1/src/sys/arch/i386/compile/GENERIC
total memory = 127 MB
avail memory = 113 MB
Bochs Bochs
 :
drm at vga1 not configured
wm0 at pci0 dev 3 function 0: Intel i82540EM 1000BASE-T Ethernet, rev. 3
wm0: interrupting at irq 11
wm0: unable to read Ethernet address
isa0 at pcib0
 :
---

You can reproduce this with NetBSD/i386 install CD image:
 ftp://ftp.NetBSD.org/pub/NetBSD/NetBSD-5.1_RC1/iso/i386cd-5.1_RC1.iso
 % qemu -cdrom i386cd-5.1_RC1.iso -boot d
 ---in QEMU window---
 [type ^C to quit installer]
 # dmesg | grep wm0
--

Per DBGOUT(EEPROM) messages, it show too large eecd_state.bitnum values, i.e. 
EEPROM state is not reset properly.
The set_eecd() function in e1000.c clears EEPROM internal state values on SK 
rising edge during CS==L.
But according to FM93C06 EEPROM (which is MicroWire compatible) data sheet,
EEPROM internal status should be cleared on CS rise edge regardless of SK input:
 "... a rising edge on this signal is required to reset the internal 
state-machine to accept a new cycle .."

Intel's em driver seems to explicitly raise and lower SK output after CS is 
negated in em_standby_eeprom()
so many other OSes that use Intel's driver don't have this problem with current 
e1000.c implementation,
but I can't find articles that say the MICROWIRE or EEPROM spec requires such 
sequence.

With the attached patch, NetBSD guest properly gets MAC address from e1000 NIC 
EEPROM.





Re: [Qemu-devel] [PATCH 09/16] Enable message delivery via IRQs

2010-06-12 Thread Blue Swirl
On Sat, Jun 12, 2010 at 8:32 PM, Blue Swirl  wrote:
> On Sat, Jun 12, 2010 at 7:33 PM, Blue Swirl  wrote:
>> On Sat, Jun 12, 2010 at 3:58 PM, Paul Brook  wrote:
 I think message passing interrupts
 are only used in bus based systems, like PCI or UPA/JBUS etc. I don't
 know how LAPIC/IOAPIC bus works, it could be similar.
>>>
>>> PCI Message Signalled Interrupts use a regular data write, and we model it
>>> exactly that way. Under normal circumstances you program the device to write
>>> to memory mapped APIC control registers, but there's no real reason why you
>>> couldn't write to RAM.
>>>
>>> LAPIC/IOAPIC have their own bus. On some hardware this is multiplexed over 
>>> the
>>> main system bus, but logically it is a separate interconnect.
>>>
>>> The fact that these a bus based systems suggests against using qemu_irq, 
>>> which
>>> is an inherently point-point system.
>>>
 > How is a
 > receiver meant to know for format of the message being delivered, and who
 > it's intended for?

 This depends on the architecture. On Sparc64 the message is specified
 by the guest or OF.
...
 In real HW (MSI or Sparc64 mondo interrupts) a bus delivers the
 message. But our buses only handle address and data lines.
>>>
>>> IIUC you're trying to use qemu_irq as a generic interconnect between 
>>> devices.
>>> I think that's going to cause more problems than it solves.  If a bus has
>>> additional message passing capabilities then this can be part of the bus
>>> interface.
>>>
>>> If two devices need a private communication channel then we probably want 
>>> some
>>> implementation agnostic way of defining this link.  Connecting LAPIC to 
>>> IOAPIC
>>> is a potential example of this. However this needs to be done in a type-safe
>>> manner.  DMA channels may be another potential use of this linking. We'd 
>>> want
>>> to avoid connecting a DMA channel to an APIC.
>>>
>>> [*] A simple unidirectional dma request line is suitable for qmu_irq. A DMA
>>> system that transfers data outside of memory read/write transactions is not.
>>> e.g. ISA effectively defines a regular memory bus plus 8 bidirectional data
>>> streams (aka DMA channels). These are multiplexed over the same physical 
>>> pins,
>>> but logically distinct. The DMA channels could either be bundled up into the
>>> ISA bus interface, or modelled as links between devices and the DMA
>>> controller.
>>
>> Very very interesting. There's some out of band data related to DMA
>> (bus errors, IOMMU faults, ISA DREQ) which hasn't been covered in
>> previous Generic DMA proposals. Maybe all we need is a generic side
>> band link, which would be usable for point to point (qemu_irq,
>> coalescing, messages) and bus channels?
>
> I think we could solve all problems (well, maybe not world peace, yet)
> by switching to message based system for all of DMA and IRQs.
>
> Each device would have a message input port and way to output messages.
>
> Examples:
>
> Zero copy memory access from device D1 to D2 to host memory (D3) with
> access broken to page length units and errors occurring on the last
> byte:
> D1 send_msg(ID, MSG_MEM_WRITE, DMA address, length) -> D2
> D2 send_msg(ID, MSG_MEM_WRITE, DMA address2, length) -> D3
> D3 send_replymsg(ID, MSG_MEM_PTR, host address, 4096) -> D2
> D2 send_replymsg(ID, MSG_MEM_PTR, host address, 4096) -> D1
> D3 send_replymsg(ID, MSG_MEM_PTR, host address, length - 4096 - 1) -> D2
> D2 send_replymsg(ID, MSG_MEM_PTR, host address, length - 4096 - 1) -> D1
> D3 send_replymsg(ID, MSG_MEM_ERROR, DMA address2 + length - 1, 1, status) -> 
> D2
> D2 send_replymsg(ID, MSG_MEM_ERROR, DMA address + length - 1, 1, status) -> D1

This could be implemented with small modifications to existing
cpu_physical_memory_rw() interface. Improved devices would check if a
channel back has been registered. If not, current methods are used,
same with unimproved devices. If yes, translated pointers will flow
back for zero copy DMA.

> IRQ delivery chain D1->D2->D3 with coalescing, messages, delivery
> reporting and EOI:
> D1 send_msg(ID, MSG_IRQ_RAISE, payload) -> D2
> D2 send_msg(ID, MSG_IRQ_RAISE, payload) -> D3
> D3 send_replymsg(ID, MSG_IRQ_STATUS, MI_COALESCED) -> D2
> D2 send_replymsg(ID, MSG_IRQ_STATUS, MI_COALESCED) -> D1
> D3 send_replymsg(ID, MSG_IRQ_STATUS, MI_DELIVERED) -> D2
> D2 send_replymsg(ID, MSG_IRQ_STATUS, MI_DELIVERED) -> D1
> D3 send_replymsg(ID, MSG_IRQ_STATUS, MI_EOI) -> D2
> D2 send_replymsg(ID, MSG_IRQ_STATUS, MI_EOI) -> D1

Same here: send_msg could be in fact be qemu_irq_raise(). Without a
return chain, no replies will be sent. For messages, an augmented
forward chain must exist, otherwise the messages will be ignored.