[Qemu-devel] [PATCH 11/12] hw/omap1.c : separate uart module
Signed-off-by: cmchao --- Makefile.target |3 +- hw/omap1.c | 170 hw/omap_uart.c | 194 +++ 3 files changed, 196 insertions(+), 171 deletions(-) create mode 100644 hw/omap_uart.c diff --git a/Makefile.target b/Makefile.target index 20bcb8a..a01daa4 100644 --- a/Makefile.target +++ b/Makefile.target @@ -263,7 +263,8 @@ obj-arm-y += pxa2xx.o pxa2xx_pic.o pxa2xx_gpio.o pxa2xx_timer.o pxa2xx_dma.o obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o pxa2xx_keypad.o obj-arm-y += gumstix.o obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o -obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o omap_gpio.o omap_intc.o +obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o \ + omap_gpio.o omap_intc.o omap_uart.o obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o \ omap_gpmc.o omap_sdrc.o omap_spi.o omap_tap.o omap_l4.o obj-arm-y += omap_sx1.o palm.o tsc210x.o diff --git a/hw/omap1.c b/hw/omap1.c index 21c53fe..301eec5 100644 --- a/hw/omap1.c +++ b/hw/omap1.c @@ -1378,176 +1378,6 @@ static void omap_dpll_init(struct dpll_ctl_s *s, target_phys_addr_t base, cpu_register_physical_memory(base, 0x100, iomemtype); } -/* UARTs */ -struct omap_uart_s { -target_phys_addr_t base; -SerialState *serial; /* TODO */ -struct omap_target_agent_s *ta; -omap_clk fclk; -qemu_irq irq; - -uint8_t eblr; -uint8_t syscontrol; -uint8_t wkup; -uint8_t cfps; -uint8_t mdr[2]; -uint8_t scr; -uint8_t clksel; -}; - -void omap_uart_reset(struct omap_uart_s *s) -{ -s->eblr = 0x00; -s->syscontrol = 0; -s->wkup = 0x3f; -s->cfps = 0x69; -s->clksel = 0; -} - -struct omap_uart_s *omap_uart_init(target_phys_addr_t base, -qemu_irq irq, omap_clk fclk, omap_clk iclk, -qemu_irq txdma, qemu_irq rxdma, CharDriverState *chr) -{ -struct omap_uart_s *s = (struct omap_uart_s *) -qemu_mallocz(sizeof(struct omap_uart_s)); - -s->base = base; -s->fclk = fclk; -s->irq = irq; -#ifdef TARGET_WORDS_BIGENDIAN -s->serial = serial_mm_init(base, 2, irq, omap_clk_getrate(fclk)/16, - chr ?: qemu_chr_open("null", "null", NULL), 1, - 1); -#else -s->serial = serial_mm_init(base, 2, irq, omap_clk_getrate(fclk)/16, - chr ?: qemu_chr_open("null", "null", NULL), 1, - 0); -#endif -return s; -} - -static uint32_t omap_uart_read(void *opaque, target_phys_addr_t addr) -{ -struct omap_uart_s *s = (struct omap_uart_s *) opaque; - -addr &= 0xff; -switch (addr) { -case 0x20: /* MDR1 */ -return s->mdr[0]; -case 0x24: /* MDR2 */ -return s->mdr[1]; -case 0x40: /* SCR */ -return s->scr; -case 0x44: /* SSR */ -return 0x0; -case 0x48: /* EBLR (OMAP2) */ -return s->eblr; -case 0x4C: /* OSC_12M_SEL (OMAP1) */ -return s->clksel; -case 0x50: /* MVR */ -return 0x30; -case 0x54: /* SYSC (OMAP2) */ -return s->syscontrol; -case 0x58: /* SYSS (OMAP2) */ -return 1; -case 0x5c: /* WER (OMAP2) */ -return s->wkup; -case 0x60: /* CFPS (OMAP2) */ -return s->cfps; -} - -OMAP_BAD_REG(addr); -return 0; -} - -static void omap_uart_write(void *opaque, target_phys_addr_t addr, -uint32_t value) -{ -struct omap_uart_s *s = (struct omap_uart_s *) opaque; - -addr &= 0xff; -switch (addr) { -case 0x20: /* MDR1 */ -s->mdr[0] = value & 0x7f; -break; -case 0x24: /* MDR2 */ -s->mdr[1] = value & 0xff; -break; -case 0x40: /* SCR */ -s->scr = value & 0xff; -break; -case 0x48: /* EBLR (OMAP2) */ -s->eblr = value & 0xff; -break; -case 0x4C: /* OSC_12M_SEL (OMAP1) */ -s->clksel = value & 1; -break; -case 0x44: /* SSR */ -case 0x50: /* MVR */ -case 0x58: /* SYSS (OMAP2) */ -OMAP_RO_REG(addr); -break; -case 0x54: /* SYSC (OMAP2) */ -s->syscontrol = value & 0x1d; -if (value & 2) -omap_uart_reset(s); -break; -case 0x5c: /* WER (OMAP2) */ -s->wkup = value & 0x7f; -break; -case 0x60: /* CFPS (OMAP2) */ -s->cfps = value & 0xff; -break; -default: -OMAP_BAD_REG(addr); -} -} - -static CPUReadMemoryFunc * const omap_uart_readfn[] = { -omap_uart_read, -omap_uart_read, -omap_badwidth_read8, -}; - -static CPUWriteMemoryFunc * const omap_uart_writefn[] = { -omap_uart_write, -omap_uart_write, -omap_badwidth_write8, -}; - -struct omap_uart_s *omap2_uart_init(struct omap_target_agent_s *ta, -qemu_irq irq, omap_
[Qemu-devel] [PATCH 08/12] hw/omap2.c : separate spi module
Signed-off-by: cmchao --- Makefile.target |2 +- hw/omap.h |2 + hw/omap2.c | 323 --- hw/omap_spi.c | 346 +++ 4 files changed, 349 insertions(+), 324 deletions(-) create mode 100644 hw/omap_spi.c diff --git a/Makefile.target b/Makefile.target index 9a309e2..1edec6f 100644 --- a/Makefile.target +++ b/Makefile.target @@ -264,7 +264,7 @@ obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o pxa2xx_keypad.o obj-arm-y += gumstix.o obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o omap_gpio.o omap_intc.o -obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o omap_gpmc.o omap_sdrc.o +obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o omap_gpmc.o omap_sdrc.o omap_spi.o obj-arm-y += omap_sx1.o palm.o tsc210x.o obj-arm-y += nseries.o blizzard.o onenand.o vga.o cbus.o tusb6010.o usb-musb.o obj-arm-y += mst_fpga.o mainstone.o diff --git a/hw/omap.h b/hw/omap.h index ea23ec9..fef495a 100644 --- a/hw/omap.h +++ b/hw/omap.h @@ -706,12 +706,14 @@ struct omap_uwire_s *omap_uwire_init(target_phys_addr_t base, void omap_uwire_attach(struct omap_uwire_s *s, uWireSlave *slave, int chipselect); +/* OMAP2 spi */ struct omap_mcspi_s; struct omap_mcspi_s *omap_mcspi_init(struct omap_target_agent_s *ta, int chnum, qemu_irq irq, qemu_irq *drq, omap_clk fclk, omap_clk iclk); void omap_mcspi_attach(struct omap_mcspi_s *s, uint32_t (*txrx)(void *opaque, uint32_t, int), void *opaque, int chipselect); +void omap_mcspi_reset(struct omap_mcspi_s *s); struct omap_rtc_s; struct omap_rtc_s *omap_rtc_init(target_phys_addr_t base, diff --git a/hw/omap2.c b/hw/omap2.c index e6d1b52..ae6394e 100644 --- a/hw/omap2.c +++ b/hw/omap2.c @@ -27,329 +27,6 @@ #include "soc_dma.h" #include "audio/audio.h" -/* Multichannel SPI */ -struct omap_mcspi_s { -qemu_irq irq; -int chnum; - -uint32_t sysconfig; -uint32_t systest; -uint32_t irqst; -uint32_t irqen; -uint32_t wken; -uint32_t control; - -struct omap_mcspi_ch_s { -qemu_irq txdrq; -qemu_irq rxdrq; -uint32_t (*txrx)(void *opaque, uint32_t, int); -void *opaque; - -uint32_t tx; -uint32_t rx; - -uint32_t config; -uint32_t status; -uint32_t control; -} ch[4]; -}; - -static inline void omap_mcspi_interrupt_update(struct omap_mcspi_s *s) -{ -qemu_set_irq(s->irq, s->irqst & s->irqen); -} - -static inline void omap_mcspi_dmarequest_update(struct omap_mcspi_ch_s *ch) -{ -qemu_set_irq(ch->txdrq, -(ch->control & 1) && /* EN */ -(ch->config & (1 << 14)) &&/* DMAW */ -(ch->status & (1 << 1)) && /* TXS */ -((ch->config >> 12) & 3) != 1);/* TRM */ -qemu_set_irq(ch->rxdrq, -(ch->control & 1) && /* EN */ -(ch->config & (1 << 15)) &&/* DMAW */ -(ch->status & (1 << 0)) && /* RXS */ -((ch->config >> 12) & 3) != 2);/* TRM */ -} - -static void omap_mcspi_transfer_run(struct omap_mcspi_s *s, int chnum) -{ -struct omap_mcspi_ch_s *ch = s->ch + chnum; - -if (!(ch->control & 1))/* EN */ -return; -if ((ch->status & (1 << 0)) && /* RXS */ -((ch->config >> 12) & 3) != 2 && /* TRM */ -!(ch->config & (1 << 19))) /* TURBO */ -goto intr_update; -if ((ch->status & (1 << 1)) && /* TXS */ -((ch->config >> 12) & 3) != 1) /* TRM */ -goto intr_update; - -if (!(s->control & 1) || /* SINGLE */ -(ch->config & (1 << 20))) {/* FORCE */ -if (ch->txrx) -ch->rx = ch->txrx(ch->opaque, ch->tx, /* WL */ -1 + (0x1f & (ch->config >> 7))); -} - -ch->tx = 0; -ch->status |= 1 << 2; /* EOT */ -ch->status |= 1 << 1; /* TXS */ -if (((ch->config >> 12) & 3) != 2) /* TRM */ -ch->status |= 1 << 0; /* RXS */ - -intr_update: -if ((ch->status & (1 << 0)) && /* RXS */ -((ch->config >> 12) & 3) != 2 && /* TRM */ -!(ch->config & (1 << 19))) /* TURBO */ -s->irqst |= 1 << (2 + 4 * chnum); /* RX_FULL */ -if ((ch->status & (1 << 1)) && /* TXS */ -((ch->config >> 12) & 3) != 1) /* TRM */ -s->irq
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: > >> There is no code, because we're still at architecture design stage. > >> > > Try to write test code to understand the problem better. > > I will. > Please do ASAP. This discussion shows that you don't understand what is the problem that we are dialing with. > >> >> >> guests could also be assisted with special handling (like win2k > >> >> >> install hack), for example guest instructions could be counted > >> >> >> (approximately, for example using TB size or TSC) and only inject > >> >> >> after at least N instructions have passed. > >> >> > Guest instructions cannot be easily counted in KVM (it can be done > >> >> > more > >> >> > or less reliably using perf counters, may be). > >> >> > >> >> Aren't there any debug registers or perf counters, which can generate > >> >> an interrupt after some number of instructions have been executed? > >> > Don't think debug registers have something like that and they are > >> > available for guest use anyway. Perf counters differs greatly from CPU > >> > to CPU (even between two CPUs of the same manufacturer), and we want to > >> > keep using them for profiling guests. And I don't see what problem it > >> > will solve anyway that can be solved by simple delay between irq > >> > reinjection. > >> > >> This would allow counting the executed instructions and limit it. Thus > >> we could emulate a 500MHz CPU on a 2GHz CPU more accurately. > >> > > Why would you want to limit number of instruction executed by guest if > > CPU has nothing else to do anyway? The problem occurs not when we have > > spare cycles so give to a guest, but in opposite case. > > I think one problem is that the guest has executed too much compared > to what would happen with real HW with a lesser CPU. That explains the > RTC frequency reprogramming case. You think wrong. The problem is exactly opposite: the guest haven't had enough execution time between two time interrupts. I don't know what RTC frequency reprogramming case you are talking about here. > > > > >> >> > >> >> >> > >> >> >> > And even if the rate did not matter, the APIC woult still have to > >> >> >> > now > >> >> >> > about the fact that an IRQ is really periodic and does not only > >> >> >> > appear > >> >> >> > as such for a certain interval. This really does not sound like > >> >> >> > simplifying things or even make them cleaner. > >> >> >> > >> >> >> It would, the voodoo would be contained only in APIC, RTC would be > >> >> >> just like any other device. With the bidirectional irqs, this voodoo > >> >> >> would probably eventually spread to many other devices. The logical > >> >> >> conclusion of that would be a system where all devices would be > >> >> >> careful not to disturb the guest at wrong moment because that would > >> >> >> trigger a bug. > >> >> >> > >> >> > This voodoo will be so complex and unreliable that it will make RTC > >> >> > hack > >> >> > pale in comparison (and I still don't see how you are going to make it > >> >> > actually work). > >> >> > >> >> Implement everything inside APIC: only coalescing and reinjection. > >> > APIC has zero info needed to implement reinjection correctly as was > >> > shown to you several time in this thread and you simply keep ignoring > >> > it. > >> > >> On the contrary, APIC is actually the only source of the IRQ ack > >> information. RTC hack would not work without APIC (or the > >> bidirectional IRQ) passing this info to RTC. > >> > >> What APIC doesn't have now is the timer frequency or period info. This > >> is known by RTC and also higher levels managing the clocks. > >> > > So APIC has one bit of information and RTC everything else. > > The information known by RTC (timer period) is also known by higher levels. > What do you mean by higher level here? vl.c or APIC. > > The current > > approach (and proposed patch) brings this one bit of information to RTC, > > you are arguing that RTC should be able to communicate all its info to > > APIC. Sorry I don't see that your way has any advantage. Just more > > complex interface and it is much easier to get it wrong for other time > > sources. > > I don't think anymore that APIC should be handling this but the > generic stuff, like vl.c or exec.c. Then there would be only > information passing from APIC to higher levels. Handling reinjection by general timer code makes infinitely more sense then handling it in APIC. One thing (from the top of my head) that can't be implemented at that level is injection of interrupt back to back (i.e injecting next interrupt immediately after guest acknowledge previous one to RTC). > > >> I keep ignoring the idea that the current model, where both RTC and > >> APIC must somehow work together to make coalescing work, is the only > >> possible just because it is committed and it happens to work in some > >> cases. It would be much better to concentrate this to one place, APIC > >> or preferably higher level where it ma
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: > > I still don't see how the alternative is supposed to simplify our life > > or improve the efficiency of the de-coalescing workaround. It's rather > > problematic like Gleb pointed out: The de-coalescing logic needs to be > > informed about periodicity changes that can only be delivered along > > IRQs. So what to do with the backlog when the timer is stopped? > > What happens with the current design? Gleb only mentioned the > frequency change, I thought that was not so big problem. But I don't > think this case should be allowed happen at all, it can't exist on > real HW. > Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP inside QEMU, connect with gdb to QEMU process and check what frequency RTC configured with (hint: it will be 64Hz), now run video inside the guest and check frequency again (hint: it will be 1Khz). -- Gleb.
Re: [Qemu-devel] [PATCH] virtio-blk: Avoid zeroing every request structure
Alexander Graf wrote: > Anthony Liguori wrote: > > I'd prefer to stick to bug fixes for stable releases. Performance > > improvements are a good motivation for people to upgrade to 0.13 :-) > > In general I agree, but this one looks like a really simple one. Besides, there are too many reported guest regressions at the moment to upgrade if using any of them. -- Jamie
[Qemu-devel] Re: [SeaBIOS] SMBIOS strings
Jes Sorensen wrote: Hi, We were looking at the dmidecode output from qemu-kvm pre-seabios and current qemu-kvm and noticed some of the strings have changed. The main problem with this is that certain OSes are quite sensitive to system changes and avoiding to change things unnecessarily would probably be a good thing. Which OSes do care? Windows only? I wanted to check with the lists if there are any strong feelings about this, and whether some of these changes were made for specific reasons? For example: Handle 0x, DMI type 0, 24 bytes BIOS Information - Vendor: QEMU - Version: QEMU + Vendor: Bochs + Version: Bochs Release Date: 01/01/2007 Address: 0xE8000 Runtime Size: 96 kB You can configure this with CONFIG_APPNAME. and this: Handle 0x0401, DMI type 4, 32 bytes Processor Information - Socket Designation: CPU 1 + Socket Designation: CPU01 Type: Central Processor Family: Other - Manufacturer: QEMU - ID: 63 06 00 00 FD FB 8B 07 + Manufacturer: Bochs + ID: 23 06 00 00 FD FB 8B 07 Version: Not Specified Voltage: Unknown External Clock: Unknown I guess the Socket Designation in particular might have been done for a reason? Otherwise, if there are no objections, I'll look at adding some patches to make it more backwards compatible. Cheers, Jes Is the different ID displayed on the same VM configuration (esp. -cpu option) ? The value is gained by calling CPUID so it should not be different. Which pre-seabios qemu-kvm bios are you comparing to? Sebastian
Re: [Qemu-devel] [PATCH] target-ppc: remove useless line
Am 28.05.2010 um 21:00 schrieb Thomas Monjalon: From: Thomas Monjalon This line was a bit clear. The next lines set or reset this bit (LE) depending of another bit (ILE). So the first line is useless. Signed-off-by: Thomas Monjalon --- target-ppc/helper.c |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/target-ppc/helper.c b/target-ppc/helper.c index 3d843b5..dabf1fd 100644 --- a/target-ppc/helper.c +++ b/target-ppc/helper.c @@ -2591,7 +2591,6 @@ static inline void powerpc_excp(CPUState *env, int excp_model, int excp) #if 0 /* Fix this: not on all targets */ new_msr &= ~((target_ulong)1 << MSR_PMM); #endif -new_msr &= ~((target_ulong)1 << MSR_LE); if (msr_ile) new_msr |= (target_ulong)1 << MSR_LE; else Following line is new_msr &= ~((target_ulong)1 << MSR_LE); so this should be fine. Reviewed-by: Andreas Faerber Andreas
Re: [Qemu-devel] Inquiry about qemu for Motorola 68360
>>qemu-system-m68k -cpu ? Sorry . Where to find executable 'qemu-system-m68k.exe" ? Thank you
[Qemu-devel] Re: [PATCH 1/2] Pad iommu with an empty slot (necessary for SunOS 4.1.4)
On Fri, May 28, 2010 at 9:53 PM, Artyom Tarasenko wrote: >> 32m: 0x12fff394 >> 64m: 0x14fff394 >> 192m:0x1cfff394 >> 256m:0x20fff394 >> >> Memory probing? It would be strange that OS would do it itself. The OS >> could just >> ask OBP how much does it have. Here is the listing where it happens: >> >> _swift_vac_rgnflush: rd %psr, %g2 >> _swift_vac_rgnflush+4: andn %g2, 0x20, %g5 >> _swift_vac_rgnflush+8: mov %g5, %psr >> _swift_vac_rgnflush+0xc: nop >> _swift_vac_rgnflush+0x10: nop >> _swift_vac_rgnflush+0x14: mov 0x100, %g5 >> _swift_vac_rgnflush+0x18: lda [%g5] 0x4, %g5 >> _swift_vac_rgnflush+0x1c: sll %o2, 0x2, %g1 >> _swift_vac_rgnflush+0x20: sll %g5, 0x4, %g5 >> _swift_vac_rgnflush+0x24: add %g5, %g1, %g5 >> _swift_vac_rgnflush+0x28: lda [%g5] 0x20, %g5 >> >> _swift_vac_rgnflush+0x28: is the fatal one. >> >> kadb> $c >> _swift_vac_rgnflush(?) >> _vac_rgnflush() + 4 >> _hat_setup_kas(0xc00,0xf0447000,0x43a000,0x400,0xf043a000,0x3c0) + 70 >> _startup(0xfe00,0x1000,0xfa00,0xf00e2bfc,0x10,0xdbc00) + 1414 >> _main(0xf00e0fb4,0xf0007810,0x293ff49f,0xa805209c,0x200,0xf00d1d18) + 14 >> >> Unfortunately (but not surprisingly) kadb doesn't allow debugging >> cache-flush code, so I can't check what is in >> [%g5] (aka sfar) on the real machine when this happens. > > I was telling fairy tales here and no one stopped me. [%g5] is not > sfar, it's the context pointer, > so the code makes much more sense! > > And I guess, SunOS 4.1.4 is buggy. I've managed to reproduce the > complete case on the real machine. The trick is to set the breakpoint > before the interrupts are switched off: > > kadb> _swift_vac_rgnflush:b > kadb> :c > breakpoint _swift_vac_rgnflush: rd %psr, %g2 > kadb> 44000e5 > kadb> $q > Type 'go' to resume > Type help for more information > ok 100 4 spacel@ . > 3fff00 > > So at _swift_vac_rgnflush+0x28 it would access (44000e5<<2) + (3fff00 > << 4) = 14fff394. Which is outside of IOMMU. > > ok 14fff394 20 spacel@ . > 3fe000 > > This seems to be an alias to > > ok 1404 20 spacel@ . > 3fe000 > > So, it seems to be safe to pad iommu with an empty slot. I guess we > are not missing anything more serious. Alternatively we can use your > aliasing patch. > > What do you say? Thanks, applied. > P.S. What is also interesting about SunOS 4.1.4 is that only the > single-cpu kernel (which is used during the installation) calls > _swift_vac_rgnflush on initialization. The smp kernel just doesn't > have this call in _hat_setup_kas. Maybe they have noticed the bug and > corrected it? > > -- > Regards, > Artyom Tarasenko > > solaris/sparc under qemu blog: http://tyom.blogspot.com/ >
Re: [Qemu-devel] Re: [PATCH v2 12/15] monitor: Add basic device state visualization
Avi Kivity writes: > On 05/23/2010 10:57 AM, Jan Kiszka wrote: >> Avi Kivity wrote: >> >>> On 05/22/2010 11:18 AM, Jan Kiszka wrote: >>> From: Jan Kiszka This introduces device_show, a monitor command that saves the vmstate of a qdev device and visualizes it. QMP is also supported. Buffers are cut after 16 byte by default, but the full content can be requested via '-f'. To pretty-print sub-arrays, vmstate is extended to store the start index name. A new qerror is introduced to signal a missing vmstate. And it comes with documentation. + +Dump a snapshot of the device state. Buffers are cut after 16 bytes unless +a full dump is requested. + +Arguments: + +- "path": the device's qtree path or unique ID (json-string) >>> This may be ambiguous. >>> >> Can your elaborate what precisely is ambiguous? >> > > Can't the user choose the unique ID so that it aliases an unrelated > qtree path? > > I prefer having mutually exclusive 'path' and 'ref' arguments. Don't think that's necessary. If the string starts with '/', it's an absolute path. Else, it's a relative path rooted at the node with the ID equal to the first component. Currently breaks down when IDs contain '/', but permitting that is a bug. There may be more problems; the path lookup code is way too clever.
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 10:16 AM, Jan Kiszka wrote: > Blue Swirl wrote: > This is - according to my current understanding - the proposed > alternative architecture: > > .---. > | de-coalescing | > | logic | > '---' > ^ | > period,| |IRQ > coalesced| |(or tuned VM clock) > (yes/no)| v > .---. .. .---. > | RTC |-IRQ->| router |-IRQ>| APIC | > '---' '' '---' > | ^ | ^ > | | | | > '---period---' '--period---' The period information is already known by the higher level clock management, >>> The timer device models program the next event of some qemu-timer. There >>> is no tag attached explaining the timer subsystem or anyone else the >>> reason behind this programming. >> >> Yes, but why would we care? All timers in the system besides RTC >> should be affected likewise, this would in fact be the benefit from >> handling the problem at higher level. > > And how does your equation for calculating the clock slow-down look like? How about icount_adjust()?
Re: [Qemu-devel] cg14
2010/5/29 Bob Breuer : > Artyom Tarasenko wrote: >> 2010/5/28 Blue Swirl : >> >>> On Fri, May 28, 2010 at 7:54 AM, Bob Breuer wrote: >>> Artyom Tarasenko wrote: > 2010/5/27 Bob Breuer : > > >> Artyom Tarasenko wrote: >> >> >>> Was going to put some more empty slots into SS-10/20 (VSIMMs, SX) >>> after we are done with SS-5 (due to technical limitations I can switch >>> access from one real SS model to another one once a few days only). >>> >>> >>> >> I have a partial implementation of the SS-20 VSIMM (cg14) that I've been >> working on. With the Sun firmware, I have working text console, color >> boot logo, and programmable video resolutions up to 1600x1280. >> >> > Great news! This would allow qemu booting NeXTStep! Are you planning > to submit the patches any time soon? > > > It's not in a state to be submitted yet, but I've attached a working patch if you want to give it a try. I need to hook it up to qdev and fill in some more of the obviously incomplete switch cases before I'd sign off on it. >>> Nice work. I have a few comments below. >>> >>> This probably needs support from OpenBIOS to be usable without OBP. >>> >> >> Maybe it can be used as a second adapter without OpenBIOS support? At >> least under some OSes? >> >> > Probably won't be used without at least being in the firmware device > tree. One area that OpenBIOS could enhance would be a larger memory > size option. The real hardware was only available in 4M and 8M options, > but the memory map allows for 16M. OBP will identify a 16M VSIMM but > won't do anything else with it, and with 16M of vram it would allow for > a potential 2560x1600 32bit resolution. Bob diff --git a/Makefile.target b/Makefile.target index fda5bf3..b17b3af 100644 --- a/Makefile.target +++ b/Makefile.target @@ -250,6 +250,7 @@ else obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o +obj-sparc-y += cg14.o endif obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o diff --git a/hw/sun4m.c b/hw/sun4m.c index 7ba0f76..8b23c9b 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, fprintf(stderr, "qemu: Unsupported depth: %d\n", graphic_depth); exit (1); } + if (hwdef->machine_id == 65) { /* SS-20 */ >>> hwdef structure should contain a field for cg14. If non-zero, install >>> cg14. Was cg14 only available for SS-20? Was it always included? This >>> is also interesting for OpenBIOS, we need to detect cg14 vs. TCX. >>> > The cg14 was only an option for SS-20 and the rare SS-10SX, but not the > regular SS-10, though the SS-10 chipset may have been capable of > supporting it. Each cg14 vsimm takes the place of a stick of memory > with 2 slots physically capable of holding a vsimm. The few SS-10 OBP versions I have probe for VSIMMs. So we need to put either empty_slot (when the -nographics option is used) or VSIMMs there. > Is there a way to pass the framebuffer type and/or address to OpenBIOS? > I would be inclined to have the SS-20 machine default to cg14 instead of > TCX, but it will blow a hole in the support of more than 2G of emulated > system ram. + /* cg14.c */ + void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t vram_base, + uint32_t vram_size); >>> This should go to sun4m.h or cg14.h. >>> >>> + + cg14_init(0x09c00ULL, 0x0fc00ULL, 8<<20); + } else >>> Please add braces and reindent. >>> >>> tcx_init(hwdef->tcx_base, 0x0010, graphic_width, graphic_height, graphic_depth); --- /dev/null Fri May 28 02:08:36 2010 +++ hw/cg14.c Fri May 28 01:58:49 2010 @@ -0,0 +1,785 @@ +/* + * QEMU CG14 Frame buffer + * + * Copyright (c) 2010 Bob Breuer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +
Re: [Qemu-devel] [PATCH] Compile dma only once
On Fri, May 28, 2010 at 7:34 PM, Paul Brook wrote: >> Use a qemu_irq to request CPU exit. > > Needing to request a CPU exit at all is just wrong. See previous discussions > about how any use of qemu_bh_schedule_idle is fundamentally broken. I agree for the device case. Is the attached patch then OK? But what about other uses (with the patch applied): User emulator signal delivery: /src/qemu/darwin-user/signal.c:216:cpu_exit(global_env); /src/qemu/linux-user/signal.c:507:cpu_exit(thread_env); qemu_notify_event(): /src/qemu/cpus.c:286:cpu_exit(env); /src/qemu/cpus.c:289:cpu_exit(next_cpu); Is that broken too and should be removed? cpu_signal(): /src/qemu/cpus.c:531:cpu_exit(cpu_single_env); vm_stop(): /src/qemu/cpus.c:733:cpu_exit(cpu_single_env); KVM IO window exit: /src/qemu/kvm-all.c:859:cpu_exit(env); Some exclusive ARM operation: /src/qemu/linux-user/main.c:152:cpu_exit(other); ARM/m68k semihosting: /src/qemu/gdbstub.c:2296:cpu_exit(s->c_cpu); From 12940e4bf57af4801ffc209095b6adcc0693320f Mon Sep 17 00:00:00 2001 From: Blue Swirl Date: Sat, 29 May 2010 07:59:40 + Subject: [PATCH] dma: remove DMA_schedule and related cpu_request_exit irq It's wrong for devices to request cpu_exit. Remove DMA_schedule and cpu_request_exit irq, thus partially reverting 4556bd8b2514a55d48c15b1adb17537f49657744. Signed-off-by: Blue Swirl --- hw/dma.c| 19 --- hw/fdc.c|1 - hw/isa.h|3 +-- hw/mips_jazz.c | 13 + hw/mips_malta.c | 13 + hw/pc.c | 13 + hw/ppc_prep.c | 13 + hw/sun4m.c |1 - hw/sun4u.c |1 - 9 files changed, 9 insertions(+), 68 deletions(-) diff --git a/hw/dma.c b/hw/dma.c index 5b21521..1015b41 100644 --- a/hw/dma.c +++ b/hw/dma.c @@ -57,7 +57,6 @@ static struct dma_cont { uint8_t flip_flop; int dshift; struct dma_regs regs[4]; -qemu_irq *cpu_request_exit; } dma_controllers[2]; enum { @@ -442,14 +441,6 @@ int DMA_write_memory (int nchan, void *buf, int pos, int len) return len; } -/* request the emulator to transfer a new DMA memory block ASAP */ -void DMA_schedule(int nchan) -{ -struct dma_cont *d = &dma_controllers[nchan > 3]; - -qemu_irq_pulse(*d->cpu_request_exit); -} - static void dma_reset(void *opaque) { struct dma_cont *d = opaque; @@ -465,14 +456,12 @@ static int dma_phony_handler (void *opaque, int nchan, int dma_pos, int dma_len) /* dshift = 0: 8 bit DMA, 1 = 16 bit DMA */ static void dma_init2(struct dma_cont *d, int base, int dshift, - int page_base, int pageh_base, - qemu_irq *cpu_request_exit) + int page_base, int pageh_base) { static const int page_port_list[] = { 0x1, 0x2, 0x3, 0x7 }; int i; d->dshift = dshift; -d->cpu_request_exit = cpu_request_exit; for (i = 0; i < 8; i++) { register_ioport_write (base + (i << dshift), 1, 1, write_chan, d); register_ioport_read (base + (i << dshift), 1, 1, read_chan, d); @@ -542,12 +531,12 @@ static const VMStateDescription vmstate_dma = { } }; -void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit) +void DMA_init(int high_page_enable) { dma_init2(&dma_controllers[0], 0x00, 0, 0x80, - high_page_enable ? 0x480 : -1, cpu_request_exit); + high_page_enable ? 0x480 : -1); dma_init2(&dma_controllers[1], 0xc0, 1, 0x88, - high_page_enable ? 0x488 : -1, cpu_request_exit); + high_page_enable ? 0x488 : -1); vmstate_register (0, &vmstate_dma, &dma_controllers[0]); vmstate_register (1, &vmstate_dma, &dma_controllers[1]); diff --git a/hw/fdc.c b/hw/fdc.c index 6306496..d4505b4 100644 --- a/hw/fdc.c +++ b/hw/fdc.c @@ -1174,7 +1174,6 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) * recall us... */ DMA_hold_DREQ(fdctrl->dma_chann); -DMA_schedule(fdctrl->dma_chann); return; } else { FLOPPY_ERROR("dma_mode=%d direction=%d\n", dma_mode, direction); diff --git a/hw/isa.h b/hw/isa.h index aaf0272..9681de1 100644 --- a/hw/isa.h +++ b/hw/isa.h @@ -40,8 +40,7 @@ int DMA_read_memory (int nchan, void *buf, int pos, int size); int DMA_write_memory (int nchan, void *buf, int pos, int size); void DMA_hold_DREQ (int nchan); void DMA_release_DREQ (int nchan); -void DMA_schedule(int nchan); -void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit); +void DMA_init(int high_page_enable); void DMA_register_channel (int nchan, DMA_transfer_handler transfer_handler, void *opaque); diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c index ead3a00..6e0ec8f 100644 --- a/hw/mips_jazz.c +++ b/hw/mips_jazz.c @@ -114,15 +114,6 @@ static void audio_init(qemu_irq
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: This is - according to my current understanding - the proposed alternative architecture: .---. | de-coalescing | | logic | '---' ^ | period,| |IRQ coalesced| |(or tuned VM clock) (yes/no)| v .---. .. .---. | RTC |-IRQ->| router |-IRQ>| APIC | '---' '' '---' |^| ^ ||| | '---period---''--period---' >>> The period information is already known by the higher level clock >>> management, >> The timer device models program the next event of some qemu-timer. There >> is no tag attached explaining the timer subsystem or anyone else the >> reason behind this programming. > > Yes, but why would we care? All timers in the system besides RTC > should be affected likewise, this would in fact be the benefit from > handling the problem at higher level. And how does your equation for calculating the clock slow-down look like? Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Gleb Natapov wrote: > On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: >> 2010/5/28 Gleb Natapov : >>> On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: 2010/5/27 Gleb Natapov : > On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: >> On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka wrote: >>> Blue Swirl wrote: On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka wrote: > Anthony Liguori wrote: >> On 05/25/2010 02:09 PM, Blue Swirl wrote: >>> On Mon, May 24, 2010 at 8:13 PM, Jan Kiszka >>> wrote: >>> From: Jan Kiszka This allows to communicate potential IRQ coalescing during delivery from the sink back to the source. Targets that support IRQ coalescing workarounds need to register handlers that return the appropriate QEMU_IRQ_* code, and they have to propergate the code across all IRQ redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it can apply its workaround. If multiple sinks exist, the source may only consider an IRQ coalesced if all other sinks either report QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. >>> No real devices are interested whether any of their output lines are >>> even connected. This would introduce a new signal type, >>> bidirectional >>> multi-level, which is not correct. >>> >> I don't think it's really an issue of correct, but I wouldn't >> disagree >> to a suggestion that we ought to introduce a new signal type for this >> type of bidirectional feedback. Maybe it's qemu_coalesced_irq and >> has a >> similar interface as qemu_irq. > A separate type would complicate the delivery of the feedback value > across GPIO pins (as Paul requested for the RTC->HPET routing). > >>> I think the real solution to coalescing is put the logic inside one >>> device, in this case APIC because it has the information about irq >>> delivery. APIC could monitor incoming RTC irqs for frequency >>> information and whether they get delivered or not. If not, an >>> internal >>> timer is installed which injects the lost irqs. > That won't fly as the IRQs will already arrive at the APIC with a > sufficiently high jitter. At the bare minimum, you need to tell the > interrupt controller about the fact that a particular IRQ should be > delivered at a specific regular rate. For this, you also need a > generic > interface - nothing really "won". OK, let's simplify: just reinject at next possible chance. No need to monitor or tell anything. >>> There are guests that won't like this (I know of one in-house, but >>> others may even have more examples), specifically if you end up firing >>> multiple IRQs in a row due to a longer backlog. For that reason, the RTC >>> spreads the reinjection according to the current rate. >> Then reinject with a constant delay, or next CPU exit. Such buggy > If guest's time frequency is the same as host time frequency you can't > reinject with constant delay. That is why current code mixes two > approaches: reinject M interrupts in a raw then delay. This approach can be also used by APIC-only version. >>> I don't know what APIC-only version you are talking about. I haven't >>> seen the code and I don't understand hand waving, sorry. >> There is no code, because we're still at architecture design stage. >> > Try to write test code to understand the problem better. > >> guests could also be assisted with special handling (like win2k >> install hack), for example guest instructions could be counted >> (approximately, for example using TB size or TSC) and only inject >> after at least N instructions have passed. > Guest instructions cannot be easily counted in KVM (it can be done more > or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? >>> Don't think debug registers have something like that and they are >>> available for guest use anyway. Perf counters differs greatly from CPU >>> to CPU (even between two CPUs of the same manufacturer), and we want to >>> keep using them for profiling guests. And I don't see what problem it >>> will solve anyway that can be solved by simple delay between irq >>> reinjection. >> This would allow counting the executed instructions and limit it. Thus >> we could emulate a 500MHz CPU on a 2GHz CPU more accurately. >> > Why would you want to limit number of instruction executed by guest if > CPU has nothing else to do anyway? The problem occurs not
[Qemu-devel] [PATCH 0/3] [RFC] Threaded vnc server
Hi, This series add a threaded VNC server and should be applied on top on my previous patch set (adding tight encoding). The first two patchs add some functions to qemu-thread. The last is the threaded VNC server and the changelog explains how it works. refs: http://xf.iksaif.net/blog/index.php?post/2010/05/28/QEMU%3A-Threaded-VNC-Server-results Thanks Corentin Chary (3): qemu-thread: add qemu_mutex/cond_destroy and qemu_mutex_exit qemu-thread: add cleanup_push() and cleanup_pop() vnc: threaded VNC server Makefile|4 + Makefile.objs |7 +- configure | 13 ++ qemu-thread.c | 22 qemu-thread.h |8 ++ vnc-jobs-sync.c | 70 vnc-jobs.c | 328 +++ vnc.c | 161 +++ vnc.h | 73 9 files changed, 663 insertions(+), 23 deletions(-) create mode 100644 vnc-jobs-sync.c create mode 100644 vnc-jobs.c
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: > 2010/5/29 Jan Kiszka : >> Gleb Natapov wrote: >>> On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: 2010/5/28 Gleb Natapov : > On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: >> 2010/5/27 Gleb Natapov : >>> On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka wrote: > Blue Swirl wrote: >> On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka >> wrote: >>> Anthony Liguori wrote: On 05/25/2010 02:09 PM, Blue Swirl wrote: > On Mon, May 24, 2010 at 8:13 PM, Jan Kiszka > wrote: > >> From: Jan Kiszka >> >> This allows to communicate potential IRQ coalescing during >> delivery from >> the sink back to the source. Targets that support IRQ coalescing >> workarounds need to register handlers that return the appropriate >> QEMU_IRQ_* code, and they have to propergate the code across all >> IRQ >> redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, >> it can >> apply its workaround. If multiple sinks exist, the source may >> only >> consider an IRQ coalesced if all other sinks either report >> QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. >> > No real devices are interested whether any of their output lines > are > even connected. This would introduce a new signal type, > bidirectional > multi-level, which is not correct. > I don't think it's really an issue of correct, but I wouldn't disagree to a suggestion that we ought to introduce a new signal type for this type of bidirectional feedback. Maybe it's qemu_coalesced_irq and has a similar interface as qemu_irq. >>> A separate type would complicate the delivery of the feedback value >>> across GPIO pins (as Paul requested for the RTC->HPET routing). >>> > I think the real solution to coalescing is put the logic inside > one > device, in this case APIC because it has the information about irq > delivery. APIC could monitor incoming RTC irqs for frequency > information and whether they get delivered or not. If not, an > internal > timer is installed which injects the lost irqs. >>> That won't fly as the IRQs will already arrive at the APIC with a >>> sufficiently high jitter. At the bare minimum, you need to tell the >>> interrupt controller about the fact that a particular IRQ should be >>> delivered at a specific regular rate. For this, you also need a >>> generic >>> interface - nothing really "won". >> OK, let's simplify: just reinject at next possible chance. No need to >> monitor or tell anything. > There are guests that won't like this (I know of one in-house, but > others may even have more examples), specifically if you end up firing > multiple IRQs in a row due to a longer backlog. For that reason, the > RTC > spreads the reinjection according to the current rate. Then reinject with a constant delay, or next CPU exit. Such buggy >>> If guest's time frequency is the same as host time frequency you can't >>> reinject with constant delay. That is why current code mixes two >>> approaches: reinject M interrupts in a raw then delay. >> This approach can be also used by APIC-only version. >> > I don't know what APIC-only version you are talking about. I haven't > seen the code and I don't understand hand waving, sorry. There is no code, because we're still at architecture design stage. >>> Try to write test code to understand the problem better. >>> guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. >>> Guest instructions cannot be easily counted in KVM (it can be done more >>> or less reliably using perf counters, may be). >> Aren't there any debug registers or perf counters, which can generate >> an interrupt after some number of instructions have been executed? > Don't think debug registers have something like that and they are > available for guest use anyway. Perf counters differs greatly from CPU > to CPU (even between two CPUs of the same manufacturer), and we want to > keep using them for profiling guests. And I don't see what problem it > will solve anyway that can be solved by simple delay betwee
Re: [Qemu-devel] [PATCH v2 02/15] qdev: Fix scanning across single-bus devices
Markus Armbruster wrote: > [cc: kraxel] > > Jan Kiszka writes: > >> From: Jan Kiszka >> >> As long as we allow /dev.1 as shortcut for /dev1/bus1, we also have to >> make sure that /dev1/dev2 works for /dev1/bus1/dev2/bus2 - as long as >> there is only one child bus per device. > > We auto-root a path not starting with '/' via convention "first > component is ID then" (I like that). We auto-complete a path ending > with a device when we want a bus (a bit too clever). Auto-inserting bus > names in the middle is too clever by half! > > Such cleverness can be convenient in the human monitor, but all it adds > to QMP is complexity. > > I'm worried about possibly ambigous interpretation of paths. Would be > bad if a path could name different node after we add something to the > tree. I *think* your fine print makes that impossible, but it's not > exactly obvious. Heck, I could be wrong. For the above example, that would mean a bus called 'dev2' would have to be added to dev1. If that makes sense is one question. The other is why this should be more broken than the case that this happens to a bus specified at the end of some path? > > Wouldn't it be better to put the convenience into the interactive > completion function? That way we keep it out of QMP. That would leave user interfaces over QMP broken behind, or at least complicate their implementations as they would have to expand the qtree path on their own to achieve consistency. > > Subject is misleading, it's a feature, not a bug fix. Depends on the POV. For me it is a highly confusing inconsistency in the way you can specify qtree paths. IMHO, we either have to drop this path abbreviation or apply it to the whole path. As it's intention is (according to my understanding) to ease the usage, an unintuitive construction role is fairly counterproductive. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 9:45 AM, Jan Kiszka wrote: > Blue Swirl wrote: >> 2010/5/29 Jan Kiszka : >>> Gleb Natapov wrote: On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: > 2010/5/28 Gleb Natapov : >> On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: >>> 2010/5/27 Gleb Natapov : On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: > On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka wrote: >> Blue Swirl wrote: >>> On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka >>> wrote: Anthony Liguori wrote: > On 05/25/2010 02:09 PM, Blue Swirl wrote: >> On Mon, May 24, 2010 at 8:13 PM, Jan Kiszka >> wrote: >> >>> From: Jan Kiszka >>> >>> This allows to communicate potential IRQ coalescing during >>> delivery from >>> the sink back to the source. Targets that support IRQ coalescing >>> workarounds need to register handlers that return the >>> appropriate >>> QEMU_IRQ_* code, and they have to propergate the code across >>> all IRQ >>> redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, >>> it can >>> apply its workaround. If multiple sinks exist, the source may >>> only >>> consider an IRQ coalesced if all other sinks either report >>> QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. >>> >> No real devices are interested whether any of their output lines >> are >> even connected. This would introduce a new signal type, >> bidirectional >> multi-level, which is not correct. >> > I don't think it's really an issue of correct, but I wouldn't > disagree > to a suggestion that we ought to introduce a new signal type for > this > type of bidirectional feedback. Maybe it's qemu_coalesced_irq > and has a > similar interface as qemu_irq. A separate type would complicate the delivery of the feedback value across GPIO pins (as Paul requested for the RTC->HPET routing). >> I think the real solution to coalescing is put the logic inside >> one >> device, in this case APIC because it has the information about >> irq >> delivery. APIC could monitor incoming RTC irqs for frequency >> information and whether they get delivered or not. If not, an >> internal >> timer is installed which injects the lost irqs. That won't fly as the IRQs will already arrive at the APIC with a sufficiently high jitter. At the bare minimum, you need to tell the interrupt controller about the fact that a particular IRQ should be delivered at a specific regular rate. For this, you also need a generic interface - nothing really "won". >>> OK, let's simplify: just reinject at next possible chance. No need >>> to >>> monitor or tell anything. >> There are guests that won't like this (I know of one in-house, but >> others may even have more examples), specifically if you end up >> firing >> multiple IRQs in a row due to a longer backlog. For that reason, the >> RTC >> spreads the reinjection according to the current rate. > Then reinject with a constant delay, or next CPU exit. Such buggy If guest's time frequency is the same as host time frequency you can't reinject with constant delay. That is why current code mixes two approaches: reinject M interrupts in a raw then delay. >>> This approach can be also used by APIC-only version. >>> >> I don't know what APIC-only version you are talking about. I haven't >> seen the code and I don't understand hand waving, sorry. > There is no code, because we're still at architecture design stage. > Try to write test code to understand the problem better. > guests could also be assisted with special handling (like win2k > install hack), for example guest instructions could be counted > (approximately, for example using TB size or TSC) and only inject > after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). >>> Aren't there any debug registers or perf counters, which can generate >>> an interrupt after some number of instructions have been executed? >> Don't think debug registers have something like that and they are >> available for guest use anyway. Perf counters differs greatly from CPU >> to CPU (eve
[Qemu-devel] [PATCH 3/3] vnc: threaded VNC server
Implement a threaded VNC server using the producer-consumer model. The main thread will push encoding jobs (a list a rectangles to update) in a queue, and the VNC worker thread will consume that queue and send framebuffer updates to the output buffer. There is three levels of locking: - jobs queue lock: for each operation on the queue (push, pop, isEmpty?) - VncState global lock: mainly used for framebuffer updates to avoid screen corruption if the framebuffer is updated while the worker threaded is doing something. - VncState::output lock: used to make sure the output buffer is not corrupted if two threads try to write on it at the same time While the VNC worker thread is working, the VncState global lock is hold to avoid screen corruptions (this block vnc_refresh() for a short time) but the output lock is not hold because the thread work on its own output buffer. When the encoding job is done, the worker thread will hold the output lock and copy its output buffer in vs->output. The threaded VNC server can be enabled with ./configure --enable-vnc-thread. If you don't want it, just use ./configure --disable-vnc-thread and a syncrhonous queue of job will be used (which as exactly the same behavior as the old queue). If you disable the VNC thread, all thread related code will not be built and there will be no overhead. Signed-off-by: Corentin Chary --- Makefile|4 + Makefile.objs |7 +- configure | 13 ++ vnc-jobs-sync.c | 70 vnc-jobs.c | 328 +++ vnc.c | 161 +++ vnc.h | 73 7 files changed, 633 insertions(+), 23 deletions(-) create mode 100644 vnc-jobs-sync.c create mode 100644 vnc-jobs.c diff --git a/Makefile b/Makefile index b264e38..d870900 100644 --- a/Makefile +++ b/Makefile @@ -126,6 +126,10 @@ vnc-encoding-hextile.o: vnc-encoding-hextile.c vnc.h vnc-encoding-tight.o: vnc-encoding-tight.c vnc.h vnc-encoding-tight.h +vnc-jobs.o: vnc-jobs.c vnc.h + +vnc-jobs-sync.o: vnc-jobs-sync.c vnc.h + curses.o: curses.c keymaps.h curses_keys.h bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS) diff --git a/Makefile.objs b/Makefile.objs index 070ee09..6534214 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -108,8 +108,13 @@ common-obj-y += vnc-encoding-tight.o common-obj-y += iov.o common-obj-$(CONFIG_VNC_TLS) += vnc-tls.o vnc-auth-vencrypt.o common-obj-$(CONFIG_VNC_SASL) += vnc-auth-sasl.o +ifdef CONFIG_VNC_THREAD +common-obj-y += vnc-jobs.o +else +common-obj-y += vnc-jobs-sync.o +endif common-obj-$(CONFIG_COCOA) += cocoa.o -common-obj-$(CONFIG_IOTHREAD) += qemu-thread.o +common-obj-$(CONFIG_THREAD) += qemu-thread.o common-obj-y += notify.o event_notifier.o common-obj-y += qemu-timer.o diff --git a/configure b/configure index 3cd2c5f..e886b67 100755 --- a/configure +++ b/configure @@ -263,6 +263,7 @@ uuid="" vde="" vnc_tls="" vnc_sasl="" +vnc_thread="" xen="" linux_aio="" vhost_net="" @@ -547,6 +548,10 @@ for opt do ;; --enable-vnc-sasl) vnc_sasl="yes" ;; + --disable-vnc-thread) vnc_thread="no" + ;; + --enable-vnc-thread) vnc_thread="yes" + ;; --disable-slirp) slirp="no" ;; --disable-uuid) uuid="no" @@ -779,6 +784,8 @@ echo " --disable-vnc-tlsdisable TLS encryption for VNC server" echo " --enable-vnc-tls enable TLS encryption for VNC server" echo " --disable-vnc-sasl disable SASL encryption for VNC server" echo " --enable-vnc-saslenable SASL encryption for VNC server" +echo " --disable-vnc-thread disable threaded VNC server" +echo " --enable-vnc-thread enable threaded VNC server" echo " --disable-curses disable curses output" echo " --enable-curses enable curses output" echo " --disable-curl disable curl connectivity" @@ -2019,6 +2026,7 @@ echo "Block whitelist $block_drv_whitelist" echo "Mixer emulation $mixemu" echo "VNC TLS support $vnc_tls" echo "VNC SASL support $vnc_sasl" +echo "VNC thread$vnc_thread" if test -n "$sparc_cpu"; then echo "Target Sparc Arch $sparc_cpu" fi @@ -2158,6 +2166,10 @@ if test "$vnc_sasl" = "yes" ; then echo "CONFIG_VNC_SASL=y" >> $config_host_mak echo "VNC_SASL_CFLAGS=$vnc_sasl_cflags" >> $config_host_mak fi +if test "$vnc_thread" = "yes" ; then + echo "CONFIG_VNC_THREAD=y" >> $config_host_mak + echo "CONFIG_THREAD=y" >> $config_host_mak +fi if test "$fnmatch" = "yes" ; then echo "CONFIG_FNMATCH=y" >> $config_host_mak fi @@ -2234,6 +2246,7 @@ if test "$xen" = "yes" ; then fi if test "$io_thread" = "yes" ; then echo "CONFIG_IOTHREAD=y" >> $config_host_mak + echo "CONFIG_THREAD=y" >> $config_host_mak fi if test "$linux_aio" = "yes" ; then echo "CONFIG_LINUX_AIO=y" >> $config_host_mak diff --git a/vnc-jobs-sync.c b/vnc-jobs-sync.c new file mode 100644 index 000..bcc6d6d --- /d
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: >>> On the contrary, APIC is actually the only source of the IRQ ack >>> information. RTC hack would not work without APIC (or the >>> bidirectional IRQ) passing this info to RTC. >>> >>> What APIC doesn't have now is the timer frequency or period info. This >>> is known by RTC and also higher levels managing the clocks. >>> >> So APIC has one bit of information and RTC everything else. > > The information known by RTC (timer period) is also known by higher levels. Curious to see where you'll find this. > >> The current >> approach (and proposed patch) brings this one bit of information to RTC, >> you are arguing that RTC should be able to communicate all its info to >> APIC. Sorry I don't see that your way has any advantage. Just more >> complex interface and it is much easier to get it wrong for other time >> sources. > > I don't think anymore that APIC should be handling this but the > generic stuff, like vl.c or exec.c. Then there would be only > information passing from APIC to higher levels. You neglect the the information required to associate a periodic source (e.g. RTC) with an IRQ sink (e.g. APIC). Without that, you will have a hard time figuring out if a reported IRQ coalescing requires any activities or should simply be welcomed (for I/O IRQs). > >>> I keep ignoring the idea that the current model, where both RTC and >>> APIC must somehow work together to make coalescing work, is the only >>> possible just because it is committed and it happens to work in some >>> cases. It would be much better to concentrate this to one place, APIC >>> or preferably higher level where it may benefit other timers too. >>> Provided of course that the other models can be made to work. >>> >> So write the code and show us. You haven't show any evidence that RTC is >> the wrong place. RTC knows when interrupt was acknowledge to RTC, it >> know when clock frequency changes, it know when device reset happened. >> APIC knows only that interrupt was coalesced. It doesn't even know that >> it may be masked by a guest in IOAPIC (interrupts delivered while they >> are masked not considered coalesced). > > Oh, I thought interrupt masking was the reason for coalescing! What > exactly is the reason then? Missing acks, ie. the IRQ is still pending when the next one arrives. You want to filter out masked/suppressed IRQs to avoid running the de-coalescing logic on sources that are actually cut off (like the RTC IRQ when the HPET took over). > >> Time source knows only when >> frequency changes and may be when device reset happens if timer is >> stopped by device on reset. So RTC is actually a sweet spot if you want >> to minimize amount of info you need to pass between various layers. >> > Maybe that version would not bend backwards as much as the current to > cater for buggy hosts. > You mean "buggy guests"? >>> Yes, sorry. >>> What guests are not buggy in your opinion? Linux tries hard to be smart and as a result the only way to have stable clock with it is to go paravirt. >>> I'm not an OS designer, but I think an OS should never crash, even if >>> a burst of IRQs is received. Reprogramming the timer should consider >>> the pending IRQ situation (0 or 1 with real HW). Those bugs are one >>> cause of the problem. >> OS should never crash in the absence of HW bugs? I doubt you can design >> an OS that can run in a face of any HW failure. Anyway here we are >> trying to solve guests time keeping problem not crashes. Do you think >> you can design OS that can keep time accurately no matter how crazy all >> HW clock behaves? > > I think my OS design skills are not relevant in this discussion, but > IIRC there are fault tolerant operating systems for extreme conditions > so it can be done. No one can influence the design of released OS versions anymore. > >> The fact is that timer device is not "just like any >> other device" in virtual world. Any other device is easy: you just >> implement spec as close as possible and everything works. For time >> source device this is not enough. You can implement RTC+HPET to the >> letter and your guest will drift like crazy. > It's doable: a cycle accurate emulator will not cause any drift, > without any voodoo. The interrupts would come after executing the same > instruction as the real HW. For emulating any sufficiently buggy > guests in any sufficiently desperate low resource conditions, this may > be the only option that will always work. > Yes, but qemu and kvm are not cycle accurate emulators and don't strive to be one. On the contrary KVM runs at native host CPU speed most of the time, so any emulation done between two instruction is theoretically noticeable for a guest. TSC is bypassed directly to a guest too, so keeping all time source in perfect sync is also impossible. >>> That is actually another cause of the problem. KVM gives the guest an >>> ill
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
2010/5/29 Jan Kiszka : > Gleb Natapov wrote: >> On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: >>> 2010/5/28 Gleb Natapov : On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: > 2010/5/27 Gleb Natapov : >> On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: >>> On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka wrote: Blue Swirl wrote: > On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka wrote: >> Anthony Liguori wrote: >>> On 05/25/2010 02:09 PM, Blue Swirl wrote: On Mon, May 24, 2010 at 8:13 PM, Jan Kiszka wrote: > From: Jan Kiszka > > This allows to communicate potential IRQ coalescing during > delivery from > the sink back to the source. Targets that support IRQ coalescing > workarounds need to register handlers that return the appropriate > QEMU_IRQ_* code, and they have to propergate the code across all > IRQ > redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it > can > apply its workaround. If multiple sinks exist, the source may only > consider an IRQ coalesced if all other sinks either report > QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. > No real devices are interested whether any of their output lines are even connected. This would introduce a new signal type, bidirectional multi-level, which is not correct. >>> I don't think it's really an issue of correct, but I wouldn't >>> disagree >>> to a suggestion that we ought to introduce a new signal type for >>> this >>> type of bidirectional feedback. Maybe it's qemu_coalesced_irq and >>> has a >>> similar interface as qemu_irq. >> A separate type would complicate the delivery of the feedback value >> across GPIO pins (as Paul requested for the RTC->HPET routing). >> I think the real solution to coalescing is put the logic inside one device, in this case APIC because it has the information about irq delivery. APIC could monitor incoming RTC irqs for frequency information and whether they get delivered or not. If not, an internal timer is installed which injects the lost irqs. >> That won't fly as the IRQs will already arrive at the APIC with a >> sufficiently high jitter. At the bare minimum, you need to tell the >> interrupt controller about the fact that a particular IRQ should be >> delivered at a specific regular rate. For this, you also need a >> generic >> interface - nothing really "won". > OK, let's simplify: just reinject at next possible chance. No need to > monitor or tell anything. There are guests that won't like this (I know of one in-house, but others may even have more examples), specifically if you end up firing multiple IRQs in a row due to a longer backlog. For that reason, the RTC spreads the reinjection according to the current rate. >>> Then reinject with a constant delay, or next CPU exit. Such buggy >> If guest's time frequency is the same as host time frequency you can't >> reinject with constant delay. That is why current code mixes two >> approaches: reinject M interrupts in a raw then delay. > This approach can be also used by APIC-only version. > I don't know what APIC-only version you are talking about. I haven't seen the code and I don't understand hand waving, sorry. >>> There is no code, because we're still at architecture design stage. >>> >> Try to write test code to understand the problem better. >> >>> guests could also be assisted with special handling (like win2k >>> install hack), for example guest instructions could be counted >>> (approximately, for example using TB size or TSC) and only inject >>> after at least N instructions have passed. >> Guest instructions cannot be easily counted in KVM (it can be done more >> or less reliably using perf counters, may be). > Aren't there any debug registers or perf counters, which can generate > an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. >>> This would allow counting the executed instructions and limit it. Thus >>> we could emulate a 500MHz CPU on a 2GHz CPU more
Re: [Qemu-devel] [PATCH] Name the default PCI bus "pci.0" on all architectures
Paul Brook writes: >> The system emulators for each arch are using inconsistent >> naming for the default PCI bus "pci" vs "pci.0". Since it >> is conceivable we'll have multiple PCI buses in the future >> standardize on "pci.0" for all architectures. This ensures >> mgmt apps can rely on a name when assigning PCI devices an >> address on the bus using eg '-device e1000,bus=pci.0,addr=3' > > No. Bus names are local to the parent device. None of the host bridges > support multiple bridges, so the ".0" suffix makes no sense. The parent > device has no idea whether it owns the "default" pci bus or not. > If you have multiple PCI busses then you can identify them by the device path. >From qbus_create_inplace(): if (name) { /* use supplied name */ bus->name = qemu_strdup(name); } else if (parent && parent->id) { /* parent device has id -> use it for bus name */ len = strlen(parent->id) + 16; buf = qemu_malloc(len); snprintf(buf, len, "%s.%d", parent->id, parent->num_child_bus); bus->name = buf; } else { /* no id -> use lowercase bus type for bus name */ len = strlen(info->name) + 16; buf = qemu_malloc(len); len = snprintf(buf, len, "%s.%d", info->name, parent ? parent->num_child_bus : 0); for (i = 0; i < len; i++) buf[i] = qemu_tolower(buf[i]); bus->name = buf; } If appending ".0" really makes no sense when the device has just one bus, then we shouldn't append it in cases 2 & 3. But I'd simply append it always. One bus is just as countable as many.
Re: [Qemu-devel] [PATCH] qemu-io: Fix error messages
On Fri, May 28, 2010 at 08:15:04PM +0200, Kevin Wolf wrote: > The truncate and getlength commands passed a negative error number to > strerror. > They also happen to be the two functions that are lacking a newline at the end > of their error message. > > Signed-off-by: Kevin Wolf Ok, Reviewed-by: Christoph Hellwig
Re: [Qemu-devel] cg14
On Sat, May 29, 2010 at 5:15 AM, Bob Breuer wrote: > Artyom Tarasenko wrote: >> 2010/5/28 Blue Swirl : >> >>> On Fri, May 28, 2010 at 7:54 AM, Bob Breuer wrote: >>> Artyom Tarasenko wrote: > 2010/5/27 Bob Breuer : > > >> Artyom Tarasenko wrote: >> >> >>> Was going to put some more empty slots into SS-10/20 (VSIMMs, SX) >>> after we are done with SS-5 (due to technical limitations I can switch >>> access from one real SS model to another one once a few days only). >>> >>> >>> >> I have a partial implementation of the SS-20 VSIMM (cg14) that I've been >> working on. With the Sun firmware, I have working text console, color >> boot logo, and programmable video resolutions up to 1600x1280. >> >> > Great news! This would allow qemu booting NeXTStep! Are you planning > to submit the patches any time soon? > > > It's not in a state to be submitted yet, but I've attached a working patch if you want to give it a try. I need to hook it up to qdev and fill in some more of the obviously incomplete switch cases before I'd sign off on it. >>> Nice work. I have a few comments below. >>> >>> This probably needs support from OpenBIOS to be usable without OBP. >>> >> >> Maybe it can be used as a second adapter without OpenBIOS support? At >> least under some OSes? >> >> > Probably won't be used without at least being in the firmware device > tree. One area that OpenBIOS could enhance would be a larger memory > size option. The real hardware was only available in 4M and 8M options, > but the memory map allows for 16M. OBP will identify a 16M VSIMM but > won't do anything else with it, and with 16M of vram it would allow for > a potential 2560x1600 32bit resolution. Awesome, PC with Cirrus VGA can't compete. Bob diff --git a/Makefile.target b/Makefile.target index fda5bf3..b17b3af 100644 --- a/Makefile.target +++ b/Makefile.target @@ -250,6 +250,7 @@ else obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o +obj-sparc-y += cg14.o endif obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o diff --git a/hw/sun4m.c b/hw/sun4m.c index 7ba0f76..8b23c9b 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, fprintf(stderr, "qemu: Unsupported depth: %d\n", graphic_depth); exit (1); } + if (hwdef->machine_id == 65) { /* SS-20 */ >>> hwdef structure should contain a field for cg14. If non-zero, install >>> cg14. Was cg14 only available for SS-20? Was it always included? This >>> is also interesting for OpenBIOS, we need to detect cg14 vs. TCX. >>> > The cg14 was only an option for SS-20 and the rare SS-10SX, but not the > regular SS-10, though the SS-10 chipset may have been capable of > supporting it. Each cg14 vsimm takes the place of a stick of memory > with 2 slots physically capable of holding a vsimm. > > Is there a way to pass the framebuffer type and/or address to OpenBIOS? Currently there's fw_cfg and we can also set NVRAM variables. > I would be inclined to have the SS-20 machine default to cg14 instead of > TCX, but it will blow a hole in the support of more than 2G of emulated > system ram. I'm not so worried about the RAM hole, the real machines didn't have so much RAM and we can also always add more RAM at some other location. But the machine default is a bit of a problem because we would not be compatible to old versions. Adding a new machine for each new configuration does not sound viable in longer term either. In this case it makes a lot of sense for SS-10SX and we could deprecate SS-20 with TCX. A generic solution for major device changes could be to pass a device tree (FDT), like some KVM PPC machines do. Another solution, which would give most realistic emulation, would be to add FCode ROMs which is used to probe and add the devices by OBP or OpenBIOS. + /* cg14.c */ + void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t vram_base, + uint32_t vram_size); >>> This should go to sun4m.h or cg14.h. >>> >>> + + cg14_init(0x09c00ULL, 0x0fc00ULL, 8<<20); + } else >>> Please add braces and reindent. >>> >>> tcx_init(hwdef->tcx_base, 0x0010, graphic_width, graphic_height, graphic_depth); --- /dev/null Fri May 28 02:08:36 2010 +++ hw/cg14.c Fri May 28 01:58:49 2010 @@ -0,0 +1,785 @@ +/* + * QEMU CG14 Frame buffer + * + * Copyright (c) 2010 Bob Breuer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy >>
Re: [Qemu-devel] Re: [PATCH v3 13/17] monitor: Allow to exclude commands from QMP
Luiz Capitulino writes: > On Sun, 23 May 2010 12:59:26 +0200 > Jan Kiszka wrote: > >> From: Jan Kiszka >> >> Ported commands that are marked 'user_only' will not be considered for >> QMP monitor sessions. This allows to implement new commands that do not >> (yet) provide a sufficiently stable interface for QMP use (e.g. >> device_show). > > This is fine for me, but two things I've been wondering: > > 1. Isn't a 'flags' struct member better? So that we can do (in the > qemu-monitor.hx entry): > > .flags = MONITOR_USER_ONLY | MONITOR_HANDLER_ASYNC, > > I'm not suggesting this is an async handler, just exemplifying multiple > flags. We also have at least one command that makes only sense in QMP: qmp_capabilities. Maybe we could use separate flags controlling command availability in human monitor and QMP. [...]
[Qemu-devel] Re: [PATCH v3 0/3]: QMP: Commands doc
Luiz Capitulino wrote: > This new version moves the documentation to qemu-monitor.hx and now > QMP/qmp-commands.txt is generated from there (thanks Jan!). > > I hope I've addressed all review comments in this version and now it should > describe reality. > > Next step is to fix glitches (after this series is merged, of course). This is a fragile series, breaking quickly when something changes in qemu-monitor.hx (as just happened). Can we get this rebased and merged ASAP? Thanks, Jan > > changelog > - > > v2 -> v3 > > > - Rebased > - Addressed review comments > - Move contents to qemu-monitor.hx (Jan) > > v1 -> v2 > > > - Rebased > - Addressed Markus's comments > - Changed indentation style > - Minor fixes and clarifications > > signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
2010/5/28 Gleb Natapov : > On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: >> 2010/5/28 Gleb Natapov : >> > On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: >> >> 2010/5/27 Gleb Natapov : >> >> > On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: >> >> >> On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka wrote: >> >> >> > Blue Swirl wrote: >> >> >> >> On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka >> >> >> >> wrote: >> >> >> >>> Anthony Liguori wrote: >> >> >> On 05/25/2010 02:09 PM, Blue Swirl wrote: >> >> >> > On Mon, May 24, 2010 at 8:13 PM, Jan Kiszka >> >> >> > wrote: >> >> >> > >> >> >> >> From: Jan Kiszka >> >> >> >> >> >> >> >> This allows to communicate potential IRQ coalescing during >> >> >> >> delivery from >> >> >> >> the sink back to the source. Targets that support IRQ coalescing >> >> >> >> workarounds need to register handlers that return the >> >> >> >> appropriate >> >> >> >> QEMU_IRQ_* code, and they have to propergate the code across >> >> >> >> all IRQ >> >> >> >> redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, >> >> >> >> it can >> >> >> >> apply its workaround. If multiple sinks exist, the source may >> >> >> >> only >> >> >> >> consider an IRQ coalesced if all other sinks either report >> >> >> >> QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. >> >> >> >> >> >> >> > No real devices are interested whether any of their output lines >> >> >> > are >> >> >> > even connected. This would introduce a new signal type, >> >> >> > bidirectional >> >> >> > multi-level, which is not correct. >> >> >> > >> >> >> I don't think it's really an issue of correct, but I wouldn't >> >> >> disagree >> >> >> to a suggestion that we ought to introduce a new signal type for >> >> >> this >> >> >> type of bidirectional feedback. Maybe it's qemu_coalesced_irq >> >> >> and has a >> >> >> similar interface as qemu_irq. >> >> >> >>> A separate type would complicate the delivery of the feedback value >> >> >> >>> across GPIO pins (as Paul requested for the RTC->HPET routing). >> >> >> >>> >> >> >> > I think the real solution to coalescing is put the logic inside >> >> >> > one >> >> >> > device, in this case APIC because it has the information about >> >> >> > irq >> >> >> > delivery. APIC could monitor incoming RTC irqs for frequency >> >> >> > information and whether they get delivered or not. If not, an >> >> >> > internal >> >> >> > timer is installed which injects the lost irqs. >> >> >> >>> That won't fly as the IRQs will already arrive at the APIC with a >> >> >> >>> sufficiently high jitter. At the bare minimum, you need to tell the >> >> >> >>> interrupt controller about the fact that a particular IRQ should be >> >> >> >>> delivered at a specific regular rate. For this, you also need a >> >> >> >>> generic >> >> >> >>> interface - nothing really "won". >> >> >> >> >> >> >> >> OK, let's simplify: just reinject at next possible chance. No need >> >> >> >> to >> >> >> >> monitor or tell anything. >> >> >> > >> >> >> > There are guests that won't like this (I know of one in-house, but >> >> >> > others may even have more examples), specifically if you end up >> >> >> > firing >> >> >> > multiple IRQs in a row due to a longer backlog. For that reason, the >> >> >> > RTC >> >> >> > spreads the reinjection according to the current rate. >> >> >> >> >> >> Then reinject with a constant delay, or next CPU exit. Such buggy >> >> > If guest's time frequency is the same as host time frequency you can't >> >> > reinject with constant delay. That is why current code mixes two >> >> > approaches: reinject M interrupts in a raw then delay. >> >> >> >> This approach can be also used by APIC-only version. >> >> >> > I don't know what APIC-only version you are talking about. I haven't >> > seen the code and I don't understand hand waving, sorry. >> >> There is no code, because we're still at architecture design stage. >> > Try to write test code to understand the problem better. I will. >> >> >> guests could also be assisted with special handling (like win2k >> >> >> install hack), for example guest instructions could be counted >> >> >> (approximately, for example using TB size or TSC) and only inject >> >> >> after at least N instructions have passed. >> >> > Guest instructions cannot be easily counted in KVM (it can be done more >> >> > or less reliably using perf counters, may be). >> >> >> >> Aren't there any debug registers or perf counters, which can generate >> >> an interrupt after some number of instructions have been executed? >> > Don't think debug registers have something like that and they are >> > available for guest use anyway. Perf counters differs greatly from CPU >> > to CPU (even between two CPUs of the same manufacturer), and we want to >> > keep using them for profilin
Re: [Qemu-devel] Re: [PATCH v2 12/15] monitor: Add basic device state visualization
Markus Armbruster wrote: > Avi Kivity writes: > >> On 05/23/2010 10:57 AM, Jan Kiszka wrote: >>> Avi Kivity wrote: >>> On 05/22/2010 11:18 AM, Jan Kiszka wrote: > From: Jan Kiszka > > This introduces device_show, a monitor command that saves the vmstate of > a qdev device and visualizes it. QMP is also supported. Buffers are cut > after 16 byte by default, but the full content can be requested via > '-f'. To pretty-print sub-arrays, vmstate is extended to store the start > index name. A new qerror is introduced to signal a missing vmstate. And > it comes with documentation. > > + > +Dump a snapshot of the device state. Buffers are cut after 16 bytes > unless > +a full dump is requested. > + > +Arguments: > + > +- "path": the device's qtree path or unique ID (json-string) > > This may be ambiguous. >>> Can your elaborate what precisely is ambiguous? >>> >> Can't the user choose the unique ID so that it aliases an unrelated >> qtree path? >> >> I prefer having mutually exclusive 'path' and 'ref' arguments. > > Don't think that's necessary. If the string starts with '/', it's an > absolute path. Else, it's a relative path rooted at the node with the > ID equal to the first component. 'pci.0' can be both a (badly chosen) ID as well as an abbreviated qtree path. > > Currently breaks down when IDs contain '/', but permitting that is a > bug. There may be more problems; the path lookup code is way too > clever. Indeed. Less can sometimes be more. My impression is that some of the cleverness was motivated by lacking qtree completion for the HMP. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v2 03/15] qdev: Allow device addressing via 'driver.instance'
Markus Armbruster wrote: > Jan Kiszka writes: > >> From: Jan Kiszka >> >> Extend qbus_find_dev to allow addressing of devices without an unique id >> via an optional per-bus instance number. The new formats are >> 'driver.instance' and 'alias.instance'. >> >> Signed-off-by: Jan Kiszka >> --- >> docs/qdev-device-use.txt | 12 +++- >> hw/qdev.c| 23 ++- >> 2 files changed, 29 insertions(+), 6 deletions(-) >> >> diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt >> index 9ac1fa1..5939481 100644 >> --- a/docs/qdev-device-use.txt >> +++ b/docs/qdev-device-use.txt >> @@ -1,6 +1,6 @@ >> = How to convert to -device & friends = >> >> -=== Specifying Bus and Address on Bus === >> +=== Specifying Bus, Address on Bus, and Devices === >> >> In qdev, each device has a parent bus. Some devices provide one or >> more buses for children. You can specify a device's parent bus with >> @@ -24,6 +24,16 @@ Furthermore, if a device only hosts a single bus, the bus >> name can be >> omitted in the path. Example: /i440FX-pcihost/PIIX3 abbreviates >> /i440FX-pcihost/pci.0/PIIX3/isa.0 as none of the buses has siblings. >> >> +Existing devices can be addressed either via a unique ID if it was >> +assigned during creation or via the device tree path: >> + >> +/full_bus_address/driver_name[.instance_number] >> +or >> +abbreviated_bus_address/driver_name[.instance_number] >> + >> +Example: /i440FX-pcihost/pci.0/e1000.2 addresses the second e1000 >> +adapter on the bus 'pci.0'. >> + >> Note: the USB device address can't be controlled at this time. > > "instance number" isn't defined in this document. True, only implicitly via the example. > > I understand the problem you're trying to solve; I've had it myself many > times. But is inventing an instance number the right solution? > > The two e1000 devices already have a perfectly fine unique identifier on > their bus: their bus address. What about recognizing bus addresses in > paths? Requires a suitable restriction on device names and IDs to avoid > ambiguity, say "start with letter". > > qdev currently doesn't abstract bus addresses, but that's fixable. You would also have to specify unique addressing scheme to those buses that do not have it yet. E.g. what should be the address of a ISA bus device? The base of its first ioport range? But if it does not have any as it only injects ISA IRQs? Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
2010/5/29 Gleb Natapov : > On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: >> >> There is no code, because we're still at architecture design stage. >> >> >> > Try to write test code to understand the problem better. >> >> I will. >> > Please do ASAP. This discussion shows that you don't understand what is the > problem that we are dialing with. Which part of the problem you think I don't understand? >> >> >> >> guests could also be assisted with special handling (like win2k >> >> >> >> install hack), for example guest instructions could be counted >> >> >> >> (approximately, for example using TB size or TSC) and only inject >> >> >> >> after at least N instructions have passed. >> >> >> > Guest instructions cannot be easily counted in KVM (it can be done >> >> >> > more >> >> >> > or less reliably using perf counters, may be). >> >> >> >> >> >> Aren't there any debug registers or perf counters, which can generate >> >> >> an interrupt after some number of instructions have been executed? >> >> > Don't think debug registers have something like that and they are >> >> > available for guest use anyway. Perf counters differs greatly from CPU >> >> > to CPU (even between two CPUs of the same manufacturer), and we want to >> >> > keep using them for profiling guests. And I don't see what problem it >> >> > will solve anyway that can be solved by simple delay between irq >> >> > reinjection. >> >> >> >> This would allow counting the executed instructions and limit it. Thus >> >> we could emulate a 500MHz CPU on a 2GHz CPU more accurately. >> >> >> > Why would you want to limit number of instruction executed by guest if >> > CPU has nothing else to do anyway? The problem occurs not when we have >> > spare cycles so give to a guest, but in opposite case. >> >> I think one problem is that the guest has executed too much compared >> to what would happen with real HW with a lesser CPU. That explains the >> RTC frequency reprogramming case. > You think wrong. The problem is exactly opposite: the guest haven't > had enough execution time between two time interrupts. I don't know what > RTC frequency reprogramming case you are talking about here. The case you told me where N pending tick IRQs exist but the guest wants to change the RTC frequency from 64Hz to 1024Hz. Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change the frequency to 1000Hz. The problem for emulation is that for the same 3 ticks, there has been so little execution power that the ticks have been coalesced. But isn't the guest cycle count then much lower than 30Mcyc? Isn't it so that the guest must be above 30Mcyc to be able to want the change? But if we reach that point, the problem must have not been too little execution time, but too much. > >> >> > >> >> >> >> >> >> >> >> >> >> >> > And even if the rate did not matter, the APIC woult still have to >> >> >> >> > now >> >> >> >> > about the fact that an IRQ is really periodic and does not only >> >> >> >> > appear >> >> >> >> > as such for a certain interval. This really does not sound like >> >> >> >> > simplifying things or even make them cleaner. >> >> >> >> >> >> >> >> It would, the voodoo would be contained only in APIC, RTC would be >> >> >> >> just like any other device. With the bidirectional irqs, this voodoo >> >> >> >> would probably eventually spread to many other devices. The logical >> >> >> >> conclusion of that would be a system where all devices would be >> >> >> >> careful not to disturb the guest at wrong moment because that would >> >> >> >> trigger a bug. >> >> >> >> >> >> >> > This voodoo will be so complex and unreliable that it will make RTC >> >> >> > hack >> >> >> > pale in comparison (and I still don't see how you are going to make >> >> >> > it >> >> >> > actually work). >> >> >> >> >> >> Implement everything inside APIC: only coalescing and reinjection. >> >> > APIC has zero info needed to implement reinjection correctly as was >> >> > shown to you several time in this thread and you simply keep ignoring >> >> > it. >> >> >> >> On the contrary, APIC is actually the only source of the IRQ ack >> >> information. RTC hack would not work without APIC (or the >> >> bidirectional IRQ) passing this info to RTC. >> >> >> >> What APIC doesn't have now is the timer frequency or period info. This >> >> is known by RTC and also higher levels managing the clocks. >> >> >> > So APIC has one bit of information and RTC everything else. >> >> The information known by RTC (timer period) is also known by higher levels. >> > What do you mean by higher level here? vl.c or APIC. vl.c, qemu-timer.c. >> > The current >> > approach (and proposed patch) brings this one bit of information to RTC, >> > you are arguing that RTC should be able to communicate all its info to >> > APIC. Sorry I don't see that your way has any advantage. Just more >> > complex interface and it is much easier to get
Re: [Qemu-devel] Re: [PATCH v3 13/17] monitor: Allow to exclude commands from QMP
Markus Armbruster wrote: > Luiz Capitulino writes: > >> On Sun, 23 May 2010 12:59:26 +0200 >> Jan Kiszka wrote: >> >>> From: Jan Kiszka >>> >>> Ported commands that are marked 'user_only' will not be considered for >>> QMP monitor sessions. This allows to implement new commands that do not >>> (yet) provide a sufficiently stable interface for QMP use (e.g. >>> device_show). >> This is fine for me, but two things I've been wondering: >> >> 1. Isn't a 'flags' struct member better? So that we can do (in the >> qemu-monitor.hx entry): >> >> .flags = MONITOR_USER_ONLY | MONITOR_HANDLER_ASYNC, >> >> I'm not suggesting this is an async handler, just exemplifying multiple >> flags. > > We also have at least one command that makes only sense in QMP: > qmp_capabilities. Maybe we could use separate flags controlling command > availability in human monitor and QMP. Sounds reasonable. Then I think I will lay the ground for this by introducing flags already in this patch. A v4 run is required anyway. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v3 12/17] monitor: return length of printed string via monitor_[v]printf
Jan Kiszka writes: > From: Jan Kiszka > > This simply forwards the result of the internal vsnprintf to the callers > of monitor_printf and monitor_vprintf. When invoked over a QMP session > or in absence of an active monitor, -1 is returned. > > Signed-off-by: Jan Kiszka Appreciated! Opens the door to addressing the TODOs in qemu-error.c.
Re: [Qemu-devel] Re: [PATCH v3 06/17] qdev: Allow device specification by qtree path for device_del
Markus Armbruster wrote: > Jan Kiszka writes: > >> Markus Armbruster wrote: >>> Jan Kiszka writes: >>> Luiz Capitulino wrote: > On Sun, 23 May 2010 12:59:19 +0200 > Jan Kiszka wrote: > >> From: Jan Kiszka >> >> Allow to specify the device to be removed via device_del not only by ID >> but also by its full or abbreviated qtree path. For this purpose, >> qdev_find is introduced which combines walking the qtree with searching >> for device IDs if required. > [...] > >> Arguments: >> >> -- "id": the device's ID (json-string) >> +- "path": the device's qtree path or unique ID (json-string) >> >> Example: >> >> --> { "execute": "device_del", "arguments": { "id": "net1" } } >> +-> { "execute": "device_del", "arguments": { "path": "net1" } } > Doesn't seem like a good change to me, besides being incompatible[1] we > shouldn't overload arguments this way in QMP as overloading leads to > interface degradation (harder to use, understand, maintain). It's not overloaded, think of an ID as a (weak) symbolic link in the qtree filesystem. The advantage of basing everything on top of full or abbreviated qtree paths is that IDs are not always assigned, paths are. >>> As long as your patch doesn't change the interpretation of IDs, we can >>> keep the old name. >>> >>> The recent review of QMP documentation may lead to a "clean up bad >>> names" flag day. One more wouldn't make it worse, I guess. >>> > Maybe we could have both arguments as optional, but one must be passed. This would at least require some way to keep the proposed unified path specification for the human monitor (having separate arguments there is really unhandy). >>> Correct. >>> >>> It would be nice to have device_del support paths in addition to IDs. >>> I'd expect management tools to slap IDs on everything, so they won't >>> care, but human users do. >>> >>> As far as I know, we have two places where we let the user name a node >>> in the qtree: device_add bus=X and device_del X. The former names a >>> bus, the latter a device. But both are nodes in the same tree, so >>> consistency is in order. >>> >>> Only devices have user-specified IDs. Buses have names assigned by the >>> system. Unique names, hopefully. >> ...but not necessarily. The bus name device_add accepts can also be a >> full, thus unambiguous path. >> >>> If the user doesn't specify a device ID, the driver name is used >>> instead. If you put multiple instances of the same device on the same >>> bus, they have the *same* path. For instance, here's a snippet of info >>> qtree after adding two usb-mouse: >>> >>> dev: piix3-usb-uhci, id "" >>> bus-prop: addr = 01.2 >>> bus-prop: romfile = >>> bus-prop: rombar = 1 >>> class USB controller, addr 00:01.2, pci id 8086:7020 (sub 1af4:1100) >>> bar 4: i/o at 0x [0x1e] >>> bus: usb.0 >>> type USB >>> dev: usb-hub, id "" >>> addr 0.0, speed 12, name QEMU USB Hub, attached >>> dev: usb-mouse, id "no2" >>> addr 0.0, speed 12, name QEMU USB Mouse, attached >>> dev: usb-mouse, id "" >>> addr 0.0, speed 12, name QEMU USB Mouse, attached >>> >>> Both devices have the same full path >>> /i440FX-pcihost/pci.0/piix3-usb-uhci/usb.0/usb-mouse >>> Which one does your code pick? Shouldn't it refuse to pick? >> Patch 3 of this series resolves this as follows: >> >> usb-mouse[.0] -> first listed instance >> usb-mouse.1 -> second instance >> ... >> >> We should probably include this numbering in the qtree dump, I guess. >> >>> By the way, you *can* put '/' in IDs. I call that a bug. >> Even if we prevent this, IDs can still collide with abbreviated device >> or bus paths. Therefore I give paths precedence over IDs in patch 4. > > You're fixing problems in the overly complex and subtle path name lookup > by making it more complex and subtle. Let's make it simple and > straightforward instead. I have no problem with ripping out all of those abbreviations, requiring the user to either specify a '/'-less unique ID or a full qtree path that must start with a '/'. If paths get long, we now have interactive completions. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote: > 2010/5/29 Gleb Natapov : > > On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: > >> >> There is no code, because we're still at architecture design stage. > >> >> > >> > Try to write test code to understand the problem better. > >> > >> I will. > >> > > Please do ASAP. This discussion shows that you don't understand what is the > > problem that we are dialing with. > > Which part of the problem you think I don't understand? > It seams to me you don't understand how Windows uses RTC for time keeping and how the QEMU solves the problem today. > >> >> >> >> guests could also be assisted with special handling (like win2k > >> >> >> >> install hack), for example guest instructions could be counted > >> >> >> >> (approximately, for example using TB size or TSC) and only inject > >> >> >> >> after at least N instructions have passed. > >> >> >> > Guest instructions cannot be easily counted in KVM (it can be done > >> >> >> > more > >> >> >> > or less reliably using perf counters, may be). > >> >> >> > >> >> >> Aren't there any debug registers or perf counters, which can generate > >> >> >> an interrupt after some number of instructions have been executed? > >> >> > Don't think debug registers have something like that and they are > >> >> > available for guest use anyway. Perf counters differs greatly from CPU > >> >> > to CPU (even between two CPUs of the same manufacturer), and we want > >> >> > to > >> >> > keep using them for profiling guests. And I don't see what problem it > >> >> > will solve anyway that can be solved by simple delay between irq > >> >> > reinjection. > >> >> > >> >> This would allow counting the executed instructions and limit it. Thus > >> >> we could emulate a 500MHz CPU on a 2GHz CPU more accurately. > >> >> > >> > Why would you want to limit number of instruction executed by guest if > >> > CPU has nothing else to do anyway? The problem occurs not when we have > >> > spare cycles so give to a guest, but in opposite case. > >> > >> I think one problem is that the guest has executed too much compared > >> to what would happen with real HW with a lesser CPU. That explains the > >> RTC frequency reprogramming case. > > You think wrong. The problem is exactly opposite: the guest haven't > > had enough execution time between two time interrupts. I don't know what > > RTC frequency reprogramming case you are talking about here. > > The case you told me where N pending tick IRQs exist but the guest > wants to change the RTC frequency from 64Hz to 1024Hz. > > Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so > 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change > the frequency to 1000Hz. > > The problem for emulation is that for the same 3 ticks, there has been > so little execution power that the ticks have been coalesced. But > isn't the guest cycle count then much lower than 30Mcyc? > > Isn't it so that the guest must be above 30Mcyc to be able to want the > change? But if we reach that point, the problem must have not been > too little execution time, but too much. > Sorry I tried hard to understand what have you said above but failed. What do you mean "to be able to want the change"? Guest sometimes wants to get 64 timer interrupts per second and sometimes it wants to get 1024 timer interrupt per second. It wants it not as a result of time drift or anything. It's just how guest behaves. You seams to be to fixated on guest frequency change. It's just something you have to take into account when you reinject interrupts. > > > >> > >> > > >> >> >> > >> >> >> >> > >> >> >> >> > And even if the rate did not matter, the APIC woult still have > >> >> >> >> > to now > >> >> >> >> > about the fact that an IRQ is really periodic and does not only > >> >> >> >> > appear > >> >> >> >> > as such for a certain interval. This really does not sound like > >> >> >> >> > simplifying things or even make them cleaner. > >> >> >> >> > >> >> >> >> It would, the voodoo would be contained only in APIC, RTC would be > >> >> >> >> just like any other device. With the bidirectional irqs, this > >> >> >> >> voodoo > >> >> >> >> would probably eventually spread to many other devices. The > >> >> >> >> logical > >> >> >> >> conclusion of that would be a system where all devices would be > >> >> >> >> careful not to disturb the guest at wrong moment because that > >> >> >> >> would > >> >> >> >> trigger a bug. > >> >> >> >> > >> >> >> > This voodoo will be so complex and unreliable that it will make > >> >> >> > RTC hack > >> >> >> > pale in comparison (and I still don't see how you are going to > >> >> >> > make it > >> >> >> > actually work). > >> >> >> > >> >> >> Implement everything inside APIC: only coalescing and reinjection. > >> >> > APIC has zero info needed to implement reinjection correctly as was > >> >> > shown to you several time in this thread and you simply keep ignoring > >> >> >
Re: [Qemu-devel] Re: [PATCH v3 06/17] qdev: Allow device specification by qtree path for device_del
Jan Kiszka writes: > Markus Armbruster wrote: >> Jan Kiszka writes: >> >>> Luiz Capitulino wrote: On Sun, 23 May 2010 12:59:19 +0200 Jan Kiszka wrote: > From: Jan Kiszka > > Allow to specify the device to be removed via device_del not only by ID > but also by its full or abbreviated qtree path. For this purpose, > qdev_find is introduced which combines walking the qtree with searching > for device IDs if required. [...] > Arguments: > > -- "id": the device's ID (json-string) > +- "path": the device's qtree path or unique ID (json-string) > > Example: > > --> { "execute": "device_del", "arguments": { "id": "net1" } } > +-> { "execute": "device_del", "arguments": { "path": "net1" } } Doesn't seem like a good change to me, besides being incompatible[1] we shouldn't overload arguments this way in QMP as overloading leads to interface degradation (harder to use, understand, maintain). >>> It's not overloaded, think of an ID as a (weak) symbolic link in the >>> qtree filesystem. The advantage of basing everything on top of full or >>> abbreviated qtree paths is that IDs are not always assigned, paths are. >> >> As long as your patch doesn't change the interpretation of IDs, we can >> keep the old name. >> >> The recent review of QMP documentation may lead to a "clean up bad >> names" flag day. One more wouldn't make it worse, I guess. >> Maybe we could have both arguments as optional, but one must be passed. >>> This would at least require some way to keep the proposed unified path >>> specification for the human monitor (having separate arguments there is >>> really unhandy). >> >> Correct. >> >> It would be nice to have device_del support paths in addition to IDs. >> I'd expect management tools to slap IDs on everything, so they won't >> care, but human users do. >> >> As far as I know, we have two places where we let the user name a node >> in the qtree: device_add bus=X and device_del X. The former names a >> bus, the latter a device. But both are nodes in the same tree, so >> consistency is in order. >> >> Only devices have user-specified IDs. Buses have names assigned by the >> system. Unique names, hopefully. > > ...but not necessarily. The bus name device_add accepts can also be a > full, thus unambiguous path. > >> >> If the user doesn't specify a device ID, the driver name is used >> instead. If you put multiple instances of the same device on the same >> bus, they have the *same* path. For instance, here's a snippet of info >> qtree after adding two usb-mouse: >> >> dev: piix3-usb-uhci, id "" >> bus-prop: addr = 01.2 >> bus-prop: romfile = >> bus-prop: rombar = 1 >> class USB controller, addr 00:01.2, pci id 8086:7020 (sub 1af4:1100) >> bar 4: i/o at 0x [0x1e] >> bus: usb.0 >> type USB >> dev: usb-hub, id "" >> addr 0.0, speed 12, name QEMU USB Hub, attached >> dev: usb-mouse, id "no2" >> addr 0.0, speed 12, name QEMU USB Mouse, attached >> dev: usb-mouse, id "" >> addr 0.0, speed 12, name QEMU USB Mouse, attached >> >> Both devices have the same full path >> /i440FX-pcihost/pci.0/piix3-usb-uhci/usb.0/usb-mouse >> Which one does your code pick? Shouldn't it refuse to pick? > > Patch 3 of this series resolves this as follows: > > usb-mouse[.0] -> first listed instance > usb-mouse.1 -> second instance > ... > > We should probably include this numbering in the qtree dump, I guess. > >> >> By the way, you *can* put '/' in IDs. I call that a bug. > > Even if we prevent this, IDs can still collide with abbreviated device > or bus paths. Therefore I give paths precedence over IDs in patch 4. You're fixing problems in the overly complex and subtle path name lookup by making it more complex and subtle. Let's make it simple and straightforward instead.
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov wrote: > On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: >> > I still don't see how the alternative is supposed to simplify our life >> > or improve the efficiency of the de-coalescing workaround. It's rather >> > problematic like Gleb pointed out: The de-coalescing logic needs to be >> > informed about periodicity changes that can only be delivered along >> > IRQs. So what to do with the backlog when the timer is stopped? >> >> What happens with the current design? Gleb only mentioned the >> frequency change, I thought that was not so big problem. But I don't >> think this case should be allowed happen at all, it can't exist on >> real HW. >> > Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP > inside QEMU, connect with gdb to QEMU process and check what frequency > RTC configured with (hint: it will be 64Hz), now run video inside the > guest and check frequency again (hint: it will be 1Khz). You missed the key word 'stopped'. If the timer is really stopped, no IRQs should ever come out afterwards, just like on real HW. For the emulation, this means loss of ticks which should have been delivered before the change. But what if the guest changed the frequency very often, and between changes used zero value, like 64Hz -> 0Hz -> 128Hz -> 0Hz -> 64Hz?
[Qemu-devel] [PATCH 1/3] qemu-thread: add qemu_mutex/cond_destroy and qemu_mutex_exit
Add some missing functions in qemu-thread. Currently qemu-thread is only used for io-thread but it will used by the vnc server soon and we need those functions instead of calling pthread directly. Signed-off-by: Corentin Chary --- qemu-thread.c | 22 ++ qemu-thread.h |4 2 files changed, 26 insertions(+), 0 deletions(-) diff --git a/qemu-thread.c b/qemu-thread.c index 3923db7..afc9933 100644 --- a/qemu-thread.c +++ b/qemu-thread.c @@ -34,6 +34,15 @@ void qemu_mutex_init(QemuMutex *mutex) error_exit(err, __func__); } +void qemu_mutex_destroy(QemuMutex *mutex) +{ +int err; + +err = pthread_mutex_destroy(&mutex->lock); +if (err) +error_exit(err, __func__); +} + void qemu_mutex_lock(QemuMutex *mutex) { int err; @@ -90,6 +99,15 @@ void qemu_cond_init(QemuCond *cond) error_exit(err, __func__); } +void qemu_cond_destroy(QemuCond *cond) +{ +int err; + +err = pthread_cond_destroy(&cond->cond); +if (err) +error_exit(err, __func__); +} + void qemu_cond_signal(QemuCond *cond) { int err; @@ -161,3 +179,7 @@ int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2) return pthread_equal(thread1->thread, thread2->thread); } +void qemu_thread_exit(void *retval) +{ +pthread_exit(retval); +} diff --git a/qemu-thread.h b/qemu-thread.h index 5ef4a3a..19bb30c 100644 --- a/qemu-thread.h +++ b/qemu-thread.h @@ -20,12 +20,14 @@ typedef struct QemuCond QemuCond; typedef struct QemuThread QemuThread; void qemu_mutex_init(QemuMutex *mutex); +void qemu_mutex_destroy(QemuMutex *mutex); void qemu_mutex_lock(QemuMutex *mutex); int qemu_mutex_trylock(QemuMutex *mutex); int qemu_mutex_timedlock(QemuMutex *mutex, uint64_t msecs); void qemu_mutex_unlock(QemuMutex *mutex); void qemu_cond_init(QemuCond *cond); +void qemu_cond_destroy(QemuCond *cond); void qemu_cond_signal(QemuCond *cond); void qemu_cond_broadcast(QemuCond *cond); void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex); @@ -37,4 +39,6 @@ void qemu_thread_create(QemuThread *thread, void qemu_thread_signal(QemuThread *thread, int sig); void qemu_thread_self(QemuThread *thread); int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2); +void qemu_thread_exit(void *retval); + #endif -- 1.7.1
Re: [Qemu-devel] [PATCH v3 04/17] qdev: Give qtree names precedence over user-assigned IDs
Jan Kiszka writes: > From: Jan Kiszka > > As the user may specify ambiguous device IDs, let's search for their > official names first before considering the user-supplied identifiers. > > Signed-off-by: Jan Kiszka The problem is letting the user specify ambiguous device IDs in the first place! That way is madness...
Re: [Qemu-devel] [PATCH v2 03/15] qdev: Allow device addressing via 'driver.instance'
Jan Kiszka writes: > From: Jan Kiszka > > Extend qbus_find_dev to allow addressing of devices without an unique id > via an optional per-bus instance number. The new formats are > 'driver.instance' and 'alias.instance'. > > Signed-off-by: Jan Kiszka > --- > docs/qdev-device-use.txt | 12 +++- > hw/qdev.c| 23 ++- > 2 files changed, 29 insertions(+), 6 deletions(-) > > diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt > index 9ac1fa1..5939481 100644 > --- a/docs/qdev-device-use.txt > +++ b/docs/qdev-device-use.txt > @@ -1,6 +1,6 @@ > = How to convert to -device & friends = > > -=== Specifying Bus and Address on Bus === > +=== Specifying Bus, Address on Bus, and Devices === > > In qdev, each device has a parent bus. Some devices provide one or > more buses for children. You can specify a device's parent bus with > @@ -24,6 +24,16 @@ Furthermore, if a device only hosts a single bus, the bus > name can be > omitted in the path. Example: /i440FX-pcihost/PIIX3 abbreviates > /i440FX-pcihost/pci.0/PIIX3/isa.0 as none of the buses has siblings. > > +Existing devices can be addressed either via a unique ID if it was > +assigned during creation or via the device tree path: > + > +/full_bus_address/driver_name[.instance_number] > +or > +abbreviated_bus_address/driver_name[.instance_number] > + > +Example: /i440FX-pcihost/pci.0/e1000.2 addresses the second e1000 > +adapter on the bus 'pci.0'. > + > Note: the USB device address can't be controlled at this time. "instance number" isn't defined in this document. I understand the problem you're trying to solve; I've had it myself many times. But is inventing an instance number the right solution? The two e1000 devices already have a perfectly fine unique identifier on their bus: their bus address. What about recognizing bus addresses in paths? Requires a suitable restriction on device names and IDs to avoid ambiguity, say "start with letter". qdev currently doesn't abstract bus addresses, but that's fixable. [...]
Re: [Qemu-devel] [PATCH] sparc64: fix 128-bit atomic load from nucleus context v1
Thanks, applied. On Fri, May 28, 2010 at 9:05 PM, Igor V. Kovalenko wrote: > From: Igor V. Kovalenko > > - change 128-bit atomic loads to reference nucleus context > v0->v1: dropped disassembler change > Signed-off-by: Igor V. Kovalenko > --- > target-sparc/op_helper.c | 10 +- > 1 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c > index edeeb44..f468e7b 100644 > --- a/target-sparc/op_helper.c > +++ b/target-sparc/op_helper.c > @@ -3060,19 +3060,19 @@ void helper_ldda_asi(target_ulong addr, int asi, int > rd) > case 0x2c: // Nucleus quad LDD 128 bit atomic LE > helper_check_align(addr, 0xf); > if (rd == 0) { > - env->gregs[1] = ldq_kernel(addr + 8); > + env->gregs[1] = ldq_nucleus(addr + 8); > if (asi == 0x2c) > bswap64s(&env->gregs[1]); > } else if (rd < 8) { > - env->gregs[rd] = ldq_kernel(addr); > - env->gregs[rd + 1] = ldq_kernel(addr + 8); > + env->gregs[rd] = ldq_nucleus(addr); > + env->gregs[rd + 1] = ldq_nucleus(addr + 8); > if (asi == 0x2c) { > bswap64s(&env->gregs[rd]); > bswap64s(&env->gregs[rd + 1]); > } > } else { > - env->regwptr[rd] = ldq_kernel(addr); > - env->regwptr[rd + 1] = ldq_kernel(addr + 8); > + env->regwptr[rd] = ldq_nucleus(addr); > + env->regwptr[rd + 1] = ldq_nucleus(addr + 8); > if (asi == 0x2c) { > bswap64s(&env->regwptr[rd]); > bswap64s(&env->regwptr[rd + 1]); > > >
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: > On Sat, May 29, 2010 at 10:16 AM, Jan Kiszka wrote: >> Blue Swirl wrote: >> This is - according to my current understanding - the proposed >> alternative architecture: >> >> .---. >> | de-coalescing | >> | logic | >> '---' >>^ | >> period,| |IRQ >> coalesced| |(or tuned VM clock) >>(yes/no)| v >> .---. .. .---. >> | RTC |-IRQ->| router |-IRQ>| APIC | >> '---' '' '---' >>|^| ^ >>||| | >>'---period---''--period---' > The period information is already known by the higher level clock > management, The timer device models program the next event of some qemu-timer. There is no tag attached explaining the timer subsystem or anyone else the reason behind this programming. >>> Yes, but why would we care? All timers in the system besides RTC >>> should be affected likewise, this would in fact be the benefit from >>> handling the problem at higher level. >> And how does your equation for calculating the clock slow-down look like? > > How about icount_adjust()? I would suggest that you implement your ideas now. Please keep us informed about the progress as this series (and more) depends on a decision. Jan signature.asc Description: OpenPGP digital signature
[Qemu-devel] [PATCH 2/3] qemu-thread: add cleanup_push() and cleanup_pop()
>From pthread man: These functions manipulate the calling thread's stack of thread-cancellation clean-up handlers. A clean-up handler is a function that is automatically executed when a thread is canceled [...] it might, for example, unlock a mutex so that it becomes available to other threads in the process. These two functions are implemented using macros because there is no other way to do that (pthread man, again): On Linux, the pthread_cleanup_push() and pthread_cleanup_pop() functions are implemented as macros that expand to text containing '{' and '}', respectively. This means that variables declared within the scope of paired calls to these functions will only be visible within that scope. Signed-off-by: Corentin Chary --- qemu-thread.h |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/qemu-thread.h b/qemu-thread.h index 19bb30c..e5006bb 100644 --- a/qemu-thread.h +++ b/qemu-thread.h @@ -41,4 +41,8 @@ void qemu_thread_self(QemuThread *thread); int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2); void qemu_thread_exit(void *retval); +#define qemu_thread_cleanup_pop(execute) pthread_cleanup_pop(execute) +#define qemu_thread_cleanup_push(routine, arg) \ +pthread_cleanup_push(routine, arg) + #endif -- 1.7.1
Re: [Qemu-devel] [PATCH v2 02/15] qdev: Fix scanning across single-bus devices
[cc: kraxel] Jan Kiszka writes: > From: Jan Kiszka > > As long as we allow /dev.1 as shortcut for /dev1/bus1, we also have to > make sure that /dev1/dev2 works for /dev1/bus1/dev2/bus2 - as long as > there is only one child bus per device. We auto-root a path not starting with '/' via convention "first component is ID then" (I like that). We auto-complete a path ending with a device when we want a bus (a bit too clever). Auto-inserting bus names in the middle is too clever by half! Such cleverness can be convenient in the human monitor, but all it adds to QMP is complexity. I'm worried about possibly ambigous interpretation of paths. Would be bad if a path could name different node after we add something to the tree. I *think* your fine print makes that impossible, but it's not exactly obvious. Heck, I could be wrong. Wouldn't it be better to put the convenience into the interactive completion function? That way we keep it out of QMP. Subject is misleading, it's a feature, not a bug fix.
[Qemu-devel] Re: [PATCH] savevm: Really verify if a drive supports snapshots
Kevin Wolf writes: > Am 28.05.2010 20:18, schrieb Miguel Di Ciurcio Filho: >> Both bdrv_can_snapshot() and bdrv_has_snapshot() does not work as advertized. >> >> First issue: Their names implies different porpouses, but they do the same >> thing >> and have exactly the same code. Maybe copied and pasted and forgotten? >> bdrv_has_snapshot() is called in various places for actually checking if >> there >> is snapshots or not. >> >> Second issue: the way bdrv_can_snapshot() verifies if a block driver >> supports or >> not snapshots does not catch all cases. E.g.: a raw image. >> >> So when do_savevm() is called, first thing it does is to set a global >> BlockDriverState to save the VM memory state calling get_bs_snapshots(). >> >> static BlockDriverState *get_bs_snapshots(void) >> { >> BlockDriverState *bs; >> DriveInfo *dinfo; >> >> if (bs_snapshots) >> return bs_snapshots; >> QTAILQ_FOREACH(dinfo, &drives, next) { >> bs = dinfo->bdrv; >> if (bdrv_can_snapshot(bs)) >> goto ok; >> } >> return NULL; >> ok: >> bs_snapshots = bs; >> return bs; >> } >> >> bdrv_can_snapshot() may return a BlockDriverState that does not support >> snapshots and do_savevm() goes on. >> >> Later on in do_savevm(), we find: >> >> QTAILQ_FOREACH(dinfo, &drives, next) { >> bs1 = dinfo->bdrv; >> if (bdrv_has_snapshot(bs1)) { >> /* Write VM state size only to the image that contains the state >> */ >> sn->vm_state_size = (bs == bs1 ? vm_state_size : 0); >> ret = bdrv_snapshot_create(bs1, sn); >> if (ret < 0) { >> monitor_printf(mon, "Error while creating snapshot on >> '%s'\n", >>bdrv_get_device_name(bs1)); >> } >> } >> } >> >> bdrv_has_snapshot(bs1) is not checking if the device does support or has >> snapshots as explained above. Only in bdrv_snapshot_create() the device is >> actually checked for snapshot support. >> >> So, in cases where the first device supports snapshots, and the second does >> not, >> the snapshot on the first will happen anyways. I believe this is not a good >> behavior. It should be an all or nothing process. >> >> This patch addresses these issues by making bdrv_can_snapshot() and >> bdrv_has_snapshot() actually do what they must do and enforces better tests >> to >> avoid errors in the middle of do_savevm(). >> >> The functions were moved from savevm.c to block.c. It makes more sense to me. >> >> The bdrv_has_snapshot() is not beaultiful, but it does the job. I think >> having >> this function avaible in the BlockDriver would be the best option. >> >> The loadvm_state() function was updated too to enforce that when loading a >> VM at >> least all writable devices must support snapshots too. >> >> Signed-off-by: Miguel Di Ciurcio Filho > > Markus, I think this implements mostly what we discussed the other day. > Not sure if you already have a patch for doing this - if so, maybe could > compare the patches and give it a review this way? Glad I don't, because this one looks pretty good. > I seem to remember that we came to the conclusion that > bdrv_has_snapshot() isn't needed at all and should be dropped. Any user > should be using bdrv_can_snapshot() instead as this is what they really > want. Our reasoning adapted to the patched code goes like this. The remaining uses of bdrv_has_snapshot() are of the form: if (bdrv_has_snapshot(bs) some_snapshot_op() where some_snapshot_op() fails when there are no snapshots, and the code can recognize that failure as "sorry, no snapshots" (that's what it does before your patch, when bdrv_has_snapshot() is really no more than a necessary, but not sufficient condition for "has snapshots"). Therefore, we don't have to check for actual existence of snapshots with bdrv_has_snapshot(). bdrv_can_snapshot() suffices. In general, I consider if (predict_whether_foo_will_work) if (foo() < 0) // foo failed else // foo would have failed (we think) an anti-pattern. Just make foo() safe to try, and check its status: if (foo() < 0) // foo failed Except when foo() is too expensive to try, or can't be made safe. >> --- >> block.c | 47 ++- >> block.h |2 ++ >> savevm.c | 48 +--- >> 3 files changed, 69 insertions(+), 28 deletions(-) >> >> diff --git a/block.c b/block.c >> index cd70730..7eddc15 100644 >> --- a/block.c >> +++ b/block.c >> @@ -1720,15 +1720,52 @@ void bdrv_debug_event(BlockDriverState *bs, >> BlkDebugEvent event) >> /**/ >> /* handling of snapshots */ >> >> -int bdrv_snapshot_create(BlockDriverState *bs, >> - QEMUSnapshotInfo *sn_info) >> +int bdrv_can_snapshot(BlockDriverState *bs) >> {
Re: [Qemu-devel] cg14
Artyom Tarasenko wrote: > 2010/5/28 Blue Swirl : > >> On Fri, May 28, 2010 at 7:54 AM, Bob Breuer wrote: >> >>> Artyom Tarasenko wrote: >>> 2010/5/27 Bob Breuer : > Artyom Tarasenko wrote: > > >> Was going to put some more empty slots into SS-10/20 (VSIMMs, SX) >> after we are done with SS-5 (due to technical limitations I can switch >> access from one real SS model to another one once a few days only). >> >> >> > I have a partial implementation of the SS-20 VSIMM (cg14) that I've been > working on. With the Sun firmware, I have working text console, color > boot logo, and programmable video resolutions up to 1600x1280. > > Great news! This would allow qemu booting NeXTStep! Are you planning to submit the patches any time soon? >>> It's not in a state to be submitted yet, but I've attached a working >>> patch if you want to give it a try. I need to hook it up to qdev and >>> fill in some more of the obviously incomplete switch cases before I'd >>> sign off on it. >>> >> Nice work. I have a few comments below. >> >> This probably needs support from OpenBIOS to be usable without OBP. >> > > Maybe it can be used as a second adapter without OpenBIOS support? At > least under some OSes? > > Probably won't be used without at least being in the firmware device tree. One area that OpenBIOS could enhance would be a larger memory size option. The real hardware was only available in 4M and 8M options, but the memory map allows for 16M. OBP will identify a 16M VSIMM but won't do anything else with it, and with 16M of vram it would allow for a potential 2560x1600 32bit resolution. >>> Bob >>> >>> >>> diff --git a/Makefile.target b/Makefile.target >>> index fda5bf3..b17b3af 100644 >>> --- a/Makefile.target >>> +++ b/Makefile.target >>> @@ -250,6 +250,7 @@ else >>> obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o >>> obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o >>> obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o >>> +obj-sparc-y += cg14.o >>> endif >>> >>> obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o >>> diff --git a/hw/sun4m.c b/hw/sun4m.c >>> index 7ba0f76..8b23c9b 100644 >>> --- a/hw/sun4m.c >>> +++ b/hw/sun4m.c >>> @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef >>> *hwdef, ram_addr_t RAM_size, >>> fprintf(stderr, "qemu: Unsupported depth: %d\n", graphic_depth); >>> exit (1); >>> } >>> + if (hwdef->machine_id == 65) { /* SS-20 */ >>> >> hwdef structure should contain a field for cg14. If non-zero, install >> cg14. Was cg14 only available for SS-20? Was it always included? This >> is also interesting for OpenBIOS, we need to detect cg14 vs. TCX. >> The cg14 was only an option for SS-20 and the rare SS-10SX, but not the regular SS-10, though the SS-10 chipset may have been capable of supporting it. Each cg14 vsimm takes the place of a stick of memory with 2 slots physically capable of holding a vsimm. Is there a way to pass the framebuffer type and/or address to OpenBIOS? I would be inclined to have the SS-20 machine default to cg14 instead of TCX, but it will blow a hole in the support of more than 2G of emulated system ram. >>> +/* cg14.c */ >>> +void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t >>> vram_base, >>> +uint32_t vram_size); >>> >> This should go to sun4m.h or cg14.h. >> >> >>> + >>> +cg14_init(0x09c00ULL, 0x0fc00ULL, 8<<20); >>> + } else >>> >> Please add braces and reindent. >> >> >>> tcx_init(hwdef->tcx_base, 0x0010, graphic_width, graphic_height, >>> graphic_depth); >>> >>> --- /dev/null Fri May 28 02:08:36 2010 >>> +++ hw/cg14.c Fri May 28 01:58:49 2010 >>> @@ -0,0 +1,785 @@ >>> +/* >>> + * QEMU CG14 Frame buffer >>> + * >>> + * Copyright (c) 2010 Bob Breuer >>> + * >>> + * Permission is hereby granted, free of charge, to any person obtaining a >>> copy >>> + * of this software and associated documentation files (the "Software"), >>> to deal >>> + * in the Software without restriction, including without limitation the >>> rights >>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or >>> sell >>> + * copies of the Software, and to permit persons to whom the Software is >>> + * furnished to do so, subject to the following conditions: >>> + * >>> + * The above copyright notice and this permission notice shall be included >>> in >>> + * all copies or substantial portions of the Software. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >>> OR >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >>> + * THE AUTHORS OR COPYRIGHT HOLDE
[Qemu-devel] Re: [SeaBIOS] SMBIOS strings
On Fri, May 28, 2010 at 05:24:47PM +0200, Jes Sorensen wrote: > We were looking at the dmidecode output from qemu-kvm pre-seabios and > current qemu-kvm and noticed some of the strings have changed. [...] > I wanted to check with the lists if there are any strong feelings about > this, and whether some of these changes were made for specific reasons? > > For example: [...] > - Vendor: QEMU > - Version: QEMU > + Vendor: Bochs > + Version: Bochs [...] > - Manufacturer: QEMU > - ID: 63 06 00 00 FD FB 8B 07 > + Manufacturer: Bochs > + ID: 23 06 00 00 FD FB 8B 07 I made these changes in SeaBIOS when I was cleaning up the ifdefs in the code. Instead of "ifdef QEMU" spread around the code, I abstracted out the various names into CONFIG_APPNAME. I defaulted the names to the Bochs based ones because they were the default in the original Bochs bios code. So, the only reason for these string differences is that the default name hasn't been changed. > Otherwise, if there are no objections, I'll look at adding some patches > to make it more backwards compatible. I wonder if making the default names be "SeaBIOS" based instead of bochs/qemu based would make sense. That way there isn't an impulse to change the settings with every emulator seabios runs on. (Ideally, the same binary would be run on multiple emulators to simplify testing.) -Kevin
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 04:13:22PM +, Blue Swirl wrote: > On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov wrote: > > On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: > >> > I still don't see how the alternative is supposed to simplify our life > >> > or improve the efficiency of the de-coalescing workaround. It's rather > >> > problematic like Gleb pointed out: The de-coalescing logic needs to be > >> > informed about periodicity changes that can only be delivered along > >> > IRQs. So what to do with the backlog when the timer is stopped? > >> > >> What happens with the current design? Gleb only mentioned the > >> frequency change, I thought that was not so big problem. But I don't > >> think this case should be allowed happen at all, it can't exist on > >> real HW. > >> > > Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP > > inside QEMU, connect with gdb to QEMU process and check what frequency > > RTC configured with (hint: it will be 64Hz), now run video inside the > > guest and check frequency again (hint: it will be 1Khz). > > You missed the key word 'stopped'. If the timer is really stopped, no > IRQs should ever come out afterwards, just like on real HW. For the > emulation, this means loss of ticks which should have been delivered > before the change. > I haven't missed it. I describe to you reality of the situation. You want to change reality to be more close to what you want it to be by adding words to my description. Please just go write code, experiment, debug and _then_ come here with design. > But what if the guest changed the frequency very often, and between > changes used zero value, like 64Hz -> 0Hz -> 128Hz -> 0Hz -> 64Hz? Too bad, the world is not perfect. -- Gleb.
Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr
On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno wrote: > On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote: >> Booting an arm kernel has been broken a while when booting from non zero >> start >> address. This is due to the order of events: board init loads the kernel and >> sets register 15 to the start address and then qemu_system_reset reset the >> cpu >> making register 15 zero again. >> >> This patch fixes the usage of the register 15 start address trick in >> combination with arm_load_kernel. >> >> Signed-off-by: Lars Munch >> --- >> hw/arm_boot.c | 1 + >> hw/gumstix.c | 4 >> hw/mainstone.c | 3 --- >> hw/nseries.c | 7 --- >> hw/omap_sx1.c | 5 - >> hw/palm.c | 4 >> hw/spitz.c | 3 --- >> hw/tosa.c | 3 --- >> target-arm/helper.c | 1 - >> 9 files changed, 1 insertions(+), 30 deletions(-) >> >> diff --git a/hw/arm_boot.c b/hw/arm_boot.c >> index df031a5..620550b 100644 >> --- a/hw/arm_boot.c >> +++ b/hw/arm_boot.c >> @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque) >> env->regs[15] = info->entry & 0xfffe; >> env->thumb = info->entry & 1; >> } else { >> + env->regs[15] = info->loader_start; >> if (old_param) { >> set_kernel_args_old(info, info->initrd_size, >> info->loader_start); > > This parts looks fine. > >> diff --git a/hw/gumstix.c b/hw/gumstix.c >> index 3fd31f4..b64e04e 100644 >> --- a/hw/gumstix.c >> +++ b/hw/gumstix.c >> @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size, >> exit(1); >> } >> >> - cpu->env->regs[15] = 0x; >> - >> /* Interrupt line of NIC is connected to GPIO line 36 */ >> smc91c111_init(&nd_table[0], 0x04000300, >> pxa2xx_gpio_in_get(cpu->gpio)[36]); >> @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size, >> exit(1); >> } >> >> - cpu->env->regs[15] = 0x; >> - >> /* Interrupt line of NIC is connected to GPIO line 99 */ >> smc91c111_init(&nd_table[0], 0x04000300, >> pxa2xx_gpio_in_get(cpu->gpio)[99]); >> diff --git a/hw/mainstone.c b/hw/mainstone.c >> index a4379e3..54bacfb 100644 >> --- a/hw/mainstone.c >> +++ b/hw/mainstone.c >> @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size, >> cpu_register_physical_memory(0, MAINSTONE_ROM, >> qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM); >> >> - /* Setup initial (reset) machine state */ >> - cpu->env->regs[15] = mainstone_binfo.loader_start; >> - >> #ifdef TARGET_WORDS_BIGENDIAN >> be = 1; >> #else >> diff --git a/hw/nseries.c b/hw/nseries.c >> index 0273eee..04a028d 100644 >> --- a/hw/nseries.c >> +++ b/hw/nseries.c >> @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque) >> n800_dss_init(&s->blizzard); >> >> /* CPU setup */ >> - s->cpu->env->regs[15] = s->cpu->env->boot_info->loader_start; >> s->cpu->env->GE = 0x5; >> >> /* If the machine has a slided keyboard, open it */ >> @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const char >> *boot_device, >> if (usb_enabled) >> n8x0_usb_setup(s); >> >> - /* Setup initial (reset) machine state */ >> - >> - /* Start at the OneNAND bootloader. */ >> - s->cpu->env->regs[15] = 0; >> - >> if (kernel_filename) { >> /* Or at the linux loader. */ >> binfo->kernel_filename = kernel_filename; >> @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const char >> *boot_device, >> arm_load_kernel(s->cpu->env, binfo); >> >> qemu_register_reset(n8x0_boot_init, s); >> - n8x0_boot_init(s); >> } >> >> if (option_rom[0] && (boot_device[0] == 'n' || !kernel_filename)) { >> diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c >> index ca0a7d1..2e9879f 100644 >> --- a/hw/omap_sx1.c >> +++ b/hw/omap_sx1.c >> @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size, >> >> /* Load the kernel. */ >> if (kernel_filename) { >> - /* Start at bootloader. */ >> - cpu->env->regs[15] = sx1_binfo.loader_start; >> - >> sx1_binfo.kernel_filename = kernel_filename; >> sx1_binfo.kernel_cmdline = kernel_cmdline; >> sx1_binfo.initrd_filename = initrd_filename; >> arm_load_kernel(cpu->env, &sx1_binfo); >> - } else { >> - cpu->env->regs[15] = 0x; >> } >> >> /* TODO: fix next line */ >> diff --git a/hw/palm.c b/hw/palm.c >> index ba7c398..8db133d 100644 >> --- a/hw/palm.c >> +++ b/hw/palm.c >> @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size, >> rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE, >> flash_size); >> rom_loaded = 1; >> - cpu->env->regs[15] = 0x00
Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr
On Sat, May 29, 2010 at 08:42:52PM +0200, Lars Munch wrote: > On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno wrote: > > On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote: > >> Booting an arm kernel has been broken a while when booting from non zero > >> start > >> address. This is due to the order of events: board init loads the kernel > >> and > >> sets register 15 to the start address and then qemu_system_reset reset the > >> cpu > >> making register 15 zero again. > >> > >> This patch fixes the usage of the register 15 start address trick in > >> combination with arm_load_kernel. > >> > >> Signed-off-by: Lars Munch > >> --- > >> hw/arm_boot.c | 1 + > >> hw/gumstix.c | 4 > >> hw/mainstone.c | 3 --- > >> hw/nseries.c | 7 --- > >> hw/omap_sx1.c | 5 - > >> hw/palm.c | 4 > >> hw/spitz.c | 3 --- > >> hw/tosa.c | 3 --- > >> target-arm/helper.c | 1 - > >> 9 files changed, 1 insertions(+), 30 deletions(-) > >> > >> diff --git a/hw/arm_boot.c b/hw/arm_boot.c > >> index df031a5..620550b 100644 > >> --- a/hw/arm_boot.c > >> +++ b/hw/arm_boot.c > >> @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque) > >> env->regs[15] = info->entry & 0xfffe; > >> env->thumb = info->entry & 1; > >> } else { > >> + env->regs[15] = info->loader_start; > >> if (old_param) { > >> set_kernel_args_old(info, info->initrd_size, > >> info->loader_start); > > > > This parts looks fine. > > > >> diff --git a/hw/gumstix.c b/hw/gumstix.c > >> index 3fd31f4..b64e04e 100644 > >> --- a/hw/gumstix.c > >> +++ b/hw/gumstix.c > >> @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size, > >> exit(1); > >> } > >> > >> - cpu->env->regs[15] = 0x; > >> - > >> /* Interrupt line of NIC is connected to GPIO line 36 */ > >> smc91c111_init(&nd_table[0], 0x04000300, > >> pxa2xx_gpio_in_get(cpu->gpio)[36]); > >> @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size, > >> exit(1); > >> } > >> > >> - cpu->env->regs[15] = 0x; > >> - > >> /* Interrupt line of NIC is connected to GPIO line 99 */ > >> smc91c111_init(&nd_table[0], 0x04000300, > >> pxa2xx_gpio_in_get(cpu->gpio)[99]); > >> diff --git a/hw/mainstone.c b/hw/mainstone.c > >> index a4379e3..54bacfb 100644 > >> --- a/hw/mainstone.c > >> +++ b/hw/mainstone.c > >> @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size, > >> cpu_register_physical_memory(0, MAINSTONE_ROM, > >> qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM); > >> > >> - /* Setup initial (reset) machine state */ > >> - cpu->env->regs[15] = mainstone_binfo.loader_start; > >> - > >> #ifdef TARGET_WORDS_BIGENDIAN > >> be = 1; > >> #else > >> diff --git a/hw/nseries.c b/hw/nseries.c > >> index 0273eee..04a028d 100644 > >> --- a/hw/nseries.c > >> +++ b/hw/nseries.c > >> @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque) > >> n800_dss_init(&s->blizzard); > >> > >> /* CPU setup */ > >> - s->cpu->env->regs[15] = s->cpu->env->boot_info->loader_start; > >> s->cpu->env->GE = 0x5; > >> > >> /* If the machine has a slided keyboard, open it */ > >> @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const > >> char *boot_device, > >> if (usb_enabled) > >> n8x0_usb_setup(s); > >> > >> - /* Setup initial (reset) machine state */ > >> - > >> - /* Start at the OneNAND bootloader. */ > >> - s->cpu->env->regs[15] = 0; > >> - > >> if (kernel_filename) { > >> /* Or at the linux loader. */ > >> binfo->kernel_filename = kernel_filename; > >> @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const > >> char *boot_device, > >> arm_load_kernel(s->cpu->env, binfo); > >> > >> qemu_register_reset(n8x0_boot_init, s); > >> - n8x0_boot_init(s); > >> } > >> > >> if (option_rom[0] && (boot_device[0] == 'n' || !kernel_filename)) { > >> diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c > >> index ca0a7d1..2e9879f 100644 > >> --- a/hw/omap_sx1.c > >> +++ b/hw/omap_sx1.c > >> @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size, > >> > >> /* Load the kernel. */ > >> if (kernel_filename) { > >> - /* Start at bootloader. */ > >> - cpu->env->regs[15] = sx1_binfo.loader_start; > >> - > >> sx1_binfo.kernel_filename = kernel_filename; > >> sx1_binfo.kernel_cmdline = kernel_cmdline; > >> sx1_binfo.initrd_filename = initrd_filename; > >> arm_load_kernel(cpu->env, &sx1_binfo); > >> - } else { > >> - cpu->env->regs[15] = 0x; > >> } > >> > >> /* TODO: fix next line */ > >> diff --git a/hw/palm.c b/hw/palm.c > >> index ba7c398..8db13
[Qemu-devel] Re: [PATCH] savevm: Really verify if a drive supports snapshots
On Sat, May 29, 2010 at 3:06 AM, Markus Armbruster wrote: > >> I seem to remember that we came to the conclusion that >> bdrv_has_snapshot() isn't needed at all and should be dropped. Any user >> should be using bdrv_can_snapshot() instead as this is what they really >> want. > > Our reasoning adapted to the patched code goes like this. The remaining > uses of bdrv_has_snapshot() are of the form: > > if (bdrv_has_snapshot(bs) > some_snapshot_op() > > where some_snapshot_op() fails when there are no snapshots, and the code > can recognize that failure as "sorry, no snapshots" (that's what it does > before your patch, when bdrv_has_snapshot() is really no more than a > necessary, but not sufficient condition for "has snapshots"). > > Therefore, we don't have to check for actual existence of snapshots with > bdrv_has_snapshot(). bdrv_can_snapshot() suffices. I had this feeling too, but I was conservative and kept bdrv_has_snapshot(). I will remove it and replace by bdrv_can_snapshot() where appropriate. >>> +int bdrv_can_snapshot(BlockDriverState *bs) >>> { >>> BlockDriver *drv = bs->drv; >>> - if (!drv) >>> + if (!drv) { >>> + return -ENOMEDIUM; >>> + } >>> + >>> + if (!drv->bdrv_snapshot_create || bdrv_is_removable(bs) || >>> + bdrv_is_read_only(bs)) { >>> + return -ENOTSUP; >>> + } >>> + >>> + return 1; >>> +} >> >> Returning either 1 or -errno is a strange interface. I'm not sure which > > Agree. > >> of 1/0 or 0/-errno is better in this case, but I'd suggest to take one >> of these. > > Does any existing caller care for the error codes? If not, just go with > 1/0. When bdrv_can_snapshot() is called the specific reason for not supporting snapshots does not matter. So, 1/0 is better. I will fix it and resend. Regards, Miguel
Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr
On Sat, May 29, 2010 at 8:51 PM, Aurelien Jarno wrote: > On Sat, May 29, 2010 at 08:42:52PM +0200, Lars Munch wrote: >> On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno wrote: >> > On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote: >> >> Booting an arm kernel has been broken a while when booting from non zero >> >> start >> >> address. This is due to the order of events: board init loads the kernel >> >> and >> >> sets register 15 to the start address and then qemu_system_reset reset >> >> the cpu >> >> making register 15 zero again. >> >> >> >> This patch fixes the usage of the register 15 start address trick in >> >> combination with arm_load_kernel. >> >> >> >> Signed-off-by: Lars Munch >> >> --- >> >> hw/arm_boot.c | 1 + >> >> hw/gumstix.c | 4 >> >> hw/mainstone.c | 3 --- >> >> hw/nseries.c | 7 --- >> >> hw/omap_sx1.c | 5 - >> >> hw/palm.c | 4 >> >> hw/spitz.c | 3 --- >> >> hw/tosa.c | 3 --- >> >> target-arm/helper.c | 1 - >> >> 9 files changed, 1 insertions(+), 30 deletions(-) >> >> >> >> diff --git a/hw/arm_boot.c b/hw/arm_boot.c >> >> index df031a5..620550b 100644 >> >> --- a/hw/arm_boot.c >> >> +++ b/hw/arm_boot.c >> >> @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque) >> >> env->regs[15] = info->entry & 0xfffe; >> >> env->thumb = info->entry & 1; >> >> } else { >> >> + env->regs[15] = info->loader_start; >> >> if (old_param) { >> >> set_kernel_args_old(info, info->initrd_size, >> >> info->loader_start); >> > >> > This parts looks fine. >> > >> >> diff --git a/hw/gumstix.c b/hw/gumstix.c >> >> index 3fd31f4..b64e04e 100644 >> >> --- a/hw/gumstix.c >> >> +++ b/hw/gumstix.c >> >> @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size, >> >> exit(1); >> >> } >> >> >> >> - cpu->env->regs[15] = 0x; >> >> - >> >> /* Interrupt line of NIC is connected to GPIO line 36 */ >> >> smc91c111_init(&nd_table[0], 0x04000300, >> >> pxa2xx_gpio_in_get(cpu->gpio)[36]); >> >> @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size, >> >> exit(1); >> >> } >> >> >> >> - cpu->env->regs[15] = 0x; >> >> - >> >> /* Interrupt line of NIC is connected to GPIO line 99 */ >> >> smc91c111_init(&nd_table[0], 0x04000300, >> >> pxa2xx_gpio_in_get(cpu->gpio)[99]); >> >> diff --git a/hw/mainstone.c b/hw/mainstone.c >> >> index a4379e3..54bacfb 100644 >> >> --- a/hw/mainstone.c >> >> +++ b/hw/mainstone.c >> >> @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size, >> >> cpu_register_physical_memory(0, MAINSTONE_ROM, >> >> qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM); >> >> >> >> - /* Setup initial (reset) machine state */ >> >> - cpu->env->regs[15] = mainstone_binfo.loader_start; >> >> - >> >> #ifdef TARGET_WORDS_BIGENDIAN >> >> be = 1; >> >> #else >> >> diff --git a/hw/nseries.c b/hw/nseries.c >> >> index 0273eee..04a028d 100644 >> >> --- a/hw/nseries.c >> >> +++ b/hw/nseries.c >> >> @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque) >> >> n800_dss_init(&s->blizzard); >> >> >> >> /* CPU setup */ >> >> - s->cpu->env->regs[15] = s->cpu->env->boot_info->loader_start; >> >> s->cpu->env->GE = 0x5; >> >> >> >> /* If the machine has a slided keyboard, open it */ >> >> @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const >> >> char *boot_device, >> >> if (usb_enabled) >> >> n8x0_usb_setup(s); >> >> >> >> - /* Setup initial (reset) machine state */ >> >> - >> >> - /* Start at the OneNAND bootloader. */ >> >> - s->cpu->env->regs[15] = 0; >> >> - >> >> if (kernel_filename) { >> >> /* Or at the linux loader. */ >> >> binfo->kernel_filename = kernel_filename; >> >> @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const >> >> char *boot_device, >> >> arm_load_kernel(s->cpu->env, binfo); >> >> >> >> qemu_register_reset(n8x0_boot_init, s); >> >> - n8x0_boot_init(s); >> >> } >> >> >> >> if (option_rom[0] && (boot_device[0] == 'n' || !kernel_filename)) { >> >> diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c >> >> index ca0a7d1..2e9879f 100644 >> >> --- a/hw/omap_sx1.c >> >> +++ b/hw/omap_sx1.c >> >> @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size, >> >> >> >> /* Load the kernel. */ >> >> if (kernel_filename) { >> >> - /* Start at bootloader. */ >> >> - cpu->env->regs[15] = sx1_binfo.loader_start; >> >> - >> >> sx1_binfo.kernel_filename = kernel_filename; >> >> sx1_binfo.kernel_cmdline = kernel_cmdline; >> >> sx1_binfo.initrd_filename = initrd_filename; >> >> arm_load_kernel(cpu->env, &sx1_binfo);
[Qemu-devel] [PATCH v2] savevm: Really verify if a drive supports snapshots
Both bdrv_can_snapshot() and bdrv_has_snapshot() does not work as advertized. First issue: Their names implies different porpouses, but they do the same thing and have exactly the same code. Maybe copied and pasted and forgotten? bdrv_has_snapshot() is called in various places for actually checking if there is snapshots or not. Second issue: the way bdrv_can_snapshot() verifies if a block driver supports or not snapshots does not catch all cases. E.g.: a raw image. So when do_savevm() is called, first thing it does is to set a global BlockDriverState to save the VM memory state calling get_bs_snapshots(). static BlockDriverState *get_bs_snapshots(void) { BlockDriverState *bs; DriveInfo *dinfo; if (bs_snapshots) return bs_snapshots; QTAILQ_FOREACH(dinfo, &drives, next) { bs = dinfo->bdrv; if (bdrv_can_snapshot(bs)) goto ok; } return NULL; ok: bs_snapshots = bs; return bs; } bdrv_can_snapshot() may return a BlockDriverState that does not support snapshots and do_savevm() goes on. Later on in do_savevm(), we find: QTAILQ_FOREACH(dinfo, &drives, next) { bs1 = dinfo->bdrv; if (bdrv_has_snapshot(bs1)) { /* Write VM state size only to the image that contains the state */ sn->vm_state_size = (bs == bs1 ? vm_state_size : 0); ret = bdrv_snapshot_create(bs1, sn); if (ret < 0) { monitor_printf(mon, "Error while creating snapshot on '%s'\n", bdrv_get_device_name(bs1)); } } } bdrv_has_snapshot(bs1) is not checking if the device does support or has snapshots as explained above. Only in bdrv_snapshot_create() the device is actually checked for snapshot support. So, in cases where the first device supports snapshots, and the second does not, the snapshot on the first will happen anyways. I believe this is not a good behavior. It should be an all or nothing process. This patch addresses these issues by making bdrv_can_snapshot() actually do what it must do and enforces better tests to avoid errors in the middle of do_savevm(). bdrv_has_snapshot() is removed and replaced by bdrv_can_snapshot() where appropriate. bdrv_can_snapshot() was moved from savevm.c to block.c. It makes more sense to me. The loadvm_state() function was updated too to enforce that when loading a VM at least all writable devices must support snapshots too. Signed-off-by: Miguel Di Ciurcio Filho --- block.c | 21 - block.h |1 + monitor.c |5 - savevm.c | 58 -- 4 files changed, 57 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index cd70730..1115a60 100644 --- a/block.c +++ b/block.c @@ -1720,15 +1720,26 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) /**/ /* handling of snapshots */ +int bdrv_can_snapshot(BlockDriverState *bs) +{ +BlockDriver *drv = bs->drv; +if (!drv || !drv->bdrv_snapshot_create || bdrv_is_removable(bs) || +bdrv_is_read_only(bs)) { +return 0; +} + +return 1; +} + int bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) { BlockDriver *drv = bs->drv; -if (!drv) -return -ENOMEDIUM; -if (!drv->bdrv_snapshot_create) -return -ENOTSUP; -return drv->bdrv_snapshot_create(bs, sn_info); +if (bdrv_can_snapshot(bs)) { +return drv->bdrv_snapshot_create(bs, sn_info); +} + +return -1; } int bdrv_snapshot_goto(BlockDriverState *bs, diff --git a/block.h b/block.h index 24efeb6..fbcd8af 100644 --- a/block.h +++ b/block.h @@ -173,6 +173,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); +int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); int bdrv_snapshot_goto(BlockDriverState *bs, diff --git a/monitor.c b/monitor.c index ad50f12..8b4a1d7 100644 --- a/monitor.c +++ b/monitor.c @@ -2468,8 +2468,11 @@ static void do_loadvm(Monitor *mon, const QDict *qdict) vm_stop(0); -if (load_vmstate(name) >= 0 && saved_vm_running) +if (load_vmstate(name) >= 0 && saved_vm_running) { vm_start(); +} else { +monitor_printf(mon, "Failed to load VM state. VM stopped.\n"); +} } int monitor_get_fd(Monitor *mon, const char *fdname) diff --git a/savevm.c b/savevm.c index dc20390..6549ca7 100644 --- a/savevm.c +++ b/savevm.c @@ -1574,22 +1574,6 @@ out: return ret; } -/* device can contain snapshots */ -static int bdrv_can_snapshot(BlockDriverState *bs) -{ -return (bs && -
[Qemu-devel] [PATCH] sparc32 SuperSPARC MMU Breakpoint Action register (SS-20 OBP fix)
SuperSPARC MMU Breakpoint Action register is used by OBP at boot The patch allows booting Solaris and some other OS with SPARCStation-20 OBP. Signed-off-by: Artyom Tarasenko --- target-sparc/op_helper.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c index aaacfc4..ef3504f 100644 --- a/target-sparc/op_helper.c +++ b/target-sparc/op_helper.c @@ -1745,6 +1745,7 @@ uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign) case 0x31: // Turbosparc RAM snoop case 0x32: // Turbosparc page table descriptor diagnostic case 0x39: /* data cache diagnostic register */ +case 0x4c: /* SuperSPARC MMU Breakpoint Action register */ ret = 0; break; case 0x38: /* SuperSPARC MMU Breakpoint Control Registers */ -- 1.6.2.5
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 4:32 PM, Gleb Natapov wrote: > On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote: >> 2010/5/29 Gleb Natapov : >> > On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: >> >> >> There is no code, because we're still at architecture design stage. >> >> >> >> >> > Try to write test code to understand the problem better. >> >> >> >> I will. >> >> >> > Please do ASAP. This discussion shows that you don't understand what is the >> > problem that we are dialing with. >> >> Which part of the problem you think I don't understand? >> > It seams to me you don't understand how Windows uses RTC for time > keeping and how the QEMU solves the problem today. RTC causes periodic interrupts and Windows interrupt handler increments jiffies, like Linux? >> >> >> >> >> guests could also be assisted with special handling (like win2k >> >> >> >> >> install hack), for example guest instructions could be counted >> >> >> >> >> (approximately, for example using TB size or TSC) and only inject >> >> >> >> >> after at least N instructions have passed. >> >> >> >> > Guest instructions cannot be easily counted in KVM (it can be >> >> >> >> > done more >> >> >> >> > or less reliably using perf counters, may be). >> >> >> >> >> >> >> >> Aren't there any debug registers or perf counters, which can >> >> >> >> generate >> >> >> >> an interrupt after some number of instructions have been executed? >> >> >> > Don't think debug registers have something like that and they are >> >> >> > available for guest use anyway. Perf counters differs greatly from >> >> >> > CPU >> >> >> > to CPU (even between two CPUs of the same manufacturer), and we want >> >> >> > to >> >> >> > keep using them for profiling guests. And I don't see what problem it >> >> >> > will solve anyway that can be solved by simple delay between irq >> >> >> > reinjection. >> >> >> >> >> >> This would allow counting the executed instructions and limit it. Thus >> >> >> we could emulate a 500MHz CPU on a 2GHz CPU more accurately. >> >> >> >> >> > Why would you want to limit number of instruction executed by guest if >> >> > CPU has nothing else to do anyway? The problem occurs not when we have >> >> > spare cycles so give to a guest, but in opposite case. >> >> >> >> I think one problem is that the guest has executed too much compared >> >> to what would happen with real HW with a lesser CPU. That explains the >> >> RTC frequency reprogramming case. >> > You think wrong. The problem is exactly opposite: the guest haven't >> > had enough execution time between two time interrupts. I don't know what >> > RTC frequency reprogramming case you are talking about here. >> >> The case you told me where N pending tick IRQs exist but the guest >> wants to change the RTC frequency from 64Hz to 1024Hz. >> >> Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so >> 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change >> the frequency to 1000Hz. >> >> The problem for emulation is that for the same 3 ticks, there has been >> so little execution power that the ticks have been coalesced. But >> isn't the guest cycle count then much lower than 30Mcyc? >> >> Isn't it so that the guest must be above 30Mcyc to be able to want the >> change? But if we reach that point, the problem must have not been >> too little execution time, but too much. >> > Sorry I tried hard to understand what have you said above but failed. > What do you mean "to be able to want the change"? Guest sometimes wants > to get 64 timer interrupts per second and sometimes it wants to get 1024 > timer interrupt per second. It wants it not as a result of time drift or > anything. It's just how guest behaves. You seams to be to fixated on > guest frequency change. It's just something you have to take into > account when you reinject interrupts. I meant that in the scenario, the guest won't change the RTC before 30Mcyc because of some built in determinism in the guest. At that point, because of some reason, the change would happen. > >> > >> >> >> >> > >> >> >> >> >> >> >> >> >> >> >> >> >> >> > And even if the rate did not matter, the APIC woult still have >> >> >> >> >> > to now >> >> >> >> >> > about the fact that an IRQ is really periodic and does not >> >> >> >> >> > only appear >> >> >> >> >> > as such for a certain interval. This really does not sound like >> >> >> >> >> > simplifying things or even make them cleaner. >> >> >> >> >> >> >> >> >> >> It would, the voodoo would be contained only in APIC, RTC would >> >> >> >> >> be >> >> >> >> >> just like any other device. With the bidirectional irqs, this >> >> >> >> >> voodoo >> >> >> >> >> would probably eventually spread to many other devices. The >> >> >> >> >> logical >> >> >> >> >> conclusion of that would be a system where all devices would be >> >> >> >> >> careful not to disturb the guest at wrong moment because that >> >> >> >> >> would >> >> >> >> >> trigger a bug. >> >> >> >> >> >> >> >>
[Qemu-devel] [Bug 587344] [NEW] gfxmenu from GRUB or GRUB4DOS hang qemu.
Public bug reported: When you run the graphical menu GRUB (gfxmenu, any theme) execution stops. Test image (iso, as an attachment): http://www.multiupload.com/AAI7MGMOLE Image make options: mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso image_root/ With options and GFX menu: qemu -cdrom bootable.iso execution stops after rendering background. With options and GFX menu:: qemu -cdrom bootable.iso --no-kvm the menu is drawn, and the execution stops. ** Affects: qemu Importance: Undecided Status: New ** Tags: gfx gfxmenu grub grub4dos -- gfxmenu from GRUB or GRUB4DOS hang qemu. https://bugs.launchpad.net/bugs/587344 You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. Status in QEMU: New Bug description: When you run the graphical menu GRUB (gfxmenu, any theme) execution stops. Test image (iso, as an attachment): http://www.multiupload.com/AAI7MGMOLE Image make options: mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso image_root/ With options and GFX menu: qemu -cdrom bootable.iso execution stops after rendering background. With options and GFX menu:: qemu -cdrom bootable.iso --no-kvm the menu is drawn, and the execution stops.
[Qemu-devel] [Bug 587344] Re: gfxmenu from GRUB or GRUB4DOS hang qemu.
** Attachment added: "Test image (iso, gzipped)" http://launchpadlibrarian.net/49329042/bootable.iso.gz -- gfxmenu from GRUB or GRUB4DOS hang qemu. https://bugs.launchpad.net/bugs/587344 You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. Status in QEMU: New Bug description: When you run the graphical menu GRUB (gfxmenu, any theme) execution stops. Test image (iso, as an attachment): http://www.multiupload.com/AAI7MGMOLE Image make options: mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso image_root/ With options and GFX menu: qemu -cdrom bootable.iso execution stops after rendering background. With options and GFX menu:: qemu -cdrom bootable.iso --no-kvm the menu is drawn, and the execution stops.
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 4:37 PM, Gleb Natapov wrote: > On Sat, May 29, 2010 at 04:13:22PM +, Blue Swirl wrote: >> On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov wrote: >> > On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: >> >> > I still don't see how the alternative is supposed to simplify our life >> >> > or improve the efficiency of the de-coalescing workaround. It's rather >> >> > problematic like Gleb pointed out: The de-coalescing logic needs to be >> >> > informed about periodicity changes that can only be delivered along >> >> > IRQs. So what to do with the backlog when the timer is stopped? >> >> >> >> What happens with the current design? Gleb only mentioned the >> >> frequency change, I thought that was not so big problem. But I don't >> >> think this case should be allowed happen at all, it can't exist on >> >> real HW. >> >> >> > Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP >> > inside QEMU, connect with gdb to QEMU process and check what frequency >> > RTC configured with (hint: it will be 64Hz), now run video inside the >> > guest and check frequency again (hint: it will be 1Khz). >> >> You missed the key word 'stopped'. If the timer is really stopped, no >> IRQs should ever come out afterwards, just like on real HW. For the >> emulation, this means loss of ticks which should have been delivered >> before the change. >> > I haven't missed it. I describe to you reality of the situation. You want > to change reality to be more close to what you want it to be by adding > words to my description. Quoting Jan: 'So what to do with the backlog when the timer is stopped?' I didn't add any words to your description, please be more careful with your attributions. Why do you think I want to change the reality? XP frequency change isn't the same case as timer being stopped. > Please just go write code, experiment, debug > and _then_ come here with design. I added some debugging to RTC, PIC and APIC. I also built a small guest in x86 assembly to test the coalescing. However, in the tests with this guest and others I noticed that the coalescing only happens in some obscure conditions. By default the APIC's delivery method for IRQs is ExtInt and coalescing counting happens only with Fixed. This means that the guest needs to reprogram APIC. It also looks like RTC interrupts need to be triggered. But I didn't see both of these to happen simultaneously in my tests with Linux and Windows guests. Of course, -rtc-td-hack flag must be used and I also disabled HPET to be sure that RTC would be used. With DEBUG_COALESCING enabled, I just get increasing numbers for apic_irq_delivered: apic: apic_set_irq: coalescing 67123 apic: apic_set_irq: coalescing 67124 apic: apic_set_irq: coalescing 67125 If the hack were active, the numbers would be close to zero (or at least some point) because apic_reset_irq_delivered would be called, but this does not happen. Could you specify a clear test case with which the coalescing action could be tested? Linux or BSD based, please. >> But what if the guest changed the frequency very often, and between >> changes used zero value, like 64Hz -> 0Hz -> 128Hz -> 0Hz -> 64Hz? > Too bad, the world is not perfect. > > -- > Gleb. >
[Qemu-devel] Re: [PATCH] sparc32 SuperSPARC MMU Breakpoint Action register (SS-20 OBP fix)
Thanks, applied. On Sat, May 29, 2010 at 8:48 PM, Artyom Tarasenko wrote: > SuperSPARC MMU Breakpoint Action register is used by OBP at boot > > The patch allows booting Solaris and some other OS with > SPARCStation-20 OBP. > > Signed-off-by: Artyom Tarasenko > --- > target-sparc/op_helper.c | 1 + > 1 files changed, 1 insertions(+), 0 deletions(-) > > diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c > index aaacfc4..ef3504f 100644 > --- a/target-sparc/op_helper.c > +++ b/target-sparc/op_helper.c > @@ -1745,6 +1745,7 @@ uint64_t helper_ld_asi(target_ulong addr, int asi, int > size, int sign) > case 0x31: // Turbosparc RAM snoop > case 0x32: // Turbosparc page table descriptor diagnostic > case 0x39: /* data cache diagnostic register */ > + case 0x4c: /* SuperSPARC MMU Breakpoint Action register */ > ret = 0; > break; > case 0x38: /* SuperSPARC MMU Breakpoint Control Registers */ > -- > 1.6.2.5 > >
[Qemu-devel] sparc mmu
2010/5/29 Blue Swirl : > Robert Reif did some improvements to SuperSparc emulation, but the > work was not finished. That should be a good starting point. Do you mean the last patch he sent to us or are there some earlier unapplied patches? The last patch _seems_ to be mainly refactoring and disabling features which are not presented in certain models. I might have missed something of course: the patch is large. For me is also interesting what do we miss in the microSPARC implementation. If I switch off POST (which crashes due to the known FPU problems) LX/CX/X OBPs hang. Looks like it's expecting some interrupt (the SS-5 OBP escapes a similar endless loop on a timer irq), but not getting it. Do you know anything obvious qemu is missing in LX machine? -- Regards, Artyom Tarasenko solaris/sparc under qemu blog: http://tyom.blogspot.com/
Re: [Qemu-devel] [PATCH] Compile dma only once
> On Fri, May 28, 2010 at 7:34 PM, Paul Brook wrote: > >> Use a qemu_irq to request CPU exit. > > > > Needing to request a CPU exit at all is just wrong. See previous > > discussions about how any use of qemu_bh_schedule_idle is fundamentally > > broken. > > I agree for the device case. Is the attached patch then OK? No. You can't remove code without understanding why it was there in the first place. I'm pretty sure this will break FDC emulation, or at least make it unfeasibly slow. The underlying problem is that devices (in particular the FDC) don't communicate properly with the DMA engine. Instead they rely on the DMA device polling state at poorly defined intervals. Removing DMA_schedule without removing qemu_bh_schedule_idle is almost certainly wrong. My main objection to you original patch is that it introduces a new API for something that is just plain wrong. At minimum it needs a comment documenting that this function should never be used for anything - It only exists because we're too lazy to fix legacy code. > But what about other uses (with the patch applied): I was just referring to device emulation. qemu_notify_event is also pretty bogus. It is a horrible hack to workaround deficiencies in the network API. Paul
[Qemu-devel] Arm big endian?
I'm trying to get arm big endian support to work. I patched the 2.6.33 kernel to pretend that good old versatilepb can have a big endian CPU plugged into it (attached), and then I built a kernel with the attached .config, and qemu went "boing": $ qemu-system-arm -M versatilepb -nographic -no-reboot -kernel zImage-armv4eb -hda image-armv4eb.sqf -append root=/dev/sda rw init=/usr/sbin/init.sh panic=1 PATH=/usr/bin console=ttyAMA0 HOST=armv4eb -net nic,model=rtl8139 -net user qemu: hardware error: pl011_read: Bad offset 6c8 CPU #0: R00= R01=0183 R02=0100 R03= R04=101f36ca R05= R06= R07= R08=ffe4 R09= R10= R11= R12= R13= R14=00010088 R15=0001 PSR=01db A und32 Aborted It does this with both the release version I have on the host and a recent git (thursday-ish). Does this look more like a kernel error, or a qemu error? Do I need to select a different -cpu emulation? Thanks, Rob -- Latency is more important than throughput. It's that simple. - Linus Torvalds --- linux/arch/arm/mach-versatile/Kconfig 2010-05-29 18:44:21.0 -0500 +++ linx/arch/arm/mach-versatile/Kconfig 2010-05-29 19:19:38.0 -0500 @@ -12,4 +12,12 @@ help Include support for the ARM(R) Versatile/AP platform. +if ARCH_VERSATILE_PB || ARCH_VERSATILE_AB + +config ARCH_SUPPORTS_BIG_ENDIAN +bool +default y + +endif + endmenu # # Automatically generated make config: don't edit # Linux kernel version: 2.6.33 # Sat May 29 21:20:49 2010 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_HARDIRQS=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_HAVE_LATENCYTOP_SUPPORT=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_HARDIRQS_SW_RESEND=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_VECTORS_BASE=0x CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_CONSTRUCTORS=y # # General setup # CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZO=y CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_BZIP2 is not set # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_LZO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set # # RCU Subsystem # CONFIG_TREE_RCU=y # CONFIG_TREE_PREEMPT_RCU is not set # CONFIG_TINY_RCU is not set # CONFIG_RCU_TRACE is not set CONFIG_RCU_FANOUT=32 # CONFIG_RCU_FANOUT_EXACT is not set # CONFIG_TREE_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 # CONFIG_GROUP_SCHED is not set # CONFIG_CGROUPS is not set # CONFIG_SYSFS_DEPRECATED_V2 is not set # CONFIG_RELAY is not set CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_IPC_NS is not set # CONFIG_USER_NS is not set # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_LZO=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y # # Kernel Performance Events And Counters # CONFIG_VM_EVENT_COUNTERS=y CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set # CONFIG_PROFILING is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_CLK=y # # GCOV-based kernel profiling # # CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_BLOCK=y # CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set # # IO Schedulers # CONFIG_IOSCHED_NOOP=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set # CONFIG_DEFAULT_DEADLINE is not set # CONFIG_DEFAULT_CFQ is not set CONFIG_DEFAULT_NOOP=y CONFIG_DEFAULT_IOSCHED="noop" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set # CONFIG_INLINE_SPIN_LOCK is not set # CONFIG_INLINE_SPIN_LOCK_BH is not set # CONFIG_INLINE_SPIN_LOCK_IRQ is not set # CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set CONFIG_INLINE_SPIN_UNLOCK=y # CONFIG_INLINE_SPIN_UNLOCK_BH is not set CONFIG_INLINE_SPIN_UNLOCK_IRQ=y # CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set # CONFIG_INLINE_READ_TRYLOCK is
[Qemu-devel] Re: [QEMU-KVM]: Megasas + TCM_Loop + SG_IO into Windows XP guests
On Tue, 2010-05-18 at 04:18 -0700, Nicholas A. Bellinger wrote: > On Tue, 2010-05-18 at 11:43 +0200, Hannes Reinecke wrote: > > Nicholas A. Bellinger wrote: > > > On Fri, 2010-05-14 at 02:42 -0700, Nicholas A. Bellinger wrote: > > > Greetings Hannes, > > > > > > So I spent some more time with XP guests this weekend, and I noticed two > > > things immediately when using hw/lsi53c895a.c instead of hw/megasas.c > > > with the same two TCM_Loop SAS LUNs via SG_IO from last week: > > > > > > 1) With lsi53c895a, XP guests are able to boot successfully w/ out the > > > synchronous SG_IO hack that is currently required to get past the first > > > 36-byte INQUIRY for megasas + XP SP2 > > > > > > 2) With lsi53c895a, XP is able to successfully create and mount a NTFS > > > filesystem, reboot, and read blocks appear to be functioning properly. > > > FYI I have not run any 'write known pattern then read-back and compare > > > blocks' data integrity tests from with in the XP guests just yet, but I > > > am confident that TCM scatterlist -> se_mem_t mapping is working as > > > expected on the KVM Host. > > > > > > Futhermore, after formatting a 5 GB TCM/FILEIO LUN with lsi53c895a, and > > > then rebooting with megasas with the same two configured TCM_Loop SG_IO > > > devices, it appears to be able to mount and read blocks successfully. > > > Attempting to write new blocks on the mounted filesystem also appears to > > > work to some degree, but throughput slows down to a crawl during XP > > > guest buffer cache flush, which is likely attributed to the use of my > > > quick SYNC SG_IO hack. > > > > > > So it appears that there are two seperate issues here, and AFAICT they > > > both look to be XP and megasas specific. For #2, it may be something > > > about the format of the incoming scatterlists generated during XP's > > > mkfs.ntfs that is causing some issues. While watching output during fs > > > creation, I noticed the following WRITE_10s with a starting 4088 byte > > > scatterlist and a trailing 8 byte scatterlist: > > > > > > megasas: writel mmio 40: 2b0b003 > > > megasas: Found mapped frame 2 context 82b0b000 pa 2b0b000 > > > megasas: Enqueue frame context 82b0b000 tail 493 busy 1 > > > megasas: LD SCSI dev 2 lun 0 sdev 0xdc0230 xfer 16384 > > > scsi-generic: Using cur_addr: 0x0ff6c008 cur_len: > > > 0x0ff8 > > > scsi-generic: Adding iovec for mem: 0x7f1783b96008 len: 0x0ff8 > > > scsi-generic: Using cur_addr: 0x0fd6e000 cur_len: > > > 0x1000 > > > scsi-generic: Adding iovec for mem: 0x7f1783998000 len: 0x1000 > > > scsi-generic: Using cur_addr: 0x0fe2f000 cur_len: > > > 0x1000 > > > scsi-generic: Adding iovec for mem: 0x7f1783a59000 len: 0x1000 > > > scsi-generic: Using cur_addr: 0x0fdf cur_len: > > > 0x1000 > > > scsi-generic: Adding iovec for mem: 0x7f1783a1a000 len: 0x1000 > > > scsi-generic: Using cur_addr: 0x0fded000 cur_len: > > > 0x0008 > > > scsi-generic: Adding iovec for mem: 0x7f1783a17000 len: 0x0008 > > > scsi-generic: execute IOV: iovec_count: 5, dxferp: 0xd92420, dxfer_len: > > > 16384 > > > scsi-generic: ---> Issuing SG_IO CDB len 10: 0x2a 00 > > > 00 00 fa be 00 00 20 00 > > > scsi-generic: scsi_write_complete() ret = 0 > > > scsi-generic: Command complete 0x0xd922c0 tag=0x82b0b000 status=0 > > > megasas: LD SCSI req 0xd922c0 cmd 0xda92c0 lun 0xdc0230 finished with > > > status 0 len 16384 > > > megasas: Complete frame context 82b0b000 tail 493 busy 0 doorbell 0 > > > > > > Also, the final READ_10 that produces the 'could not create filesystem' > > > exception is for LBA 63 and XP looking for the first FS blocks after > > > GPT. > > > > > > Could there be some breakage in megasas with a length < PAGE_SIZE for > > > the scatterlist..?As lsi53c895a seems to work OK for this case, is > > > there something about the logic of parsing the incoming struct > > > scatterlists that is different between the two HBA drivers..? AFAICT > > > both are using Gerd's common code in hw/scsi-bus.c, unless there is > > > something about megasas_map_sgl() that is causing issues with the > > > above..? > > > > > > > The usual disclaimer here: I'm less than happy with the current SCSI disk > > handling. > > Currently we have the two options: > > - Using 'scsi-disk', which will _emulate_ a SCSI disk internally, but allow > > to use > > asynchronous I/O using normal read/write syscalls > > - Using 'scsi-generic', which will allow you to pass-through any SCSI > > device, but > > disallow asynchronous I/O and requires you to use the SG_IO interface. > > Well, this is only true so far for the SYNC SG_IO patch with KVM XP > guests. The asynchronous I/O still works as expected for Linux KVM > guests for 10 Gb/sec sec throughput. > > > The latter also implies that the host will mark _all_ I/O commands as > > 'bloc
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 08:52:34PM +, Blue Swirl wrote: > On Sat, May 29, 2010 at 4:32 PM, Gleb Natapov wrote: > > On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote: > >> 2010/5/29 Gleb Natapov : > >> > On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: > >> >> >> There is no code, because we're still at architecture design stage. > >> >> >> > >> >> > Try to write test code to understand the problem better. > >> >> > >> >> I will. > >> >> > >> > Please do ASAP. This discussion shows that you don't understand what is > >> > the > >> > problem that we are dialing with. > >> > >> Which part of the problem you think I don't understand? > >> > > It seams to me you don't understand how Windows uses RTC for time > > keeping and how the QEMU solves the problem today. > > RTC causes periodic interrupts and Windows interrupt handler > increments jiffies, like Linux? > Linux does much more complicated things than that to keep time, so the only way to fix time drift in Linux was to introduce pvclock. For Window it is not so accurate too, since Windows can change clock frequency any time it can't calculate time from jiffies, it needs to update clock at each time tick. > >> >> >> >> >> guests could also be assisted with special handling (like win2k > >> >> >> >> >> install hack), for example guest instructions could be counted > >> >> >> >> >> (approximately, for example using TB size or TSC) and only > >> >> >> >> >> inject > >> >> >> >> >> after at least N instructions have passed. > >> >> >> >> > Guest instructions cannot be easily counted in KVM (it can be > >> >> >> >> > done more > >> >> >> >> > or less reliably using perf counters, may be). > >> >> >> >> > >> >> >> >> Aren't there any debug registers or perf counters, which can > >> >> >> >> generate > >> >> >> >> an interrupt after some number of instructions have been executed? > >> >> >> > Don't think debug registers have something like that and they are > >> >> >> > available for guest use anyway. Perf counters differs greatly from > >> >> >> > CPU > >> >> >> > to CPU (even between two CPUs of the same manufacturer), and we > >> >> >> > want to > >> >> >> > keep using them for profiling guests. And I don't see what problem > >> >> >> > it > >> >> >> > will solve anyway that can be solved by simple delay between irq > >> >> >> > reinjection. > >> >> >> > >> >> >> This would allow counting the executed instructions and limit it. > >> >> >> Thus > >> >> >> we could emulate a 500MHz CPU on a 2GHz CPU more accurately. > >> >> >> > >> >> > Why would you want to limit number of instruction executed by guest if > >> >> > CPU has nothing else to do anyway? The problem occurs not when we have > >> >> > spare cycles so give to a guest, but in opposite case. > >> >> > >> >> I think one problem is that the guest has executed too much compared > >> >> to what would happen with real HW with a lesser CPU. That explains the > >> >> RTC frequency reprogramming case. > >> > You think wrong. The problem is exactly opposite: the guest haven't > >> > had enough execution time between two time interrupts. I don't know what > >> > RTC frequency reprogramming case you are talking about here. > >> > >> The case you told me where N pending tick IRQs exist but the guest > >> wants to change the RTC frequency from 64Hz to 1024Hz. > >> > >> Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so > >> 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change > >> the frequency to 1000Hz. > >> > >> The problem for emulation is that for the same 3 ticks, there has been > >> so little execution power that the ticks have been coalesced. But > >> isn't the guest cycle count then much lower than 30Mcyc? > >> > >> Isn't it so that the guest must be above 30Mcyc to be able to want the > >> change? But if we reach that point, the problem must have not been > >> too little execution time, but too much. > >> > > Sorry I tried hard to understand what have you said above but failed. > > What do you mean "to be able to want the change"? Guest sometimes wants > > to get 64 timer interrupts per second and sometimes it wants to get 1024 > > timer interrupt per second. It wants it not as a result of time drift or > > anything. It's just how guest behaves. You seams to be to fixated on > > guest frequency change. It's just something you have to take into > > account when you reinject interrupts. > > I meant that in the scenario, the guest won't change the RTC before > 30Mcyc because of some built in determinism in the guest. At that > point, because of some reason, the change would happen. > I still don't understand what are you trying to say here. Guest changes frequency because of some even in the guest. It is totally independent of what happens in QEMUs RTC emulation. > > > >> > > >> >> > >> >> > > >> >> >> >> > >> >> >> >> >> > >> >> >> >> >> > And even if the rate did not matter, the APIC woult still > >> >> >> >> >> > have to now > >> >> >> >>
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 09:21:14PM +, Blue Swirl wrote: > On Sat, May 29, 2010 at 4:37 PM, Gleb Natapov wrote: > > On Sat, May 29, 2010 at 04:13:22PM +, Blue Swirl wrote: > >> On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov wrote: > >> > On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: > >> >> > I still don't see how the alternative is supposed to simplify our life > >> >> > or improve the efficiency of the de-coalescing workaround. It's rather > >> >> > problematic like Gleb pointed out: The de-coalescing logic needs to be > >> >> > informed about periodicity changes that can only be delivered along > >> >> > IRQs. So what to do with the backlog when the timer is stopped? > >> >> > >> >> What happens with the current design? Gleb only mentioned the > >> >> frequency change, I thought that was not so big problem. But I don't > >> >> think this case should be allowed happen at all, it can't exist on > >> >> real HW. > >> >> > >> > Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP > >> > inside QEMU, connect with gdb to QEMU process and check what frequency > >> > RTC configured with (hint: it will be 64Hz), now run video inside the > >> > guest and check frequency again (hint: it will be 1Khz). > >> > >> You missed the key word 'stopped'. If the timer is really stopped, no > >> IRQs should ever come out afterwards, just like on real HW. For the > >> emulation, this means loss of ticks which should have been delivered > >> before the change. > >> > > I haven't missed it. I describe to you reality of the situation. You want > > to change reality to be more close to what you want it to be by adding > > words to my description. > > Quoting Jan: 'So what to do with the backlog when the timer is > stopped?' I didn't add any words to your description, please be more > careful with your attributions. Why do you think I want to change the > reality? Please refer to my words when you answer to my quote. You quoted my answer to you statement: Gleb only mentioned the frequency change, I thought that was not so big problem. But I don't think this case should be allowed happen at all, it can't exist on real HW. No 'stopped' was under discussion nowhere. FWIW 'stopped' is just a case of frequency change. > > XP frequency change isn't the same case as timer being stopped. > And what is the big difference exactly? > > Please just go write code, experiment, debug > > and _then_ come here with design. > > I added some debugging to RTC, PIC and APIC. I also built a small > guest in x86 assembly to test the coalescing. However, in the tests > with this guest and others I noticed that the coalescing only happens > in some obscure conditions. So try with real guest and with real load. > > By default the APIC's delivery method for IRQs is ExtInt and > coalescing counting happens only with Fixed. This means that the guest > needs to reprogram APIC. It also looks like RTC interrupts need to be > triggered. But I didn't see both of these to happen simultaneously in > my tests with Linux and Windows guests. Of course, -rtc-td-hack flag > must be used and I also disabled HPET to be sure that RTC would be > used. > > With DEBUG_COALESCING enabled, I just get increasing numbers for > apic_irq_delivered: > apic: apic_set_irq: coalescing 67123 > apic: apic_set_irq: coalescing 67124 > apic: apic_set_irq: coalescing 67125 So have you actually used -rtc-td-hack option? I compiled head of qemu.git with DEBUG_COALESCING and run WindowsXP guest with -rtc-td-hack and I get: apic: apic_reset_irq_delivered: old coalescing 3 apic: apic_set_irq: coalescing 1 apic: apic_get_irq_delivered: returning coalescing 1 apic: apic_set_irq: coalescing 2 apic: apic_set_irq: coalescing 3 apic: apic_set_irq: coalescing 4 apic: apic_set_irq: coalescing 5 apic: apic_set_irq: coalescing 6 apic: apic_reset_irq_delivered: old coalescing 6 apic: apic_set_irq: coalescing 1 apic: apic_get_irq_delivered: returning coalescing 1 > > If the hack were active, the numbers would be close to zero (or at > least some point) because apic_reset_irq_delivered would be called, > but this does not happen. Could you specify a clear test case with > which the coalescing action could be tested? Linux or BSD based, > please. Linux don't use RTC as time source and I don't know about BSD, so no Linux or BSD test case for you, sorry. Run WindowXP standard HAL and put heavy load on the host. You can run video inside the gust to trigger coalescing more easily. > > >> But what if the guest changed the frequency very often, and between > >> changes used zero value, like 64Hz -> 0Hz -> 128Hz -> 0Hz -> 64Hz? > > Too bad, the world is not perfect. > > > > -- > > Gleb. > > -- Gleb.