This patch improves the "-drive ,directio=on" mode for emulated IDE drives by removing the intermediate buffer used in the IDE emulation: when direct I/O is enabled, DMA reads and writes are issued directly on guest memory instead of going through the IDE I/O buffer. --- block.c | 10 +++ block.h | 2 block_int.h | 1 cpu-all.h | 1 exec.c | 19 ++++++ hw/ide.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- vl.c | 1 7 files changed, 204 insertions(+), 6 deletions(-)
Index: qemu/block.c =================================================================== --- qemu.orig/block.c 2007-11-27 10:49:56.000000000 +0100 +++ qemu/block.c 2007-11-27 10:49:57.000000000 +0100 @@ -752,6 +752,11 @@ void bdrv_set_translation_hint(BlockDriv bs->translation = translation; } +void bdrv_set_directio_hint(BlockDriverState *bs, int directio) +{ + bs->directio = directio; +} + void bdrv_get_geometry_hint(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs) { @@ -780,6 +785,11 @@ int bdrv_is_read_only(BlockDriverState * return bs->read_only; } +int bdrv_is_directio(BlockDriverState *bs) +{ + return bs->directio; +} + /* XXX: no longer used */ void bdrv_set_change_cb(BlockDriverState *bs, void (*change_cb)(void *opaque), void *opaque) Index: qemu/block.h =================================================================== --- qemu.orig/block.h 2007-11-27 10:49:56.000000000 +0100 +++ qemu/block.h 2007-11-27 10:49:57.000000000 +0100 @@ -112,6 +112,7 @@ void bdrv_set_geometry_hint(BlockDriverS int cyls, int heads, int secs); void bdrv_set_type_hint(BlockDriverState *bs, int type); void bdrv_set_translation_hint(BlockDriverState *bs, int translation); +void bdrv_set_directio_hint(BlockDriverState *bs, int directio); void bdrv_get_geometry_hint(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs); int bdrv_get_type_hint(BlockDriverState *bs); @@ -119,6 +120,7 @@ int bdrv_get_translation_hint(BlockDrive int bdrv_is_removable(BlockDriverState *bs); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_inserted(BlockDriverState *bs); +int bdrv_is_directio(BlockDriverState *bs); int bdrv_media_changed(BlockDriverState *bs); int bdrv_is_locked(BlockDriverState *bs); void bdrv_set_locked(BlockDriverState *bs, int locked); Index: qemu/block_int.h =================================================================== --- qemu.orig/block_int.h 2007-11-27 10:46:43.000000000 +0100 +++ qemu/block_int.h 2007-11-27 10:49:57.000000000 +0100 @@ -118,6 +118,7 @@ 
struct BlockDriverState { drivers. They are not used by the block driver */ int cyls, heads, secs, translation; int type; + int directio; char device_name[32]; BlockDriverState *next; }; Index: qemu/vl.c =================================================================== --- qemu.orig/vl.c 2007-11-27 10:49:56.000000000 +0100 +++ qemu/vl.c 2007-11-27 10:49:57.000000000 +0100 @@ -5108,6 +5108,7 @@ static int drive_init(const char *str, i bdrv_flags |= BDRV_O_SNAPSHOT; if (directio) bdrv_flags |= BDRV_O_DIRECT; + bdrv_set_directio_hint(bdrv, directio); if (bdrv_open(bdrv, file, bdrv_flags) < 0 || qemu_key_check(bdrv, file)) { fprintf(stderr, "qemu: could not open disk image %s\n", file); Index: qemu/hw/ide.c =================================================================== --- qemu.orig/hw/ide.c 2007-11-27 10:49:56.000000000 +0100 +++ qemu/hw/ide.c 2007-11-27 10:49:57.000000000 +0100 @@ -816,7 +816,7 @@ static int dma_buf_rw(BMDMAState *bm, in } /* XXX: handle errors */ -static void ide_read_dma_cb(void *opaque, int ret) +static void ide_read_dma_cb_buffered(void *opaque, int ret) { BMDMAState *bm = opaque; IDEState *s = bm->ide_if; @@ -856,7 +856,86 @@ static void ide_read_dma_cb(void *opaque printf("aio_read: sector_num=%lld n=%d\n", sector_num, n); #endif bm->aiocb = bdrv_aio_read(s->bs, sector_num, s->io_buffer, n, - ide_read_dma_cb, bm); + ide_read_dma_cb_buffered, bm); +} + +static void ide_read_dma_cb_unbuffered(void *opaque, int ret) +{ + BMDMAState *bm = opaque; + IDEState *s = bm->ide_if; + int64_t sector_num; + int nsector; + int len; + uint8_t *phy_addr; + + if (s->nsector == 0) { + s->status = READY_STAT | SEEK_STAT; + ide_set_irq(s); + eot: + bm->status &= ~BM_STATUS_DMAING; + bm->status |= BM_STATUS_INT; + bm->dma_cb = NULL; + bm->ide_if = NULL; + bm->aiocb = NULL; + return; + } + + /* launch next transfer */ + + if (bm->cur_prd_len == 0) { + struct { + uint32_t addr; + uint32_t size; + } prd; + + cpu_physical_memory_read(bm->cur_addr, (uint8_t 
*)&prd, 8); + + bm->cur_addr += 8; + prd.addr = le32_to_cpu(prd.addr); + prd.size = le32_to_cpu(prd.size); + len = prd.size & 0xfffe; + if (len == 0) + len = 0x10000; + bm->cur_prd_addr = prd.addr; + while ((bm->cur_addr - bm->addr) < 4096) { + int tmp_len; + cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8); + prd.addr = le32_to_cpu(prd.addr); + prd.size = le32_to_cpu(prd.size); + if (bm->cur_prd_addr + len != prd.addr) + break; + tmp_len = prd.size & 0xfffe; + if (tmp_len == 0) + tmp_len = 0x10000; + len += tmp_len; + bm->cur_addr += 8; + if (prd.size & 0x80000000) + break; + } + bm->cur_prd_len = len; + } + + phy_addr = cpu_physical_page_addr(bm->cur_prd_addr); + if (phy_addr == (uint8_t *)-1) + goto eot; + + len = (s->nsector<<9); + if (len > bm->cur_prd_len) + len = bm->cur_prd_len; + + nsector = (len>>9); + bm->cur_prd_addr += (nsector<<9); + bm->cur_prd_len -= (nsector<<9); + + sector_num = ide_get_sector(s); + ide_set_sector(s, sector_num + nsector); + s->nsector-=nsector; + +#ifdef DEBUG_AIO + printf("aio_read: sector_num=%lld n=%d\n", (unsigned long long)sector_num, nsector); +#endif + bm->aiocb = bdrv_aio_read(s->bs, sector_num, phy_addr, nsector, + ide_read_dma_cb_unbuffered, bm); } static void ide_sector_read_dma(IDEState *s) @@ -864,7 +943,10 @@ static void ide_sector_read_dma(IDEState s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; s->io_buffer_index = 0; s->io_buffer_size = 0; - ide_dma_start(s, ide_read_dma_cb); + if (bdrv_is_directio(s->bs)) + ide_dma_start(s, ide_read_dma_cb_unbuffered); + else + ide_dma_start(s, ide_read_dma_cb_buffered); } static void ide_sector_write_timer_cb(void *opaque) @@ -917,7 +999,7 @@ static void ide_sector_write(IDEState *s } /* XXX: handle errors */ -static void ide_write_dma_cb(void *opaque, int ret) +static void ide_write_dma_cb_buffered(void *opaque, int ret) { BMDMAState *bm = opaque; IDEState *s = bm->ide_if; @@ -958,7 +1040,86 @@ static void ide_write_dma_cb(void *opaqu printf("aio_write: 
sector_num=%lld n=%d\n", sector_num, n); #endif bm->aiocb = bdrv_aio_write(s->bs, sector_num, s->io_buffer, n, - ide_write_dma_cb, bm); + ide_write_dma_cb_buffered, bm); +} + +static void ide_write_dma_cb_unbuffered(void *opaque, int ret) +{ + BMDMAState *bm = opaque; + IDEState *s = bm->ide_if; + int64_t sector_num; + int nsector; + int len; + uint8_t *phy_addr; + + if (s->nsector == 0) { + s->status = READY_STAT | SEEK_STAT; + ide_set_irq(s); + eot: + bm->status &= ~BM_STATUS_DMAING; + bm->status |= BM_STATUS_INT; + bm->dma_cb = NULL; + bm->ide_if = NULL; + bm->aiocb = NULL; + return; + } + + /* launch next transfer */ + + if (bm->cur_prd_len == 0) { + struct { + uint32_t addr; + uint32_t size; + } prd; + + cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8); + + bm->cur_addr += 8; + prd.addr = le32_to_cpu(prd.addr); + prd.size = le32_to_cpu(prd.size); + len = prd.size & 0xfffe; + if (len == 0) + len = 0x10000; + bm->cur_prd_addr = prd.addr; + while ((bm->cur_addr - bm->addr) < 4096) { + int tmp_len; + cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8); + prd.addr = le32_to_cpu(prd.addr); + prd.size = le32_to_cpu(prd.size); + if (bm->cur_prd_addr + len != prd.addr) + break; + tmp_len = prd.size & 0xfffe; + if (tmp_len == 0) + tmp_len = 0x10000; + len += tmp_len; + bm->cur_addr += 8; + if (prd.size & 0x80000000) + break; + } + bm->cur_prd_len = len; + } + + phy_addr = cpu_physical_page_addr(bm->cur_prd_addr); + if (phy_addr == (uint8_t *)-1) + goto eot; + + len = (s->nsector<<9); + if (len > bm->cur_prd_len) + len = bm->cur_prd_len; + + nsector = (len>>9); + bm->cur_prd_addr += (nsector<<9); + bm->cur_prd_len -= (nsector<<9); + + sector_num = ide_get_sector(s); + ide_set_sector(s, sector_num + nsector); + s->nsector-=nsector; + +#ifdef DEBUG_AIO + printf("aio_write: sector_num=%lld n=%d\n", (unsigned long long)sector_num, nsector); +#endif + bm->aiocb = bdrv_aio_write(s->bs, sector_num, phy_addr, nsector, + ide_write_dma_cb_unbuffered, bm); } static 
void ide_sector_write_dma(IDEState *s) @@ -966,7 +1127,10 @@ static void ide_sector_write_dma(IDEStat s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; s->io_buffer_index = 0; s->io_buffer_size = 0; - ide_dma_start(s, ide_write_dma_cb); + if (bdrv_is_directio(s->bs)) + ide_dma_start(s, ide_write_dma_cb_unbuffered); + else + ide_dma_start(s, ide_write_dma_cb_buffered); } static void ide_atapi_cmd_ok(IDEState *s) Index: qemu/exec.c =================================================================== --- qemu.orig/exec.c 2007-11-27 10:46:43.000000000 +0100 +++ qemu/exec.c 2007-11-27 10:49:57.000000000 +0100 @@ -2054,6 +2054,25 @@ void cpu_register_physical_memory(target } } +uint8_t * cpu_physical_page_addr(target_phys_addr_t addr) +{ + target_phys_addr_t page; + unsigned long pd; + PhysPageDesc *p; + unsigned long addr1; + + page = addr & TARGET_PAGE_MASK; + p = phys_page_find(page >> TARGET_PAGE_BITS); + if (!p) + return (uint8_t*)-1; + + pd = p->phys_offset; + + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + + return phys_ram_base + addr1; +} + /* XXX: temporary until new memory mapping API */ uint32_t cpu_get_physical_page_desc(target_phys_addr_t addr) { Index: qemu/cpu-all.h =================================================================== --- qemu.orig/cpu-all.h 2007-11-27 10:46:58.000000000 +0100 +++ qemu/cpu-all.h 2007-11-27 10:49:57.000000000 +0100 @@ -837,6 +837,7 @@ int cpu_register_io_memory(int io_index, CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index); CPUReadMemoryFunc **cpu_get_io_memory_read(int io_index); +extern uint8_t * cpu_physical_page_addr(target_phys_addr_t addr); void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write); static inline void cpu_physical_memory_read(target_phys_addr_t addr,