This looks good to me. Thanks for addressing my v3 request. I have some new remarks here. I feel *really* bad for not finding them earlier. (If you get tired of working on this series, I could pick it up and try to shepherd it further.)
On 10/01/15 14:16, Marc Marí wrote: > Based on the specifications on docs/specs/fw_cfg.txt > > This interface is an addon. The old interface can still be used as usual. > > Based on Gerd Hoffman's initial implementation. > > Signed-off-by: Marc Marí <mar...@redhat.com> > --- > hw/arm/virt.c | 2 +- > hw/nvram/fw_cfg.c | 231 > +++++++++++++++++++++++++++++++++++++++++++--- > include/hw/nvram/fw_cfg.h | 16 +++- > 3 files changed, 233 insertions(+), 16 deletions(-) > > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > index d25d6cf..7ae984f 100644 > --- a/hw/arm/virt.c > +++ b/hw/arm/virt.c > @@ -683,7 +683,7 @@ static void create_fw_cfg(const VirtBoardInfo *vbi) > hwaddr size = vbi->memmap[VIRT_FW_CFG].size; > char *nodename; > > - fw_cfg_init_mem_wide(base + 8, base, 8); > + fw_cfg_init_mem_wide(base + 8, base, 8, 0, NULL); > > nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); > qemu_fdt_add_subnode(vbi->fdt, nodename); > diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c > index 658f8c4..59933b3 100644 > --- a/hw/nvram/fw_cfg.c > +++ b/hw/nvram/fw_cfg.c > @@ -23,6 +23,7 @@ > */ > #include "hw/hw.h" > #include "sysemu/sysemu.h" > +#include "sysemu/dma.h" > #include "hw/isa/isa.h" > #include "hw/nvram/fw_cfg.h" > #include "hw/sysbus.h" > @@ -30,7 +31,7 @@ > #include "qemu/error-report.h" > #include "qemu/config-file.h" > > -#define FW_CFG_SIZE 2 > +#define FW_CFG_CTL_SIZE 2 > #define FW_CFG_NAME "fw_cfg" > #define FW_CFG_PATH "/machine/" FW_CFG_NAME > > @@ -42,6 +43,16 @@ > #define FW_CFG_IO(obj) OBJECT_CHECK(FWCfgIoState, (obj), TYPE_FW_CFG_IO) > #define FW_CFG_MEM(obj) OBJECT_CHECK(FWCfgMemState, (obj), TYPE_FW_CFG_MEM) > > +/* FW_CFG_VERSION bits */ > +#define FW_CFG_VERSION 0x01 > +#define FW_CFG_VERSION_DMA 0x02 > + > +/* FW_CFG_DMA_CONTROL bits */ > +#define FW_CFG_DMA_CTL_ERROR 0x01 > +#define FW_CFG_DMA_CTL_READ 0x02 > +#define FW_CFG_DMA_CTL_SKIP 0x04 > +#define FW_CFG_DMA_CTL_SELECT 0x08 > + > typedef struct FWCfgEntry { > uint32_t len; > uint8_t *data; > @@ 
-59,6 +70,10 @@ struct FWCfgState { > uint16_t cur_entry; > uint32_t cur_offset; > Notifier machine_ready; > + > + bool dma_enabled; > + AddressSpace *dma_as; > + dma_addr_t dma_addr; > }; > > struct FWCfgIoState { > @@ -66,8 +81,8 @@ struct FWCfgIoState { > FWCfgState parent_obj; > /*< public >*/ > > - MemoryRegion comb_iomem; > - uint32_t iobase; > + MemoryRegion comb_iomem, dma_iomem; > + uint32_t iobase, dma_iobase; > }; > > struct FWCfgMemState { > @@ -75,7 +90,7 @@ struct FWCfgMemState { > FWCfgState parent_obj; > /*< public >*/ > > - MemoryRegion ctl_iomem, data_iomem; > + MemoryRegion ctl_iomem, data_iomem, dma_iomem; > uint32_t data_width; > MemoryRegionOps wide_data_ops; > }; (1) I *think* the new "dma_iomem" field, of type MemoryRegion, could be moved up to the parent struct FWCfgState, from both FWCfgMemState and FWCfgIoState. (And the references in the rest of the code could be updated.) ( Independently, some thinking out loud, mostly for myself: I've always been surprised by the difference between (a) FWCfgIoState *carrying* "dma_iobase" as a field -- and a property! --, and (b) FWCfgMemState *not* carrying the same as a field -- nor as a property. I think I finally understand this difference now. It is all rooted in the difference between the internal APIs sysbus_add_io() and sysbus_init_mmio(). Both of these are called from the device realize functions, but the first (sysbus_add_io()) wants the IO port address at once, whereas the second (sysbus_init_mmio()) doesn't want the address -- the actual mapping (sysbus_mmio_map()) is delayed to board code; the device code doesn't want to be aware of it. And this ripples to the top. Because sysbus_add_io() wants the IO port address, we must pass that address to the device realize function. And for that, we need a device property -- "dma_iobase". This is not new, it just follows the example of the preexistent "iobase" field / property. 
Whereas, in the sysbus_init_mmio() case, we can keep the MMIO address private to the board code; the realize function need not know the address. However, the realize function does need to know the *fact* that we're going to do DMA. Given that we must maintain this fact (in "FWCfgState.dma_enabled") anyway, for other -- e.g. migration subsection -- purposes as well, it makes sense to expose that same field of the parent struct as a property, so we can set it in the memory mapped case *before* the realize function looks at it. I feel better now, thanks for listening. ) Then, > @@ -292,6 +307,119 @@ static void fw_cfg_data_mem_write(void *opaque, hwaddr > addr, > } while (i); > } > > +static void fw_cfg_dma_transfer(FWCfgState *s) > +{ > + dma_addr_t len; > + FWCfgDmaAccess dma; > + int arch; > + FWCfgEntry *e; > + int read; > + dma_addr_t dma_addr; > + > + /* Reset the address before the next access */ > + dma_addr = s->dma_addr; > + s->dma_addr = 0; > + > + dma.address = ldq_be_dma(s->dma_as, > + dma_addr + offsetof(FWCfgDmaAccess, address)); > + dma.length = ldl_be_dma(s->dma_as, > + dma_addr + offsetof(FWCfgDmaAccess, length)); > + dma.control = ldl_be_dma(s->dma_as, > + dma_addr + offsetof(FWCfgDmaAccess, control)); > + > + if (dma.control & FW_CFG_DMA_CTL_SELECT) { > + fw_cfg_select(s, dma.control >> 16); > + } > + > + arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL); > + e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK]; > + > + if (dma.control & FW_CFG_DMA_CTL_READ) { > + read = 1; > + } else if (dma.control & FW_CFG_DMA_CTL_SKIP) { > + read = 0; > + } else { > + dma.length = 0; I can see you addressed Kevin's comment here. > + } > + > + dma.control = 0; > + > + while (dma.length > 0 && !(dma.control & FW_CFG_DMA_CTL_ERROR)) { > + if (s->cur_entry == FW_CFG_INVALID || !e->data || > + s->cur_offset >= e->len) { > + len = dma.length; > + > + /* If the access is not a read access, it will be a skip access, > + * tested before. 
> + */ > + if (read) { > + if (dma_memory_set(s->dma_as, dma.address, 0, len)) { > + dma.control |= FW_CFG_DMA_CTL_ERROR; > + } > + } > + > + } else { > + if (dma.length <= (e->len - s->cur_offset)) { > + len = dma.length; > + } else { > + len = (e->len - s->cur_offset); > + } > + > + if (e->read_callback) { > + e->read_callback(e->callback_opaque, s->cur_offset); > + } > + > + /* If the access is not a read access, it will be a skip access, > + * tested before. > + */ > + if (read) { > + if (dma_memory_write(s->dma_as, dma.address, > + &e->data[s->cur_offset], len)) { > + dma.control |= FW_CFG_DMA_CTL_ERROR; > + } > + } > + > + s->cur_offset += len; > + } > + > + dma.address += len; > + dma.length -= len; > + > + } > + > + stl_be_dma(s->dma_as, dma_addr + offsetof(FWCfgDmaAccess, control), > + dma.control); > + > + trace_fw_cfg_read(s, 0); > +} Seems OK to me. > + > +static void fw_cfg_dma_mem_write(void *opaque, hwaddr addr, > + uint64_t value, unsigned size) > +{ > + FWCfgState *s = opaque; > + > + if (size == 4) { > + if (addr == 0) { > + /* FWCfgDmaAccess high address */ > + s->dma_addr = value << 32; > + } else if (addr == 4) { > + /* FWCfgDmaAccess low address */ > + s->dma_addr |= value; > + fw_cfg_dma_transfer(s); > + } > + } else if (size == 8 && addr == 0) { > + s->dma_addr = value; > + fw_cfg_dma_transfer(s); > + } > +} Seems to match the zeroing of s->dma_addr in fw_cfg_dma_transfer(). Good. 
> + > +static bool fw_cfg_dma_mem_valid(void *opaque, hwaddr addr, > + unsigned size, bool is_write) > +{ > + return is_write && ((size == 4 && (addr == 0 || addr == 4)) || > + (size == 8 && addr == 0)); > +} > + > static bool fw_cfg_data_mem_valid(void *opaque, hwaddr addr, > unsigned size, bool is_write) > { > @@ -359,6 +487,12 @@ static const MemoryRegionOps fw_cfg_comb_mem_ops = { > .valid.accepts = fw_cfg_comb_valid, > }; > > +static const MemoryRegionOps fw_cfg_dma_mem_ops = { > + .write = fw_cfg_dma_mem_write, > + .endianness = DEVICE_BIG_ENDIAN, > + .valid.accepts = fw_cfg_dma_mem_valid, > +}; (2) Okay. This is somewhat important, and *completely* non-intuitive, unfortunately. Without setting *both* .valid.max_access_size = 8, .impl.max_access_size = 8, here, the memory subsystem will split up all 8-byte wide accesses (from the guest side) into two 4-byte wide calls to fw_cfg_dma_mem_write(). Those calls do satisfy the ordering logic in fw_cfg_dma_mem_write(), but nonetheless, the lack of the above setting makes the following code in fw_cfg_dma_mem_write() dead: > + } else if (size == 8 && addr == 0) { > + s->dma_addr = value; > + fw_cfg_dma_transfer(s); > + } (I verified this claim with gdb on aarch64.) So, please initialize both of the above fields to 8. > + > static void fw_cfg_reset(DeviceState *d) > { > FWCfgState *s = FW_CFG(d); > @@ -399,6 +533,22 @@ static bool is_version_1(void *opaque, int version_id) > return version_id == 1; > } > > +static bool fw_cfg_dma_enabled(void *opaque) > +{ > + FWCfgState *s = opaque; > + > + return s->dma_enabled; > +} > + > +static VMStateDescription vmstate_fw_cfg_dma = { > + .name = "fw_cfg/dma", > + .needed = fw_cfg_dma_enabled, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(dma_addr, FWCfgState), > + VMSTATE_END_OF_LIST() > + }, > +}; Looks good to me. All fields that come from the command line (i.e. management layer) need not / must not be part of the migration stream. 
And all data that is programmed by the guest, must. Here, "dma_addr" is the only such item. Okay. > + > static const VMStateDescription vmstate_fw_cfg = { > .name = "fw_cfg", > .version_id = 2, > @@ -408,6 +558,10 @@ static const VMStateDescription vmstate_fw_cfg = { > VMSTATE_UINT16_HACK(cur_offset, FWCfgState, is_version_1), > VMSTATE_UINT32_V(cur_offset, FWCfgState, 2), > VMSTATE_END_OF_LIST() > + }, > + .subsections = (const VMStateDescription*[]) { > + &vmstate_fw_cfg_dma, > + NULL, > } > }; > > @@ -593,7 +747,6 @@ static void fw_cfg_init1(DeviceState *dev) > qdev_init_nofail(dev); > > fw_cfg_add_bytes(s, FW_CFG_SIGNATURE, (char *)"QEMU", 4); > - fw_cfg_add_i32(s, FW_CFG_ID, 1); > fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16); > fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == > DT_NOGRAPHIC)); > fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); This is called from fw_cfg_init_io() and fw_cfg_init_mem_wide(). The former is renamed to fw_cfg_init_io_dma() -- and gets a wrapper under the original name --, and sets FW_CFG_ID explicitly. The latter sets FW_CFG_ID explicitly. Okay. 
> @@ -605,25 +758,52 @@ static void fw_cfg_init1(DeviceState *dev) > qemu_add_machine_init_done_notifier(&s->machine_ready); > } > > -FWCfgState *fw_cfg_init_io(uint32_t iobase) > +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t dma_iobase, > + AddressSpace *dma_as) > { > DeviceState *dev; > + FWCfgState *s; > + uint32_t version = FW_CFG_VERSION; > > dev = qdev_create(NULL, TYPE_FW_CFG_IO); > qdev_prop_set_uint32(dev, "iobase", iobase); > + qdev_prop_set_uint32(dev, "dma_iobase", dma_iobase); > + > fw_cfg_init1(dev); > + s = FW_CFG(dev); > + > + if (dma_as) { > + /* 64 bits for the address field */ > + s->dma_as = dma_as; > + s->dma_enabled = true; > + s->dma_addr = 0; > + > + version |= FW_CFG_VERSION_DMA; > + } > > - return FW_CFG(dev); > + fw_cfg_add_i32(s, FW_CFG_ID, version); > + > + return s; > } > > -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr, > - uint32_t data_width) > +FWCfgState *fw_cfg_init_io(uint32_t iobase) > +{ > + return fw_cfg_init_io_dma(iobase, 0, NULL); > +} > + > +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, > + hwaddr data_addr, uint32_t data_width, > + hwaddr dma_addr, AddressSpace *dma_as) > { > DeviceState *dev; > SysBusDevice *sbd; > + FWCfgState *s; > + uint32_t version = FW_CFG_VERSION; > + bool dma_enabled = dma_addr && dma_as; > > dev = qdev_create(NULL, TYPE_FW_CFG_MEM); > qdev_prop_set_uint32(dev, "data_width", data_width); > + qdev_prop_set_bit(dev, "dma_enabled", dma_enabled); > > fw_cfg_init1(dev); > > @@ -631,13 +811,25 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, > hwaddr data_addr, > sysbus_mmio_map(sbd, 0, ctl_addr); > sysbus_mmio_map(sbd, 1, data_addr); > > - return FW_CFG(dev); > + s = FW_CFG(dev); > + > + if (dma_enabled) { > + s->dma_as = dma_as; > + s->dma_addr = 0; > + sysbus_mmio_map(sbd, 2, dma_addr); > + version |= FW_CFG_VERSION_DMA; > + } > + > + fw_cfg_add_i32(s, FW_CFG_ID, version); > + > + return s; > } > > FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr 
data_addr) > { > return fw_cfg_init_mem_wide(ctl_addr, data_addr, > - fw_cfg_data_mem_ops.valid.max_access_size); > + fw_cfg_data_mem_ops.valid.max_access_size, > + 0, NULL); > } > > > @@ -664,6 +856,7 @@ static const TypeInfo fw_cfg_info = { > > static Property fw_cfg_io_properties[] = { > DEFINE_PROP_UINT32("iobase", FWCfgIoState, iobase, -1), > + DEFINE_PROP_UINT32("dma_iobase", FWCfgIoState, dma_iobase, -1), > DEFINE_PROP_END_OF_LIST(), > }; > > @@ -673,8 +866,12 @@ static void fw_cfg_io_realize(DeviceState *dev, Error > **errp) > SysBusDevice *sbd = SYS_BUS_DEVICE(dev); > > memory_region_init_io(&s->comb_iomem, OBJECT(s), &fw_cfg_comb_mem_ops, > - FW_CFG(s), "fwcfg", FW_CFG_SIZE); > + FW_CFG(s), "fwcfg", FW_CFG_CTL_SIZE); > sysbus_add_io(sbd, s->iobase, &s->comb_iomem); > + > + memory_region_init_io(&s->dma_iomem, OBJECT(s), &fw_cfg_dma_mem_ops, > + FW_CFG(s), "fwcfg.dma", sizeof(dma_addr_t)); > + sysbus_add_io(sbd, s->dma_iobase, &s->dma_iomem); > } (3) Hmmmm. I think this should be made conditional. sysbus_add_io() maps the region into IO port space immediately. Callers of fw_cfg_init_io() should *not* reach sysbus_add_io(); it makes no sense to map the DMA addr register at IO port 0. (And then you can omit memory_region_init_io() as well, if dma_iobase is zero.) The rest of the code looks fine to me. Again, I apologize for sucking this much at timely reviews lately. If you fix (2) and (3) above -- optionally: (1) as well --, then you'll have my R-b. If you've lost your patience, I can pick up this series. 
:) Thank you Laszlo > > static void fw_cfg_io_class_init(ObjectClass *klass, void *data) > @@ -695,6 +892,8 @@ static const TypeInfo fw_cfg_io_info = { > > static Property fw_cfg_mem_properties[] = { > DEFINE_PROP_UINT32("data_width", FWCfgMemState, data_width, -1), > + DEFINE_PROP_BOOL("dma_enabled", FWCfgMemState, parent_obj.dma_enabled, > + false), > DEFINE_PROP_END_OF_LIST(), > }; > > @@ -705,7 +904,7 @@ static void fw_cfg_mem_realize(DeviceState *dev, Error > **errp) > const MemoryRegionOps *data_ops = &fw_cfg_data_mem_ops; > > memory_region_init_io(&s->ctl_iomem, OBJECT(s), &fw_cfg_ctl_mem_ops, > - FW_CFG(s), "fwcfg.ctl", FW_CFG_SIZE); > + FW_CFG(s), "fwcfg.ctl", FW_CFG_CTL_SIZE); > sysbus_init_mmio(sbd, &s->ctl_iomem); > > if (s->data_width > data_ops->valid.max_access_size) { > @@ -723,6 +922,12 @@ static void fw_cfg_mem_realize(DeviceState *dev, Error > **errp) > memory_region_init_io(&s->data_iomem, OBJECT(s), data_ops, FW_CFG(s), > "fwcfg.data", data_ops->valid.max_access_size); > sysbus_init_mmio(sbd, &s->data_iomem); > + > + if (FW_CFG(s)->dma_enabled) { > + memory_region_init_io(&s->dma_iomem, OBJECT(s), &fw_cfg_dma_mem_ops, > + FW_CFG(s), "fwcfg.dma", sizeof(dma_addr_t)); > + sysbus_init_mmio(sbd, &s->dma_iomem); > + } > } > > static void fw_cfg_mem_class_init(ObjectClass *klass, void *data) > diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h > index e60d3ca..ee0cd8a 100644 > --- a/include/hw/nvram/fw_cfg.h > +++ b/include/hw/nvram/fw_cfg.h > @@ -61,6 +61,15 @@ typedef struct FWCfgFiles { > FWCfgFile f[]; > } FWCfgFiles; > > +/* Control as first field allows for different structures selected by this > + * field, which might be useful in the future > + */ > +typedef struct FWCfgDmaAccess { > + uint32_t control; > + uint32_t length; > + uint64_t address; > +} QEMU_PACKED FWCfgDmaAccess; > + > typedef void (*FWCfgCallback)(void *opaque, uint8_t *data); > typedef void (*FWCfgReadCallback)(void *opaque, uint32_t offset); > > @@ -77,10 
+86,13 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char > *filename, > void *data, size_t len); > void *fw_cfg_modify_file(FWCfgState *s, const char *filename, void *data, > size_t len); > +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t dma_iobase, > + AddressSpace *dma_as); > FWCfgState *fw_cfg_init_io(uint32_t iobase); > FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr); > -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr, > - uint32_t data_width); > +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, > + hwaddr data_addr, uint32_t data_width, > + hwaddr dma_addr, AddressSpace *dma_as); > > FWCfgState *fw_cfg_find(void); > >