On Thu, 1 Oct 2015 16:36:07 +0200 Laszlo Ersek <ler...@redhat.com> wrote:
> This looks good to me. Thanks for addressing my v3 request. > > I have some new remarks here. I feel *really* bad for not finding them > earlier. (If you get tired of working on this series, I could pick it > up and try to shepherd it further.) I'll continue with it, don't worry. Thanks for your comments. Marc > > On 10/01/15 14:16, Marc Marí wrote: > > Based on the specifications on docs/specs/fw_cfg.txt > > > > This interface is an addon. The old interface can still be used as > > usual. > > > > Based on Gerd Hoffman's initial implementation. > > > > Signed-off-by: Marc Marí <mar...@redhat.com> > > --- > > hw/arm/virt.c | 2 +- > > hw/nvram/fw_cfg.c | 231 > > +++++++++++++++++++++++++++++++++++++++++++--- > > include/hw/nvram/fw_cfg.h | 16 +++- 3 files changed, 233 > > insertions(+), 16 deletions(-) > > > > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > > index d25d6cf..7ae984f 100644 > > --- a/hw/arm/virt.c > > +++ b/hw/arm/virt.c > > @@ -683,7 +683,7 @@ static void create_fw_cfg(const VirtBoardInfo > > *vbi) hwaddr size = vbi->memmap[VIRT_FW_CFG].size; > > char *nodename; > > > > - fw_cfg_init_mem_wide(base + 8, base, 8); > > + fw_cfg_init_mem_wide(base + 8, base, 8, 0, NULL); > > > > nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); > > qemu_fdt_add_subnode(vbi->fdt, nodename); > > diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c > > index 658f8c4..59933b3 100644 > > --- a/hw/nvram/fw_cfg.c > > +++ b/hw/nvram/fw_cfg.c > > @@ -23,6 +23,7 @@ > > */ > > #include "hw/hw.h" > > #include "sysemu/sysemu.h" > > +#include "sysemu/dma.h" > > #include "hw/isa/isa.h" > > #include "hw/nvram/fw_cfg.h" > > #include "hw/sysbus.h" > > @@ -30,7 +31,7 @@ > > #include "qemu/error-report.h" > > #include "qemu/config-file.h" > > > > -#define FW_CFG_SIZE 2 > > +#define FW_CFG_CTL_SIZE 2 > > #define FW_CFG_NAME "fw_cfg" > > #define FW_CFG_PATH "/machine/" FW_CFG_NAME > > > > @@ -42,6 +43,16 @@ > > #define FW_CFG_IO(obj) OBJECT_CHECK(FWCfgIoState, (obj), > > TYPE_FW_CFG_IO) #define FW_CFG_MEM(obj) OBJECT_CHECK(FWCfgMemState, > > (obj), TYPE_FW_CFG_MEM) > > +/* FW_CFG_VERSION bits */ > > +#define FW_CFG_VERSION 0x01 > > +#define FW_CFG_VERSION_DMA 0x02 > > + > > +/* FW_CFG_DMA_CONTROL bits */ > > +#define FW_CFG_DMA_CTL_ERROR 0x01 > > +#define FW_CFG_DMA_CTL_READ 0x02 > > +#define FW_CFG_DMA_CTL_SKIP 0x04 > > +#define FW_CFG_DMA_CTL_SELECT 0x08 > > + > > typedef struct FWCfgEntry { > > uint32_t len; > > uint8_t *data; > > @@ -59,6 +70,10 @@ struct FWCfgState { > > uint16_t cur_entry; > > uint32_t cur_offset; > > Notifier machine_ready; > > + > > + bool dma_enabled; > > + AddressSpace *dma_as; > > + dma_addr_t dma_addr; > > }; > > > > struct FWCfgIoState { > > @@ -66,8 +81,8 @@ struct FWCfgIoState { > > FWCfgState parent_obj; > > /*< public >*/ > > > > - MemoryRegion comb_iomem; > > - uint32_t iobase; > > + MemoryRegion comb_iomem, dma_iomem; > > + uint32_t iobase, dma_iobase; > > }; > > > > struct FWCfgMemState { > > @@ -75,7 +90,7 @@ struct FWCfgMemState { > > FWCfgState parent_obj; > > /*< public >*/ > > > > - MemoryRegion ctl_iomem, data_iomem; > > + MemoryRegion ctl_iomem, data_iomem, dma_iomem; > > uint32_t data_width; > > MemoryRegionOps wide_data_ops; > > }; > > (1) I *think* the new "dma_iomem" field, of type MemoryRegion, could > be moved up to the parent struct FWCfgEntry, from both FWCfgMemState > and FWCfgIoState. (And the references in the rest of the code could > be updated.) > > ( > > Independently, some loud thinking, mostly for myself: I've always been > surprised by the difference between (a) FWCfgIoState *carrying* > "dma_iobase" as a field -- and a property! --, and (b) FWCfgMemState > *not* carrying the same as a field -- nor as a property. > > I think I finally understand this difference now. It is all rooted in > the difference between the internal APIs sysbus_add_io() and > sysbus_init_mmio(). Both of these are called from the device realize > functions, but the first (sysbus_add_io()) wants the IO port address > at once, whereas the second (sysbus_init_mmio()) doesn't want the > address -- the actual mapping (sysbus_mmio_map()) is delayed to board > code; the device code doesn't want to be aware of it. > > And this ripples to the top. Because sysbus_add_io() wants the IO port > address, we must pass that address to the device realize function. And > for that, we need a device property -- "dma_iobase". This is not new, > it just follows the example of the preexistent "iobase" field / > property. > > Whereas, in the sysbus_init_mmio() case, we can keep the MMIO address > private to the board code; the realize function need not know the > address. However, the realize function does need to know the *fact* > that we're going to do DMA. Given that we must maintain this fact (in > "FWCfgState.dma_enabled") anyway, for other -- e.g. migration > subsection -- purposes as well, it makes sense to expose that same > field of the parent struct as a property, so we can set it in the > memory mapped case *before* the realize function looks at it. > > I feel better now, thanks for listening. > > ) > > Then, > > > @@ -292,6 +307,119 @@ static void fw_cfg_data_mem_write(void > > *opaque, hwaddr addr, } while (i); > > } > > > > +static void fw_cfg_dma_transfer(FWCfgState *s) > > +{ > > + dma_addr_t len; > > + FWCfgDmaAccess dma; > > + int arch; > > + FWCfgEntry *e; > > + int read; > > + dma_addr_t dma_addr; > > + > > + /* Reset the address before the next access */ > > + dma_addr = s->dma_addr; > > + s->dma_addr = 0; > > + > > + dma.address = ldq_be_dma(s->dma_as, > > + dma_addr + offsetof(FWCfgDmaAccess, > > address)); > > + dma.length = ldl_be_dma(s->dma_as, > > + dma_addr + offsetof(FWCfgDmaAccess, > > length)); > > + dma.control = ldl_be_dma(s->dma_as, > > + dma_addr + offsetof(FWCfgDmaAccess, > > control)); + > > + if (dma.control & FW_CFG_DMA_CTL_SELECT) { > > + fw_cfg_select(s, dma.control >> 16); > > + } > > + > > + arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL); > > + e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK]; > > + > > + if (dma.control & FW_CFG_DMA_CTL_READ) { > > + read = 1; > > + } else if (dma.control & FW_CFG_DMA_CTL_SKIP) { > > + read = 0; > > + } else { > > + dma.length = 0; > > I can see you addressed Kevin's comment here. > > > > + } > > + > > + dma.control = 0; > > + > > + while (dma.length > 0 && !(dma.control & > > FW_CFG_DMA_CTL_ERROR)) { > > + if (s->cur_entry == FW_CFG_INVALID || !e->data || > > + s->cur_offset >= e->len) { > > + len = dma.length; > > + > > + /* If the access is not a read access, it will be a > > skip access, > > + * tested before. > > + */ > > + if (read) { > > + if (dma_memory_set(s->dma_as, dma.address, 0, > > len)) { > > + dma.control |= FW_CFG_DMA_CTL_ERROR; > > + } > > + } > > + > > + } else { > > + if (dma.length <= (e->len - s->cur_offset)) { > > + len = dma.length; > > + } else { > > + len = (e->len - s->cur_offset); > > + } > > + > > + if (e->read_callback) { > > + e->read_callback(e->callback_opaque, > > s->cur_offset); > > + } > > + > > + /* If the access is not a read access, it will be a > > skip access, > > + * tested before. > > + */ > > + if (read) { > > + if (dma_memory_write(s->dma_as, dma.address, > > + &e->data[s->cur_offset], len)) > > { > > + dma.control |= FW_CFG_DMA_CTL_ERROR; > > + } > > + } > > + > > + s->cur_offset += len; > > + } > > + > > + dma.address += len; > > + dma.length -= len; > > + > > + } > > + > > + stl_be_dma(s->dma_as, dma_addr + offsetof(FWCfgDmaAccess, > > control), > > + dma.control); > > + > > + trace_fw_cfg_read(s, 0); > > +} > > Seems OK to me. > > > + > > +static void fw_cfg_dma_mem_write(void *opaque, hwaddr addr, > > + uint64_t value, unsigned size) > > +{ > > + FWCfgState *s = opaque; > > + > > + if (size == 4) { > > + if (addr == 0) { > > + /* FWCfgDmaAccess high address */ > > + s->dma_addr = value << 32; > > + } else if (addr == 4) { > > + /* FWCfgDmaAccess low address */ > > + s->dma_addr |= value; > > + fw_cfg_dma_transfer(s); > > + } > > + } else if (size == 8 && addr == 0) { > > + s->dma_addr = value; > > + fw_cfg_dma_transfer(s); > > + } > > +} > > Seems to match the zeroing of s->dma_addr in fw_cfg_dma_transfer(). > Good. > > > + > > +static bool fw_cfg_dma_mem_valid(void *opaque, hwaddr addr, > > + unsigned size, bool is_write) > > +{ > > + return is_write && ((size == 4 && (addr == 0 || addr == 4)) || > > + (size == 8 && addr == 0)); > > +} > > + > > static bool fw_cfg_data_mem_valid(void *opaque, hwaddr addr, > > unsigned size, bool is_write) > > { > > @@ -359,6 +487,12 @@ static const MemoryRegionOps > > fw_cfg_comb_mem_ops = { .valid.accepts = fw_cfg_comb_valid, > > }; > > > > +static const MemoryRegionOps fw_cfg_dma_mem_ops = { > > + .write = fw_cfg_dma_mem_write, > > + .endianness = DEVICE_BIG_ENDIAN, > > + .valid.accepts = fw_cfg_dma_mem_valid, > > +}; > > (2) Okay. This is somewhat important, and *completely* non-intuitive, > unfortunately. > > Without setting *both* > > .valid.max_access_size = 8, > .impl.max_access_size = 8, > > here, the memory subsystem will split up all 8-byte wide accesses > (from the guest side) to two 4-byte wide calls to > fw_cfg_dma_mem_write()). > > Those calls do satisfy the ordering logic in fw_cfg_dma_mem_write(), > but nonetheless, the lack of the above setting makes the following > code in fw_cfg_dma_mem_write() dead: > > > + } else if (size == 8 && addr == 0) { > > + s->dma_addr = value; > > + fw_cfg_dma_transfer(s); > > + } > > (I verified this claim with gdb on aarch64.) > > So, please initialize both of the above fields to 8. > > > + > > static void fw_cfg_reset(DeviceState *d) > > { > > FWCfgState *s = FW_CFG(d); > > @@ -399,6 +533,22 @@ static bool is_version_1(void *opaque, int > > version_id) return version_id == 1; > > } > > > > +static bool fw_cfg_dma_enabled(void *opaque) > > +{ > > + FWCfgState *s = opaque; > > + > > + return s->dma_enabled; > > +} > > + > > +static VMStateDescription vmstate_fw_cfg_dma = { > > + .name = "fw_cfg/dma", > > + .needed = fw_cfg_dma_enabled, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT64(dma_addr, FWCfgState), > > + VMSTATE_END_OF_LIST() > > + }, > > +}; > > Looks good to me. All fields that come from the command line (ie. > management layer) need not / must not be part of the migration stream. > And all data that is programmed by the guest, must. Here, "dma_addr" > is the only such item. Okay. > > > + > > static const VMStateDescription vmstate_fw_cfg = { > > .name = "fw_cfg", > > .version_id = 2, > > @@ -408,6 +558,10 @@ static const VMStateDescription vmstate_fw_cfg > > = { VMSTATE_UINT16_HACK(cur_offset, FWCfgState, is_version_1), > > VMSTATE_UINT32_V(cur_offset, FWCfgState, 2), > > VMSTATE_END_OF_LIST() > > + }, > > + .subsections = (const VMStateDescription*[]) { > > + &vmstate_fw_cfg_dma, > > + NULL, > > } > > }; > > > > @@ -593,7 +747,6 @@ static void fw_cfg_init1(DeviceState *dev) > > qdev_init_nofail(dev); > > > > fw_cfg_add_bytes(s, FW_CFG_SIGNATURE, (char *)"QEMU", 4); > > - fw_cfg_add_i32(s, FW_CFG_ID, 1); > > fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16); > > fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == > > DT_NOGRAPHIC)); fw_cfg_add_i16(s, FW_CFG_NB_CPUS, > > (uint16_t)smp_cpus); > > This is called from fw_cfg_init_io() and fw_cfg_init_mem_wide(). > > The former is renamed to fw_cfg_init_io_dma() -- and gets a wrapper > under the original name --, and sets FW_CFG_ID expliticly. > > The latter sets FW_CFG_ID expliticly. > > Okay. > > > > @@ -605,25 +758,52 @@ static void fw_cfg_init1(DeviceState *dev) > > qemu_add_machine_init_done_notifier(&s->machine_ready); > > } > > > > -FWCfgState *fw_cfg_init_io(uint32_t iobase) > > +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t > > dma_iobase, > > + AddressSpace *dma_as) > > { > > DeviceState *dev; > > + FWCfgState *s; > > + uint32_t version = FW_CFG_VERSION; > > > > dev = qdev_create(NULL, TYPE_FW_CFG_IO); > > qdev_prop_set_uint32(dev, "iobase", iobase); > > + qdev_prop_set_uint32(dev, "dma_iobase", dma_iobase); > > + > > fw_cfg_init1(dev); > > + s = FW_CFG(dev); > > + > > + if (dma_as) { > > + /* 64 bits for the address field */ > > + s->dma_as = dma_as; > > + s->dma_enabled = true; > > + s->dma_addr = 0; > > + > > + version |= FW_CFG_VERSION_DMA; > > + } > > > > - return FW_CFG(dev); > > + fw_cfg_add_i32(s, FW_CFG_ID, version); > > + > > + return s; > > } > > > > -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr, > > - uint32_t data_width) > > +FWCfgState *fw_cfg_init_io(uint32_t iobase) > > +{ > > + return fw_cfg_init_io_dma(iobase, 0, NULL); > > +} > > + > > +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, > > + hwaddr data_addr, uint32_t > > data_width, > > + hwaddr dma_addr, AddressSpace > > *dma_as) { > > DeviceState *dev; > > SysBusDevice *sbd; > > + FWCfgState *s; > > + uint32_t version = FW_CFG_VERSION; > > + bool dma_enabled = dma_addr && dma_as; > > > > dev = qdev_create(NULL, TYPE_FW_CFG_MEM); > > qdev_prop_set_uint32(dev, "data_width", data_width); > > + qdev_prop_set_bit(dev, "dma_enabled", dma_enabled); > > > > fw_cfg_init1(dev); > > > > @@ -631,13 +811,25 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr > > ctl_addr, hwaddr data_addr, sysbus_mmio_map(sbd, 0, ctl_addr); > > sysbus_mmio_map(sbd, 1, data_addr); > > > > - return FW_CFG(dev); > > + s = FW_CFG(dev); > > + > > + if (dma_enabled) { > > + s->dma_as = dma_as; > > + s->dma_addr = 0; > > + sysbus_mmio_map(sbd, 2, dma_addr); > > + version |= FW_CFG_VERSION_DMA; > > + } > > + > > + fw_cfg_add_i32(s, FW_CFG_ID, version); > > + > > + return s; > > } > > > > FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr) > > { > > return fw_cfg_init_mem_wide(ctl_addr, data_addr, > > - > > fw_cfg_data_mem_ops.valid.max_access_size); > > + > > fw_cfg_data_mem_ops.valid.max_access_size, > > + 0, NULL); > > } > > > > > > @@ -664,6 +856,7 @@ static const TypeInfo fw_cfg_info = { > > > > static Property fw_cfg_io_properties[] = { > > DEFINE_PROP_UINT32("iobase", FWCfgIoState, iobase, -1), > > + DEFINE_PROP_UINT32("dma_iobase", FWCfgIoState, dma_iobase, -1), > > DEFINE_PROP_END_OF_LIST(), > > }; > > > > @@ -673,8 +866,12 @@ static void fw_cfg_io_realize(DeviceState > > *dev, Error **errp) SysBusDevice *sbd = SYS_BUS_DEVICE(dev); > > > > memory_region_init_io(&s->comb_iomem, OBJECT(s), > > &fw_cfg_comb_mem_ops, > > - FW_CFG(s), "fwcfg", FW_CFG_SIZE); > > + FW_CFG(s), "fwcfg", FW_CFG_CTL_SIZE); > > sysbus_add_io(sbd, s->iobase, &s->comb_iomem); > > + > > + memory_region_init_io(&s->dma_iomem, OBJECT(s), > > &fw_cfg_dma_mem_ops, > > + FW_CFG(s), "fwcfg.dma", > > sizeof(dma_addr_t)); > > + sysbus_add_io(sbd, s->dma_iobase, &s->dma_iomem); > > } > > (3) Hmmmm. I think this should be made conditional. sysbus_add_io() > maps the region into IO port space immediately. Callers of > fw_cfg_init_io() should *not* reach sysbus_add_io(); it makes no > sense to map the DMA addr register at IO port 0. > > (And then you can omit memory_region_init_io() as well, if dma_iobase > is zero.) > > The rest of the code looks fine to me. > > Again, I apologize for sucking this much at timely reviews lately. If > you fix (2) and (3) above -- optionally: (1) as well --, then you'll > have my R-b. > > If you've lost your patience, I can pick up this series. :) > > Thank you > Laszlo > > > > > > static void fw_cfg_io_class_init(ObjectClass *klass, void *data) > > @@ -695,6 +892,8 @@ static const TypeInfo fw_cfg_io_info = { > > > > static Property fw_cfg_mem_properties[] = { > > DEFINE_PROP_UINT32("data_width", FWCfgMemState, data_width, > > -1), > > + DEFINE_PROP_BOOL("dma_enabled", FWCfgMemState, > > parent_obj.dma_enabled, > > + false), > > DEFINE_PROP_END_OF_LIST(), > > }; > > > > @@ -705,7 +904,7 @@ static void fw_cfg_mem_realize(DeviceState > > *dev, Error **errp) const MemoryRegionOps *data_ops = > > &fw_cfg_data_mem_ops; > > memory_region_init_io(&s->ctl_iomem, OBJECT(s), > > &fw_cfg_ctl_mem_ops, > > - FW_CFG(s), "fwcfg.ctl", FW_CFG_SIZE); > > + FW_CFG(s), "fwcfg.ctl", FW_CFG_CTL_SIZE); > > sysbus_init_mmio(sbd, &s->ctl_iomem); > > > > if (s->data_width > data_ops->valid.max_access_size) { > > @@ -723,6 +922,12 @@ static void fw_cfg_mem_realize(DeviceState > > *dev, Error **errp) memory_region_init_io(&s->data_iomem, > > OBJECT(s), data_ops, FW_CFG(s), "fwcfg.data", > > data_ops->valid.max_access_size); sysbus_init_mmio(sbd, > > &s->data_iomem); + > > + if (FW_CFG(s)->dma_enabled) { > > + memory_region_init_io(&s->dma_iomem, OBJECT(s), > > &fw_cfg_dma_mem_ops, > > + FW_CFG(s), "fwcfg.dma", > > sizeof(dma_addr_t)); > > + sysbus_init_mmio(sbd, &s->dma_iomem); > > + } > > } > > > > static void fw_cfg_mem_class_init(ObjectClass *klass, void *data) > > diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h > > index e60d3ca..ee0cd8a 100644 > > --- a/include/hw/nvram/fw_cfg.h > > +++ b/include/hw/nvram/fw_cfg.h > > @@ -61,6 +61,15 @@ typedef struct FWCfgFiles { > > FWCfgFile f[]; > > } FWCfgFiles; > > > > +/* Control as first field allows for different structures selected > > by this > > + * field, which might be useful in the future > > + */ > > +typedef struct FWCfgDmaAccess { > > + uint32_t control; > > + uint32_t length; > > + uint64_t address; > > +} QEMU_PACKED FWCfgDmaAccess; > > + > > typedef void (*FWCfgCallback)(void *opaque, uint8_t *data); > > typedef void (*FWCfgReadCallback)(void *opaque, uint32_t offset); > > > > @@ -77,10 +86,13 @@ void fw_cfg_add_file_callback(FWCfgState *s, > > const char *filename, void *data, size_t len); > > void *fw_cfg_modify_file(FWCfgState *s, const char *filename, void > > *data, size_t len); > > +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t > > dma_iobase, > > + AddressSpace *dma_as); > > FWCfgState *fw_cfg_init_io(uint32_t iobase); > > FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr); > > -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr, > > - uint32_t data_width); > > +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, > > + hwaddr data_addr, uint32_t > > data_width, > > + hwaddr dma_addr, AddressSpace > > *dma_as); > > FWCfgState *fw_cfg_find(void); > > > > >