On Mon, Sep 06, 2010 at 04:46:21PM +0900, Isaku Yamahata wrote: > implemented msi support functions. > > Signed-off-by: Isaku Yamahata <yamah...@valinux.co.jp>
Good stuff. Below I suggest a couple of minor correctness issues, and a couple of API changes. Below are also some suggestions to improve readability a bit: mostly there are too many one-liners used only once or twice, with names not informative enough to guess what they do, so I suggest we opencode them as one ends up reading the source anyway. Thanks! > --- > Makefile.objs | 2 +- > hw/msi.c | 362 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/msi.h | 41 +++++++ > hw/pci.h | 5 + > 4 files changed, 409 insertions(+), 1 deletions(-) > create mode 100644 hw/msi.c > create mode 100644 hw/msi.h > > diff --git a/Makefile.objs b/Makefile.objs > index 594894b..5f5a4c5 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -186,7 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o > # PCI watchdog devices > hw-obj-y += wdt_i6300esb.o > > -hw-obj-y += msix.o > +hw-obj-y += msix.o msi.o > > # PCI network cards > hw-obj-y += ne2000.o > diff --git a/hw/msi.c b/hw/msi.c > new file mode 100644 > index 0000000..dbe89ee > --- /dev/null > +++ b/hw/msi.c > @@ -0,0 +1,362 @@ > +/* > + * msi.c > + * > + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "msi.h" > + > +#define PCI_MSI_PENDING_32 0x10 > +#define PCI_MSI_PENDING_64 0x14 > + Longer term, pls try to add this to linux, btw. If you manage to we'll then move this to pci_regs.h > +/* PCI_MSI_FLAGS */ > +#define PCI_MSI_FLAGS_QSIZE_SHIFT 4 > +#define PCI_MSI_FLAGS_QMASK_SHIFT 1 Use ffs on macros from pci_regs.h : PCI_MSI_FLAGS_QSIZE_SHIFT -> (ffs(PCI_MSI_FLAGS_QSIZE) - 1) as there's a single user, we don't need a macro then. > + > +/* PCI_MSI_ADDRESS_LO */ > +#define PCI_MSI_ADDRESS_LO_RESERVED_MASK 0x3 Better to have the valid bits than invalid bits: #define PCI_MSI_ADDRESS_LO_MASK (~0x3) And then use directly. again, we can try adding this to linux long term. > + > +#define PCI_MSI_32_SIZEOF 0x0a > +#define PCI_MSI_64_SIZEOF 0x0e > +#define PCI_MSI_32M_SIZEOF 0x14 > +#define PCI_MSI_64M_SIZEOF 0x18 Note to self: if we get rid of cap allocator, we won't need the above. > + > +#define MSI_DEBUG > +#ifdef MSI_DEBUG > +# define MSI_DPRINTF(fmt, ...) \ > + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) > +#else > +# define MSI_DPRINTF(fmt, ...) do { } while (0) > +#endif > +#define MSI_DEV_PRINTF(dev, fmt, ...) \ > + MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) > + > +static inline bool msi_test_bit(uint32_t bitmap, uint8_t bit) > +{ > + return bitmap & (1 << bit); > +} > + > +static inline void msi_set_bit(uint32_t *bitmap, uint8_t bit) > +{ > + *bitmap |= (1 << bit); > +} > + > +static inline void msi_clear_bit(uint32_t *bitmap, uint8_t bit) > +{ > + *bitmap &= ~(1 << bit); > +} The above 3 are much better open-coded - all they do is simple math. Also, I think we should use 1U << - the above is signed and so it will cause integer overflow when bit is 31. > + > + Two empty lines -> one > +/* multiple vector only suport power of 2 and up to 32 */ > +static inline uint8_t ilog2(uint8_t nr_vector) > +{ > + switch (nr_vector) { > + case 1: > + return 0; > + case 2: > + return 1; > + case 4: > + return 2; > + case 8: > + return 3; > + case 16: > + return 4; > + case 32: > + return 5; > + default: > + abort(); Not intuitive to abort in a mathematical function IMO. > + break; > + } > + return 0; > +} > + Please just use ffs opencoded, there's a single user. ilog -> ffs(n) - 1; and do this after range-checking as suggested below. > +static inline bool is_mask_bit_support(uint16_t control) > +{ > + return control & PCI_MSI_FLAGS_MASKBIT; > +} > + > +static inline bool is_64bit_address(uint16_t control) > +{ > + return control & PCI_MSI_FLAGS_64BIT; > +} > + These 2 function names are especially confusing. The above 2 are better open-coded. > +static inline uint8_t msi_vector_order(uint16_t control) > +{ > + return (control & PCI_MSI_FLAGS_QSIZE) >> PCI_MSI_FLAGS_QSIZE_SHIFT; > +} > + above only used once? better opencoded > +static inline uint8_t msi_nr_vector(uint16_t control) > +{ > + return 1 << msi_vector_order(control); > +} > + Rename msi_nr_vectors. > +static inline bool msi_control_enabled(uint16_t control) > +{ > + return control & PCI_MSI_FLAGS_ENABLE; > +} > + above is better opencoded. > +static inline uint8_t msi_cap_sizeof(uint16_t control) > +{ > + switch (control & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) { > + case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT: > + return PCI_MSI_64M_SIZEOF; > + case PCI_MSI_FLAGS_64BIT: > + return PCI_MSI_64_SIZEOF; > + case PCI_MSI_FLAGS_MASKBIT: > + return PCI_MSI_32M_SIZEOF; > + case 0: > + return PCI_MSI_32_SIZEOF; > + default: > + abort(); > + break; > + } > + return 0; > +} > + Note to self: this will go away too if we get rid of cap allocator. > +static inline uint16_t msi_control_reg(const PCIDevice* dev) > +{ > + return dev->msi_cap + PCI_MSI_FLAGS; > +} > + > +static inline uint32_t msi_lower_address_reg(const PCIDevice* dev) > +{ > + return dev->msi_cap + PCI_MSI_ADDRESS_LO; > +} > + > +static inline uint32_t msi_upper_address_reg(const PCIDevice* dev) > +{ > + return dev->msi_cap + PCI_MSI_ADDRESS_HI; > +} > + I think above are better open-coded - a bit less sure here as I understand you want consistency between registers. In any case, I think names should be consistent with macros and be explicit in that they return an offset: e.g. msi_lower_address_reg -> msi_address_lo_off Also, return uint8_t and not uint32_t: these registers always live in a traditional config space. > +static inline uint16_t msi_data_reg(const PCIDevice* dev, bool is64bit) msi64bit would be a better name than is64bit > +{ > + return dev->msi_cap + (is64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32); > +} > + > +static inline uint32_t msi_mask_reg(const PCIDevice* dev, bool is64bit) > +{ > + return dev->msi_cap + (is64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32); > +} > + > +static inline uint32_t msi_pending_reg(const PCIDevice* dev, bool is64bit) > +{ > + return dev->msi_cap + (is64bit ? PCI_MSI_PENDING_64 : > PCI_MSI_PENDING_32); > +} > + > +bool msi_enabled(const PCIDevice *dev) > +{ > + return msi_present(dev) && > + msi_control_enabled(pci_get_word(dev->config + > msi_control_reg(dev))); > +} > + > +int msi_init(struct PCIDevice *dev, uint8_t offset, > + uint8_t nr_vector, uint16_t flags) rename nr_vector -> nr_vectors here and elsewhere > +{ > + uint8_t vector_order = ilog2(nr_vector); > + bool is64bit = is_64bit_address(flags); > + uint8_t cap_size; > + int config_offset; maybe add assert on nr_vector: should be assert(!((nr_vector - 1) & nr_vector)); assert(nr_vector > 0); assert(nr_vector <= 32); > + MSI_DEV_PRINTF(dev, > + "init offset: 0x%"PRIx8" vector: %"PRId8 > + " flags 0x%"PRIx16"\n", offset, nr_vector, flags); > + > + flags &= PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT; > + flags |= (vector_order << PCI_MSI_FLAGS_QMASK_SHIFT) & > PCI_MSI_FLAGS_QMASK; > + > + cap_size = msi_cap_sizeof(flags); > + config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, > cap_size); > + if (config_offset < 0) { > + return config_offset; > + } > + > + dev->msi_cap = config_offset; > + dev->cap_present |= QEMU_PCI_CAP_MSI; > + > + pci_set_word(dev->config + msi_control_reg(dev), flags); > + pci_set_word(dev->wmask + msi_control_reg(dev), > + PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); > + pci_set_long(dev->wmask + msi_lower_address_reg(dev), > + ~PCI_MSI_ADDRESS_LO_RESERVED_MASK); > + if (is64bit) { > + pci_set_long(dev->wmask + msi_upper_address_reg(dev), 0xffffffff); > + } > + pci_set_word(dev->wmask + msi_data_reg(dev, is64bit), 0xffff); > + > + if (is_mask_bit_support(flags)) { > + pci_set_long(dev->wmask + msi_mask_reg(dev, is64bit), > + (1 << nr_vector) - 1); 1 is signed. I think this should be 1U << nr_vector, otherwise you get signed integer overflow for nr >= 31. > + } > + return config_offset; > +} > + > +void msi_uninit(struct PCIDevice *dev) > +{ > + uint16_t control = pci_get_word(dev->config + msi_control_reg(dev)); > + uint8_t cap_size = msi_cap_sizeof(control); > + pci_del_capability(dev, PCI_CAP_ID_MSIX, cap_size); > + MSI_DEV_PRINTF(dev, "uninit\n"); > +} > + > +void msi_reset(PCIDevice *dev) > +{ > + uint16_t control; > + bool is64bit; > + > + control = pci_get_word(dev->config + msi_control_reg(dev)); > + control &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); > + is64bit = is_64bit_address(control); > + > + pci_set_word(dev->config + msi_control_reg(dev), control); > + pci_set_long(dev->config + msi_lower_address_reg(dev), 0); > + if (is64bit) { > + pci_set_long(dev->config + msi_upper_address_reg(dev), 0); > + } > + pci_set_word(dev->config + msi_data_reg(dev, is64bit), 0); > + if (is_mask_bit_support(control)) { > + pci_set_long(dev->config + msi_mask_reg(dev, is64bit), 0); > + pci_set_long(dev->config + msi_pending_reg(dev, is64bit), 0); > + } > + MSI_DEV_PRINTF(dev, "reset\n"); > +} > + > +static bool msi_is_masked(const PCIDevice *dev, uint8_t vector) > +{ > + uint16_t control = pci_get_word(dev->config + msi_control_reg(dev)); > + uint32_t mask; > + > + if (!is_mask_bit_support(control)) { > + return false; > + } > + > + mask = pci_get_long(dev->config + > + msi_mask_reg(dev, is_64bit_address(control))); > + return msi_test_bit(mask, vector); > +} > + > +static void msi_set_pending(PCIDevice *dev, uint8_t vector) > +{ > + uint16_t control = pci_get_word(dev->config + msi_control_reg(dev)); > + bool is64bit = is_64bit_address(control); > + uint32_t pending; > + > + assert(is_mask_bit_support(control)); > + > + pending = pci_get_long(dev->config + msi_pending_reg(dev, is64bit)); > + msi_set_bit(&pending, vector); > + pci_set_long(dev->config + msi_pending_reg(dev, is64bit), pending); > + MSI_DEV_PRINTF(dev, "pending vector 0x%"PRIx8"\n", vector); > +} > + > +void msi_notify(PCIDevice *dev, uint8_t vector) > +{ > + uint16_t control = pci_get_word(dev->config + msi_control_reg(dev)); > + bool is64bit = is_64bit_address(control); > + uint8_t nr_vector = msi_nr_vector(control); > + uint64_t address; > + uint32_t data; > + > + assert(vector < nr_vector); > + if (msi_is_masked(dev, vector)) { > + msi_set_pending(dev, vector); > + return; > + } > + > + if (is64bit){ > + address = pci_get_quad(dev->config + msi_lower_address_reg(dev)); > + } else { > + address = pci_get_long(dev->config + msi_lower_address_reg(dev)); > + } > + > + /* upper bit 31:16 is zero */ > + data = pci_get_word(dev->config + msi_data_reg(dev, is64bit)); > + if (nr_vector > 1) { > + data &= ~(nr_vector - 1); > + data |= vector; > + } > + > + MSI_DEV_PRINTF(dev, > + "notify vector 0x%"PRIx8 > + " address: 0x%"PRIx64" data: 0x%"PRIx32"\n", > + vector, address, data); > + stl_phys(address, data); > +} > + > +/* call this function after updating configs by pci_default_write_config() */ > +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) > +{ > + uint16_t control = pci_get_word(dev->config + msi_control_reg(dev)); > + bool is64bit = is_64bit_address(control); > + uint8_t nr_vector = msi_nr_vector(control); > + > +#ifdef MSI_DEBUG > + uint8_t cap_size = msi_cap_sizeof(control & (PCI_MSI_FLAGS_MASKBIT | > + PCI_MSI_FLAGS_64BIT)); > + if (ranges_overlap(addr, len, dev->msi_cap, cap_size)) { > + MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n", > + addr, val, len); > + MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32, > + control, > + pci_get_long(dev->config + > msi_lower_address_reg(dev))); > + if (is64bit) { > + fprintf(stderr, " addrss-hi: 0x%"PRIx32, > + pci_get_long(dev->config + msi_upper_address_reg(dev))); > + } > + fprintf(stderr, " data: 0x%"PRIx16, > + pci_get_word(dev->config + msi_data_reg(dev, is64bit))); > + if (is_mask_bit_support(control)) { > + fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32, > + pci_get_long(dev->config + msi_mask_reg(dev, is64bit)), > + pci_get_long(dev->config + msi_pending_reg(dev, > is64bit))); > + } > + fprintf(stderr, "\n"); > + } > +#endif > + > + if (!msi_control_enabled(control)) { Should not we clear pending bits here? > + return; > + } We must also clear INTx# interrupts when msi is enabled. > + > + if (!is_mask_bit_support(control)) { > + /* if per vector masking isn't support, > + there is no pending interrupt. */ > + return; > + } > + I think we also need to clear pending bits if # of vectors is reduced. We need to also decide what to do if software tries to enable more vectors than supported. I suggest setting allocated vectors to nr_vectors in this case. > + if (range_covers_byte(addr, len, msi_control_reg(dev)) || /* MSI enable > */ > + ranges_overlap(addr, len, msi_mask_reg(dev, is64bit), 4)) { I think it is better to reduce nesting. Reverse the condition and return if this is not an msi write. > + uint32_t pending = > + pci_get_long(dev->config + msi_pending_reg(dev, is64bit)); > + uint8_t vector; > + > + /* deliver pending interrupts which are unmasked */ > + for (vector = 0; vector < nr_vector; ++vector) { > + if (msi_is_masked(dev, vector) || !msi_test_bit(pending, > vector)) { I am confused. This is called after mask is updated, right? So msi_is_masked means vector was masked, not unmasked? I think the logic is reversed here. > + continue; > + } > + > + msi_clear_bit(&pending, vector); > + pci_set_long(dev->config + msi_pending_reg(dev, is64bit), > pending); > + msi_notify(dev, vector); > + } > + } > +} > + > +uint8_t msi_nr_allocated_vector(const PCIDevice *dev) rename msi_nr_vectors_allocated to match spec. > +{ > + uint16_t control = pci_get_word(dev->config + msi_control_reg(dev)); > + return msi_nr_vector(control); > +} > diff --git a/hw/msi.h b/hw/msi.h > new file mode 100644 > index 0000000..1131c6e > --- /dev/null > +++ b/hw/msi.h > @@ -0,0 +1,41 @@ > +/* > + * msi.h > + * > + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef QEMU_MSI_H > +#define QEMU_MSI_H > + > +#include "qemu-common.h" > +#include "pci.h" > + > +bool msi_enabled(const PCIDevice *dev); > +int msi_init(struct PCIDevice *dev, uint8_t offset, > + uint8_t nr_vector, uint16_t flags); So users need to encode flags? Where do they get them? And to encode, we need registers internal to msi.c or duplicate ... Would not it be better to pass msi_64_bit and msi_per_vector_mask? > +void msi_uninit(struct PCIDevice *dev); > +void msi_reset(PCIDevice *dev); > +void msi_notify(PCIDevice *dev, uint8_t vector); > +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len); > +uint8_t msi_nr_allocated_vector(const PCIDevice *dev); > + > +static inline bool msi_present(const PCIDevice *dev) > +{ > + return dev->cap_present & QEMU_PCI_CAP_MSI; > +} > + > +#endif /* QEMU_MSI_H */ > diff --git a/hw/pci.h b/hw/pci.h > index 2b4c318..9387a03 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -115,6 +115,8 @@ enum { > /* multifunction capable device */ > #define QEMU_PCI_CAP_MULTIFUNCTION_BITNR 2 > QEMU_PCI_CAP_MULTIFUNCTION = (1 << QEMU_PCI_CAP_MULTIFUNCTION_BITNR), > + > + QEMU_PCI_CAP_MSI = 0x4, These are internal bits so we do not need to preserve any values. Put this cap right before or right after MSIX please. > }; > > struct PCIDevice { > @@ -168,6 +170,9 @@ struct PCIDevice { > /* Version id needed for VMState */ > int32_t version_id; > > + /* Offset of MSI capability in config space */ > + uint8_t msi_cap; > + > /* Location of option rom */ > char *romfile; > ram_addr_t rom_offset; > -- > 1.7.1.1