On 05/28/2018 05:18 PM, Greg Kurz wrote: > On Fri, 18 May 2018 18:44:05 +0200 > Cédric Le Goater <c...@kaod.org> wrote: > >> The proposed layout of the IRQ number space is organized as follow : >> >> RANGES DEVICES >> >> 0x0000 - 0x0FFF Reserved for future use (IPI = 2) >> 0x1000 - 0x1000 1 EPOW >> 0x1001 - 0x1001 1 HOTPLUG >> 0x1002 - 0x10FF unused >> 0x1100 - 0x11FF 256 VIO devices (1 IRQ each) >> 0x1200 - 0x1283 32 PCI LSI devices (4 IRQs each) >> 0x1284 - 0x13FF unused >> 0x1400 - 0x17FF PCI MSI device 1 (1024 IRQs each) >> 0x1800 - 0x1BFF PCI MSI device 1 > > device 2 and... > >> 0x1c00 - 0x1FFF PCI MSI device 2 > > device 3 ?
ah yes :) > >> >> 0x2000 .... not allocated. Need to increase NR_IRQS >> > > What's NR_IRQS ? The total number of IRQs in the backend. > What's the goal to have several 1k ranges ? to support multiple PHBs with different allocatable MSI ranges, which is not the case today. > So that each one may be affected to a single device ? yes. >> The MSI range is a bit more complex to handle as the IRQS are dynamically >> allocated by the guest OS. In consequence, we use a bitmap allocator >> under the machine for these. >> >> The XICS IRQ number space is increased to 4K, which gives three MSI >> ranges of 1K for the PHBs. The XICS source IRQ numbers still have the >> 4K offset. >> > > Why ? It's the legacy IRQ offset value for the sPAPR sources (2 being reserved for XICS IPIs) and because XIVE will use the range for IPIs : 0x0000 - 0x0FFF Reserved for future use (IPI = 2) So not changing the value serve our purpose. >> Signed-off-by: Cédric Le Goater <c...@kaod.org> >> --- >> include/hw/ppc/spapr.h | 2 + >> include/hw/ppc/spapr_irq.h | 12 +++ >> hw/ppc/spapr.c | 22 +++++ >> hw/ppc/spapr_irq.c | 220 >> ++++++++++++++++++++++++++++++++++++++++++++- >> 4 files changed, 255 insertions(+), 1 deletion(-) >> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 4eb212b16a51..fcc1b1c1451d 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -165,6 +165,8 @@ struct sPAPRMachineState { >> char *kvm_type; >> MemoryHotplugState hotplug_memory; >> >> + int32_t nr_irqs; >> + unsigned long *irq_map; >> const char *icp_type; >> >> bool cmd_line_caps[SPAPR_CAP_NUM]; >> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h >> index caf4c33d4cec..d1af4c4d11ba 100644 >> --- a/include/hw/ppc/spapr_irq.h >> +++ b/include/hw/ppc/spapr_irq.h >> @@ -22,8 +22,16 @@ >> #define SPAPR_IRQ_PCI_MSI 0x1400 /* 1K IRQs per device covered by >> * a bitmap allocator */ >> >> +typedef struct sPAPRPIrqRange { >> + const char *name; >> + uint32_t offset; >> + uint32_t width; >> + uint32_t max_index; >> +} sPAPRPIrqRange; >> + >> typedef struct sPAPRIrq { >> uint32_t nr_irqs; >> + const sPAPRPIrqRange *ranges; >> >> void (*init)(sPAPRMachineState *spapr, Error **errp); >> int (*alloc)(sPAPRMachineState *spapr, uint32_t range, uint32_t index, >> @@ -35,6 +43,10 @@ typedef struct sPAPRIrq { >> } sPAPRIrq; >> >> extern sPAPRIrq spapr_irq_default; >> +extern sPAPRIrq spapr_irq_2_12; >> + >> +const sPAPRPIrqRange *spapr_irq_get_range(sPAPRMachineState *spapr, >> + uint32_t offset); >> >> int spapr_irq_alloc(sPAPRMachineState *spapr, uint32_t range, uint32_t >> index, >> Error **errp); >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 09f095d73eae..f2ebd6f20414 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -1848,6 +1848,24 @@ static const VMStateDescription >> vmstate_spapr_patb_entry = { >> }, >> }; >> >> +static bool spapr_irq_map_needed(void *opaque) >> +{ >> + sPAPRMachineState *spapr = opaque; >> + >> + return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->nr_irqs); >> +} >> + >> +static const VMStateDescription vmstate_spapr_irq_map = { >> + .name = "spapr_irq_map", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .needed = spapr_irq_map_needed, >> + .fields = (VMStateField[]) { >> + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs), >> + VMSTATE_END_OF_LIST() >> + }, >> +}; >> + >> static const VMStateDescription vmstate_spapr = { >> .name = "spapr", >> .version_id = 3, >> @@ -1875,6 +1893,7 @@ static const VMStateDescription vmstate_spapr = { >> &vmstate_spapr_cap_cfpc, >> &vmstate_spapr_cap_sbbc, >> &vmstate_spapr_cap_ibs, >> + &vmstate_spapr_irq_map, >> NULL >> } >> }; >> @@ -3916,7 +3935,10 @@ static void >> spapr_machine_2_12_instance_options(MachineState *machine) >> >> static void spapr_machine_2_12_class_options(MachineClass *mc) >> { >> + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); >> + >> spapr_machine_2_13_class_options(mc); >> + smc->irq = &spapr_irq_2_12; >> SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_12); >> } >> >> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c >> index ff6cb1aafd25..bfffb1467336 100644 >> --- a/hw/ppc/spapr_irq.c >> +++ b/hw/ppc/spapr_irq.c >> @@ -192,7 +192,7 @@ static qemu_irq spapr_qirq_2_12(sPAPRMachineState >> *spapr, int irq) >> return NULL; >> } >> >> -sPAPRIrq spapr_irq_default = { >> +sPAPRIrq spapr_irq_2_12 = { >> .nr_irqs = XICS_IRQS_SPAPR, >> .init = spapr_irq_init_2_12, >> .alloc = spapr_irq_alloc_2_12, >> @@ -201,6 +201,224 @@ sPAPRIrq spapr_irq_default = { >> .qirq = spapr_qirq_2_12, >> }; >> >> +/* >> + * IRQ range helpers for new IRQ backends >> + */ >> +const sPAPRPIrqRange *spapr_irq_get_range(sPAPRMachineState *spapr, >> + uint32_t offset) >> +{ >> + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); >> + const sPAPRPIrqRange *range = smc->irq->ranges; >> + >> + if (range) { >> + while (range->name && range->offset != offset) { >> + range++; >> + } >> + >> + if (!range->name) { >> + return NULL; >> + } >> + } >> + >> + return range; >> +} >> + >> +static int spapr_irq_get_base(sPAPRMachineState *spapr, uint32_t offset, >> + uint32_t index, Error **errp) >> +{ >> + const sPAPRPIrqRange *range = spapr_irq_get_range(spapr, offset); >> + >> + if (!range) { >> + error_setg(errp, "Invalid IRQ range %x", offset); >> + return -1; >> + } >> + >> + if (index > range->max_index) { >> + error_setg(errp, "Index %d too big for IRQ range %s", index, >> + range->name); >> + return -1; >> + } >> + >> + return range->offset + index * range->width; >> +} >> + >> +static int spapr_irq_range_alloc(sPAPRMachineState *spapr, >> + uint32_t range, uint32_t index, Error >> **errp) >> +{ >> + return spapr_irq_get_base(spapr, range, index, errp); >> +} >> + >> +static int spapr_irq_range_alloc_msi(sPAPRMachineState *spapr, uint32_t >> range, >> + uint32_t index, int num, bool align, >> + Error **errp) >> +{ >> + int msi_base = spapr_irq_get_base(spapr, SPAPR_IRQ_PCI_MSI, index, >> errp); >> + int irq; >> + >> + /* >> + * The 'align_mask' parameter of bitmap_find_next_zero_area() >> + * should be one less than a power of 2; 0 means no >> + * alignment. Adapt the 'align' value of the former allocator >> + * to fit the requirements of bitmap_find_next_zero_area() >> + */ >> + align -= 1; >> + >> + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, >> + msi_base, num, align); >> + if (irq == spapr->nr_irqs) { >> + error_setg(errp, "can't find a free MSI %d-IRQ block", num); >> + return -1; >> + } >> + >> + bitmap_set(spapr->irq_map, irq, num); >> + return irq; >> +} >> + >> +static void spapr_irq_range_free_msi(sPAPRMachineState *spapr, int irq, int >> num) >> +{ >> + bitmap_clear(spapr->irq_map, irq, num); >> +} >> + >> +/* >> + * New XICS IRQ backend >> + * >> + * using the IRQ ranges and device indexes >> + */ >> +static void spapr_irq_init_2_13(sPAPRMachineState *spapr, Error **errp) > > FYI, next QEMU version will likely be 3.0: > > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg04879.html ah yes. > Also, it is useful to add version information to the name in case of > existing machine types. I guess _default or _new is a better choice > here. ok. >> +{ >> + MachineState *machine = MACHINE(spapr); >> + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); >> + >> + spapr_irq_init_2_12(spapr, errp); > > Hmm... it's a bit confusing. I'd rather have each init function to call a > common helper. well, I followed the machine class pattern. > >> + >> + /* >> + * Initialize the MSI IRQ allocator. The full XICS IRQ number >> + * space is covered even though the bottow IRQ numbers below the >> + * XICS source number offset (4K) are unused and that only MSI IRQ >> + * numbers can be allocated. We does waste some bytes but it makes >> + * things easier. We will optimize later. >> + */ >> + spapr->nr_irqs = smc->irq->nr_irqs + spapr->ics->offset; >> + spapr->irq_map = bitmap_new(spapr->nr_irqs); >> +} >> + >> +static int spapr_irq_alloc_2_13(sPAPRMachineState *spapr, >> + uint32_t range, uint32_t index, Error >> **errp) >> +{ >> + ICSState *ics = spapr->ics; >> + bool lsi = (range == SPAPR_IRQ_PCI_LSI); >> + int irq = spapr_irq_range_alloc(spapr, range, index, errp); >> + >> + if (irq < 0) { >> + return irq; >> + } >> + >> + /* Update the IRQState in the XICS source */ >> + ics_set_irq_type(ics, irq - ics->offset, lsi); >> + >> + return irq; >> +} >> + >> +static int spapr_irq_alloc_block_2_13(sPAPRMachineState *spapr, uint32_t >> range, >> + uint32_t index, int num, bool align, >> + Error **errp) >> +{ >> + ICSState *ics = spapr->ics; >> + bool lsi = (range == SPAPR_IRQ_PCI_LSI); >> + int irq; >> + int i; >> + >> + if (range == SPAPR_IRQ_PCI_MSI) { >> + irq = spapr_irq_range_alloc_msi(spapr, range, index, num, align, >> errp); >> + } else { >> + /* TODO: check IRQ range width vs. required block size */ >> + irq = spapr_irq_range_alloc(spapr, range, index, errp); >> + } >> + >> + if (irq < 0) { >> + return irq; >> + } >> + >> + /* Update the IRQState in the XICS source */ >> + for (i = irq; i < irq + num; ++i) { >> + ics_set_irq_type(ics, i - ics->offset, lsi); >> + } >> + >> + return irq; >> +} >> + >> +static void spapr_irq_free_2_13(sPAPRMachineState *spapr, int irq, int num, >> + Error **errp) >> +{ >> + ICSState *ics = spapr->ics; >> + int msi_base = spapr_irq_get_base(spapr, SPAPR_IRQ_PCI_MSI, 0, NULL); >> + int i; >> + >> + /* Any IRQ below MSI base should not be freed */ >> + if (irq < msi_base) { >> + error_setg(errp, "IRQs %x-%x can not be freed", irq, irq + num); >> + return; >> + } >> + >> + spapr_irq_range_free_msi(spapr, irq, num); >> + >> + /* Clear out the IRQState from the XICS source */ >> + for (i = irq; i < irq + num; ++i) { >> + if (ics_valid_irq(ics, i)) { >> + uint32_t srcno = i - ics->offset; >> + memset(&ics->irqs[srcno], 0, sizeof(ICSIRQState)); >> + } >> + } >> +} >> + >> +static qemu_irq spapr_qirq_2_13(sPAPRMachineState *spapr, int irq) >> +{ >> + return spapr_qirq_2_12(spapr, irq); >> +} >> + >> +/* >> + * RANGES DEVICES >> + * >> + * 0x0000 - 0x0FFF Reserved (IPI = 2) >> + * >> + * 0x1000 - 0x1000 1 EPOW >> + * 0x1001 - 0x1001 1 HOTPLUG >> + * 0x1002 - 0x10FF unused >> + * 0x1100 - 0x11FF 256 VIO devices (1 IRQ each) >> + * 0x1200 - 0x1283 32 PCI LSI devices (4 IRQs each) >> + * 0x1284 - 0x13FF unused >> + * 0x1400 - 0x17FF PCI MSI device 1 (1024 IRQs each) >> + * 0x1800 - 0x1BFF PCI MSI device 1 >> + * 0x1c00 - 0x1FFF PCI MSI device 2 >> + * 0x2000 .... not allocated. Need to increase NR_IRQS >> + */ >> +static const sPAPRPIrqRange spapr_irq_ranges_2_13[] = { >> + /* "IPI" Not used */ >> + { "EPOW", SPAPR_IRQ_EPOW, 1, 0 }, >> + { "HOTPLUG", SPAPR_IRQ_HOTPLUG, 1, 0 }, >> + { "VIO", SPAPR_IRQ_VIO, 1, 0xFF }, >> + { "PCI LSI", SPAPR_IRQ_PCI_LSI, PCI_NUM_PINS, 0x1F }, >> + { "PCI MSI", SPAPR_IRQ_PCI_MSI, 0x400, 0x1F }, >> + { NULL, 0, 0, 0 }, >> +}; >> + >> +/* >> + * Increase the XICS IRQ number space to 4K. It gives us 3 possible >> + * MSI ranges for the PHBs. The XICS Source IRQ numbers still have the >> + * 4K offset. >> + */ >> +#define SPAPR_NR_IRQS_2_13 0x1000 >> + >> +sPAPRIrq spapr_irq_default = { >> + .nr_irqs = SPAPR_NR_IRQS_2_13, > > Since there's only one user for this define, why not open-coding the value > here ? yes. I agree. Thanks, C. >> + .init = spapr_irq_init_2_13, >> + .ranges = spapr_irq_ranges_2_13, >> + .alloc = spapr_irq_alloc_2_13, >> + .alloc_block = spapr_irq_alloc_block_2_13, >> + .free = spapr_irq_free_2_13, >> + .qirq = spapr_qirq_2_13, >> +}; >> + >> int spapr_irq_alloc(sPAPRMachineState *spapr, uint32_t range, uint32_t >> index, >> Error **errp) >> { >