On 11/08/2017 09:54 AM, Greg Kurz wrote: > On Sun, 29 Oct 2017 19:12:10 +0100 > Cédric Le Goater <c...@kaod.org> wrote: > >> Currently, the ICSState 'ics' object of the sPAPR machine acts as the >> global interrupt source handler and also as the IRQ number allocator >> for the machine. Some IRQ numbers are allocated very early in the >> machine initialization sequence to populate the device tree, and this >> is a problem to introduce the new POWER XIVE interrupt model, as it >> needs to share the IRQ numbers with the older model. >> >> To prepare ground for XIVE, here is a proposal adding a set of new >> XICSFabric operations to let the machine handle directly the IRQ >> number allocation and to decorrelate the allocation from the interrupt >> source object : >> >> bool (*irq_test)(XICSFabric *xi, int irq); >> int (*irq_alloc_block)(XICSFabric *xi, int count, int align); >> void (*irq_free_block)(XICSFabric *xi, int irq, int num); >> >> In these prototypes, the 'irq' parameter refers to a number in the >> global IRQ number space. Indexes for arrays storing different state >> informations on the interrupts, like the ICSIRQState, are named >> 'srcno'. >> >> On the sPAPR platform, these IRQ operations are simply backed by a >> bitmap 'irq_map' in the machine. >> >> 'irq_base' is a base number in sync with the ICSState 'offset'. It >> lets us allocate only the subset of the IRQ numbers used on the sPAPR >> platform but we could also choose to waste some extra bytes (512) and >> allocate the whole number space. 'nr_irqs' is the total number of >> IRQs, required to manipulate the bitmap. >> >> Signed-off-by: Cédric Le Goater <c...@kaod.org> >> --- > > Hi, > > The idea makes sense but this patch brings too many changes IMHO.
yes. I agree. The next version splits a bit more the changes and introduces the XICSFabric ops before the bitmap. >> hw/intc/xics.c | 3 ++- >> hw/intc/xics_spapr.c | 57 ++++++++++++---------------------------------- >> hw/ppc/spapr.c | 62 >> +++++++++++++++++++++++++++++++++++++++++++++++++- >> include/hw/ppc/spapr.h | 4 ++++ >> include/hw/ppc/xics.h | 4 ++++ >> 5 files changed, 85 insertions(+), 45 deletions(-) >> >> diff --git a/hw/intc/xics.c b/hw/intc/xics.c >> index cc9816e7f204..2c4899f278e2 100644 >> --- a/hw/intc/xics.c >> +++ b/hw/intc/xics.c >> @@ -53,6 +53,7 @@ void icp_pic_print_info(ICPState *icp, Monitor *mon) >> void ics_pic_print_info(ICSState *ics, Monitor *mon) >> { >> uint32_t i; >> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); >> >> monitor_printf(mon, "ICS %4x..%4x %p\n", >> ics->offset, ics->offset + ics->nr_irqs - 1, ics); >> @@ -64,7 +65,7 @@ void ics_pic_print_info(ICSState *ics, Monitor *mon) >> for (i = 0; i < ics->nr_irqs; i++) { >> ICSIRQState *irq = ics->irqs + i; >> >> - if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { >> + if (!xic->irq_test(ics->xics, i + ics->offset)) { >> continue; >> } >> monitor_printf(mon, " %4x %s %02x %02x\n", >> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c >> index d98ea8b13068..f2e20bca5b2e 100644 >> --- a/hw/intc/xics_spapr.c >> +++ b/hw/intc/xics_spapr.c >> @@ -245,50 +245,23 @@ void xics_spapr_init(sPAPRMachineState *spapr) >> spapr_register_hypercall(H_IPOLL, h_ipoll); >> } >> >> -#define ICS_IRQ_FREE(ics, srcno) \ >> - (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) >> - >> -static int ics_find_free_block(ICSState *ics, int num, int alignnum) >> -{ >> - int first, i; >> - >> - for (first = 0; first < ics->nr_irqs; first += alignnum) { >> - if (num > (ics->nr_irqs - first)) { >> - return -1; >> - } >> - for (i = first; i < first + num; ++i) { >> - if (!ICS_IRQ_FREE(ics, i)) { >> - break; >> - } >> - } >> - if (i == (first + num)) { >> - return first; >> - } >> - } >> - >> - return -1; >> -} >> - >> int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp) >> { >> int irq; >> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); >> >> - if (!ics) { >> - return -1; >> - } > > If spapr_ics_alloc() is never called with ics == NULL, then you > should assert. Also, this looks like this deserves a separate > patch. yes. >> if (irq_hint) { >> - if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { >> + if (xic->irq_test(ics->xics, irq_hint)) { >> error_setg(errp, "can't allocate IRQ %d: already in use", >> irq_hint); >> return -1; >> } >> irq = irq_hint; >> } else { >> - irq = ics_find_free_block(ics, 1, 1); >> + irq = xic->irq_alloc_block(ics->xics, 1, 0); >> if (irq < 0) { >> error_setg(errp, "can't allocate IRQ: no IRQ left"); >> return -1; >> } >> - irq += ics->offset; >> } >> >> ics_set_irq_type(ics, irq - ics->offset, lsi); >> @@ -305,10 +278,8 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool >> lsi, >> bool align, Error **errp) >> { >> int i, first = -1; >> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); >> >> - if (!ics) { >> - return -1; >> - } >> > > Ditto. > >> /* >> * MSIMesage::data is used for storing VIRQ so >> @@ -320,9 +291,9 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool >> lsi, >> if (align) { >> assert((num == 1) || (num == 2) || (num == 4) || >> (num == 8) || (num == 16) || (num == 32)); >> - first = ics_find_free_block(ics, num, num); >> + first = xic->irq_alloc_block(ics->xics, num, num); >> } else { >> - first = ics_find_free_block(ics, num, 1); >> + first = xic->irq_alloc_block(ics->xics, num, 0); >> } >> if (first < 0) { >> error_setg(errp, "can't find a free %d-IRQ block", num); >> @@ -331,25 +302,25 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool >> lsi, >> >> if (first >= 0) { > > It looks like this check isn't needed since we return in the first < 0 case. > Maybe you can fix this in a preliminary patch ? done. >> for (i = first; i < first + num; ++i) { >> - ics_set_irq_type(ics, i, lsi); >> + ics_set_irq_type(ics, i - ics->offset, lsi); >> } >> } >> - first += ics->offset; >> >> trace_xics_alloc_block(first, num, lsi, align); >> >> return first; >> } >> >> -static void ics_free(ICSState *ics, int srcno, int num) >> +static void ics_free(ICSState *ics, int irq, int num) >> { >> int i; >> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); >> >> - for (i = srcno; i < srcno + num; ++i) { >> - if (ICS_IRQ_FREE(ics, i)) { >> - trace_xics_ics_free_warn(0, i + ics->offset); >> + for (i = irq; i < irq + num; ++i) { >> + if (xic->irq_test(ics->xics, i)) { >> + trace_xics_ics_free_warn(0, i); >> } >> - memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); >> + xic->irq_free_block(ics->xics, i, 1); >> } >> } >> >> @@ -357,7 +328,7 @@ void spapr_ics_free(ICSState *ics, int irq, int num) >> { >> if (ics_valid_irq(ics, irq)) { >> trace_xics_ics_free(0, irq, num); >> - ics_free(ics, irq - ics->offset, num); >> + ics_free(ics, irq, num); >> } >> } >> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index d682f013d422..88da4bad2328 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -1681,6 +1681,24 @@ static const VMStateDescription >> vmstate_spapr_patb_entry = { >> }, >> }; >> >> +static bool spapr_irq_map_needed(void *opaque) >> +{ >> + sPAPRMachineState *spapr = opaque; >> + >> + return find_first_bit(spapr->irq_map, spapr->nr_irqs) < spapr->nr_irqs; >> +} > > This will allow migration from older QEMU. Maybe you can add a machine > property > so that the subsection is only generated for newer pseries, and you'll support > migration to older QEMU (see details at the end of === Subsections === in > docs/devel/migration.txt). I have found a better way to save state. We will discuss it in the next round. >> + >> +static const VMStateDescription vmstate_spapr_irq_map = { >> + .name = "spapr_irq_map", >> + .version_id = 0, >> + .minimum_version_id = 0, >> + .needed = spapr_irq_map_needed, >> + .fields = (VMStateField[]) { >> + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs), >> + VMSTATE_END_OF_LIST() >> + }, >> +}; >> + >> static const VMStateDescription vmstate_spapr = { >> .name = "spapr", >> .version_id = 3, >> @@ -1700,6 +1718,7 @@ static const VMStateDescription vmstate_spapr = { >> &vmstate_spapr_ov5_cas, >> &vmstate_spapr_patb_entry, >> &vmstate_spapr_pending_events, >> + &vmstate_spapr_irq_map, >> NULL >> } >> }; >> @@ -2337,8 +2356,13 @@ static void ppc_spapr_init(MachineState *machine) >> /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ >> load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; >> >> + /* Initialize the IRQ allocator */ >> + spapr->nr_irqs = XICS_IRQS_SPAPR; >> + spapr->irq_map = bitmap_new(spapr->nr_irqs); >> + spapr->irq_base = XICS_IRQ_BASE; >> + >> /* Set up Interrupt Controller before we create the VCPUs */ >> - xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal); >> + xics_system_init(machine, spapr->nr_irqs, &error_fatal); >> >> /* Set up containers for ibm,client-architecture-support negotiated >> options >> */ >> @@ -3536,6 +3560,38 @@ static ICPState *spapr_icp_get(XICSFabric *xi, int >> vcpu_id) >> return cpu ? ICP(cpu->intc) : NULL; >> } >> >> +static bool spapr_irq_test(XICSFabric *xi, int irq) >> +{ >> + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); >> + int srcno = irq - spapr->irq_base; >> + >> + return test_bit(srcno, spapr->irq_map); >> +} >> + >> +static int spapr_irq_alloc_block(XICSFabric *xi, int count, int align) >> +{ >> + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); >> + int start = 0; >> + int srcno; >> + >> + srcno = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, >> start, >> + count, align); >> + if (srcno == spapr->nr_irqs) { >> + return -1; >> + } >> + >> + bitmap_set(spapr->irq_map, srcno, count); >> + return srcno + spapr->irq_base; >> +} >> + >> +static void spapr_irq_free_block(XICSFabric *xi, int irq, int num) >> +{ >> + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); >> + int srcno = irq - spapr->irq_base; >> + >> + bitmap_clear(spapr->irq_map, srcno, num); >> +} >> + >> static void spapr_pic_print_info(InterruptStatsProvider *obj, >> Monitor *mon) >> { >> @@ -3630,6 +3686,10 @@ static void spapr_machine_class_init(ObjectClass *oc, >> void *data) >> xic->ics_get = spapr_ics_get; >> xic->ics_resend = spapr_ics_resend; >> xic->icp_get = spapr_icp_get; >> + xic->irq_test = spapr_irq_test; >> + xic->irq_alloc_block = spapr_irq_alloc_block; >> + xic->irq_free_block = spapr_irq_free_block; >> + >> ispc->print_info = spapr_pic_print_info; >> /* Force NUMA node memory size to be a multiple of >> * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 9d21ca9bde3a..b962bfe09bb5 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -7,6 +7,7 @@ >> #include "hw/ppc/spapr_drc.h" >> #include "hw/mem/pc-dimm.h" >> #include "hw/ppc/spapr_ovec.h" >> +#include "qemu/bitmap.h" >> >> struct VIOsPAPRBus; >> struct sPAPRPHBState; >> @@ -78,6 +79,9 @@ struct sPAPRMachineState { >> struct VIOsPAPRBus *vio_bus; >> QLIST_HEAD(, sPAPRPHBState) phbs; >> struct sPAPRNVRAM *nvram; >> + int32_t nr_irqs; >> + unsigned long *irq_map; >> + uint32_t irq_base; >> ICSState *ics; >> sPAPRRTCState rtc; >> >> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h >> index 28d248abad61..30e7f2e0a7dd 100644 >> --- a/include/hw/ppc/xics.h >> +++ b/include/hw/ppc/xics.h >> @@ -175,6 +175,10 @@ typedef struct XICSFabricClass { >> ICSState *(*ics_get)(XICSFabric *xi, int irq); >> void (*ics_resend)(XICSFabric *xi); >> ICPState *(*icp_get)(XICSFabric *xi, int server); >> + /* IRQ allocator helpers */ >> + bool (*irq_test)(XICSFabric *xi, int irq); >> + int (*irq_alloc_block)(XICSFabric *xi, int count, int align); >> + void (*irq_free_block)(XICSFabric *xi, int irq, int num); > > API looks good to me. I suggest you introduce it in a dedicated patch, and > change the allocator implementation in another patch. yep. Thanks C. >> } XICSFabricClass; >> >> #define XICS_IRQS_SPAPR 1024 >