On Mon, Jul 24, 2017 at 02:52:29PM +0200, Cédric Le Goater wrote: > On 07/19/2017 05:24 AM, David Gibson wrote: > > On Wed, Jul 05, 2017 at 07:13:18PM +0200, Cédric Le Goater wrote: > >> The XIVE interrupt controller of the POWER9 uses a set of tables to > >> redirect exception from event sources to CPU threads. Among which we > >> choose to model : > >> > >> - the State Bit Entries (SBE), also known as Event State Buffer > >> (ESB). This is a two bit state machine for each event source which > >> is used to trigger events. The bits are named "P" (pending) and "Q" > >> (queued) and can be controlled by MMIO. > >> > >> - the Interrupt Virtualization Entry (IVE) table, also known as Event > >> Assignment Structure (EAS). This table is indexed by the IRQ number > >> and is looked up to find the Event Queue associated with a > >> triggered event. > >> > >> - the Event Queue Descriptor (EQD) table, also known as Event > >> Notification Descriptor (END). The EQD contains fields that specify > >> the Event Queue on which event data is posted (and later pulled by > >> the OS) and also a target (or VPD) to notify. > >> > >> An additional table was not modeled but we might need to to support > >> the H_INT_SET_OS_REPORTING_LINE hcall: > >> > >> - the Virtual Processor Descriptor (VPD) table, also known as > >> Notification Virtual Target (NVT). > >> > >> The XIVE object is expanded with the tables described above. The size > >> of each table depends on the number of provisioned IRQ and the maximum > >> number of CPUs in the system. The indexing is very basic and might > >> need to be improved for the EQs. > >> > >> Signed-off-by: Cédric Le Goater <c...@kaod.org> > >> --- > >> hw/intc/xive-internal.h | 95 > >> +++++++++++++++++++++++++++++++++++++++++++++++++ > >> hw/intc/xive.c | 72 +++++++++++++++++++++++++++++++++++++ > >> 2 files changed, 167 insertions(+) > >> > >> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > >> index 155c2dcd6066..8e755aa88a14 100644 > >> --- a/hw/intc/xive-internal.h > >> +++ b/hw/intc/xive-internal.h > >> @@ -11,6 +11,89 @@ > >> > >> #include <hw/sysbus.h> > >> > >> +/* Utilities to manipulate these (originaly from OPAL) */ > >> +#define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1) > >> +#define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m)) > >> +#define SETFIELD(m, v, val) \ > >> + (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m))) > >> + > >> +#define PPC_BIT(bit) (0x8000000000000000UL >> (bit)) > >> +#define PPC_BIT32(bit) (0x80000000UL >> (bit)) > >> +#define PPC_BIT8(bit) (0x80UL >> (bit)) > >> +#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | > >> PPC_BIT(bs)) > >> +#define PPC_BITMASK32(bs, be) ((PPC_BIT32(bs) - PPC_BIT32(be)) | \ > >> + PPC_BIT32(bs)) > >> + > >> +/* IVE/EAS > >> + * > >> + * One per interrupt source. Targets that interrupt to a given EQ > >> + * and provides the corresponding logical interrupt number (EQ data) > >> + * > >> + * We also map this structure to the escalation descriptor inside > >> + * an EQ, though in that case the valid and masked bits are not used. > >> + */ > >> +typedef struct XiveIVE { > >> + /* Use a single 64-bit definition to make it easier to > >> + * perform atomic updates > >> + */ > >> + uint64_t w; > >> +#define IVE_VALID PPC_BIT(0) > >> +#define IVE_EQ_BLOCK PPC_BITMASK(4, 7) /* Destination EQ block# > >> */ > >> +#define IVE_EQ_INDEX PPC_BITMASK(8, 31) /* Destination EQ index > >> */ > >> +#define IVE_MASKED PPC_BIT(32) /* Masked */ > >> +#define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the > >> EQ */ > >> +} XiveIVE; > >> + > >> +/* EQ */ > >> +typedef struct XiveEQ { > >> + uint32_t w0; > >> +#define EQ_W0_VALID PPC_BIT32(0) > >> +#define EQ_W0_ENQUEUE PPC_BIT32(1) > >> +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) > >> +#define EQ_W0_BACKLOG PPC_BIT32(3) > >> +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) > >> +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) > >> +#define EQ_W0_END_OF_INTR PPC_BIT32(6) > >> +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) > >> +#define EQ_W0_SW0 PPC_BIT32(16) > >> +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ > >> +#define EQ_QSIZE_4K 0 > >> +#define EQ_QSIZE_64K 4 > >> +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) > >> + uint32_t w1; > >> +#define EQ_W1_ESn PPC_BITMASK32(0, 1) > >> +#define EQ_W1_ESn_P PPC_BIT32(0) > >> +#define EQ_W1_ESn_Q PPC_BIT32(1) > >> +#define EQ_W1_ESe PPC_BITMASK32(2, 3) > >> +#define EQ_W1_ESe_P PPC_BIT32(2) > >> +#define EQ_W1_ESe_Q PPC_BIT32(3) > >> +#define EQ_W1_GENERATION PPC_BIT32(9) > >> +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) > >> + uint32_t w2; > >> +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > >> +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > >> + uint32_t w3; > >> +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > >> + uint32_t w4; > >> +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) > >> +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) > >> + uint32_t w5; > >> +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) > >> + uint32_t w6; > >> +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) > >> +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > >> +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) > >> + uint32_t w7; > >> +#define EQ_W7_F0_IGNORE PPC_BIT32(0) > >> +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) > >> +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > >> +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) > >> +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > >> +} XiveEQ; > >> + > >> +#define XIVE_EQ_PRIORITY_COUNT 8 > >> +#define XIVE_PRIORITY_MAX (XIVE_EQ_PRIORITY_COUNT - 1) > >> + > >> struct XIVE { > >> SysBusDevice parent; > >> > >> @@ -23,6 +106,18 @@ struct XIVE { > >> uint32_t int_max; /* Max index */ > >> uint32_t int_hw_bot; /* Bottom index of HW IRQ allocator */ > >> uint32_t int_ipi_top; /* Highest IPI index handed out so far + > >> 1 */ > >> + > >> + /* XIVE internal tables */ > >> + void *sbe; > >> + XiveIVE *ivt; > >> + XiveEQ *eqdt; > >> }; > >> > >> +void xive_reset(void *dev); > >> +XiveIVE *xive_get_ive(XIVE *x, uint32_t isn); > >> +XiveEQ *xive_get_eq(XIVE *x, uint32_t idx); > >> + > >> +bool xive_eq_for_target(XIVE *x, uint32_t target, uint8_t prio, > >> + uint32_t *out_eq_idx); > >> + > >> #endif /* _INTC_XIVE_INTERNAL_H */ > >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c > >> index 5b4ea915d87c..5b14d8155317 100644 > >> --- a/hw/intc/xive.c > >> +++ b/hw/intc/xive.c > >> @@ -35,6 +35,27 @@ > >> */ > >> #define MAX_HW_IRQS_ENTRIES (8 * 1024) > >> > >> + > >> +void xive_reset(void *dev) > >> +{ > >> + XIVE *x = XIVE(dev); > >> + int i; > >> + > >> + /* SBEs are initialized to 0b01 which corresponds to "ints off" */ > >> + memset(x->sbe, 0x55, x->int_count / 4); > > > > I think strictly this should be a DIV_ROUND_UP to handle the case of > > int_count not a multiple of 4. > > ok. > > >> + > >> + /* Clear and mask all valid IVEs */ > >> + for (i = x->int_base; i < x->int_max; i++) { > >> + XiveIVE *ive = &x->ivt[i]; > >> + if (ive->w & IVE_VALID) { > >> + ive->w = IVE_VALID | IVE_MASKED; > >> + } > >> + } > >> + > >> + /* clear all EQs */ > >> + memset(x->eqdt, 0, x->nr_targets * XIVE_EQ_PRIORITY_COUNT * > >> sizeof(XiveEQ)); > >> +} > >> + > >> static void xive_init(Object *obj) > >> { > >> ; > >> @@ -62,6 +83,19 @@ static void xive_realize(DeviceState *dev, Error **errp) > >> if (x->int_ipi_top < 0x10) { > >> x->int_ipi_top = 0x10; > >> } > >> + > >> + /* Allocate SBEs (State Bit Entry). 2 bits, so 4 entries per byte */ > >> + x->sbe = g_malloc0(x->int_count / 4); > > > > And here as well. > > yes. > > >> + > >> + /* Allocate the IVT (Interrupt Virtualization Table) */ > >> + x->ivt = g_malloc0(x->int_count * sizeof(XiveIVE)); > >> + > >> + /* Allocate the EQDT (Event Queue Descriptor Table), 8 priorities > >> + * for each thread in the system */ > >> + x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * > >> + sizeof(XiveEQ)); > >> + > >> + qemu_register_reset(xive_reset, dev); > >> } > >> > >> static Property xive_properties[] = { > >> @@ -92,3 +126,41 @@ static void xive_register_types(void) > >> } > >> > >> type_init(xive_register_types) > >> + > >> +XiveIVE *xive_get_ive(XIVE *x, uint32_t lisn) > >> +{ > >> + uint32_t idx = lisn; > >> + > >> + if (idx < x->int_base || idx >= x->int_max) { > >> + return NULL; > >> + } > >> + > >> + return &x->ivt[idx]; > > > > Should be idx - int_base, no? > > no, not in the allocator model I have chosen. The IRQ numbers > are exposed to the guest with their offset. But this is another > discussion which I would rather continue in another thread.
Uh.. but you're using idx to index IVT directly, after verifying that it lies between int_base and int_max. AFAICT IVT is only allocated with int_max - int_base entries, so without an offset here you'll overrun it, won't you? > >> +} > >> + > >> +XiveEQ *xive_get_eq(XIVE *x, uint32_t idx) > >> +{ > >> + if (idx >= x->nr_targets * XIVE_EQ_PRIORITY_COUNT) { > >> + return NULL; > >> + } > >> + > >> + return &x->eqdt[idx]; > >> +} > >> + > >> +/* TODO: improve EQ indexing. This is very simple and relies on the > >> + * fact that target (CPU) numbers start at 0 and are contiguous. It > >> + * should be OK for sPAPR. > >> + */ > >> +bool xive_eq_for_target(XIVE *x, uint32_t target, uint8_t priority, > >> + uint32_t *out_eq_idx) > >> +{ > >> + if (priority > XIVE_PRIORITY_MAX || target >= x->nr_targets) { > >> + return false; > >> + } > >> + > >> + if (out_eq_idx) { > >> + *out_eq_idx = target + priority; > >> + } > >> + > >> + return true; > > > > Seems a clunky interface. Why not return a XiveEQ *, NULL if the > > inputs aren't valud. > > Yes. This interface is inherited from OPAL and it's not consistent > with the other xive_get_*() routines. But we are missing a XIVE > internal table for VPs which explains the difference. I need to look > at the support of the OS_REPORTING_LINE hcalls before simplifying. > > Thanks, > > C. > > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature