On Tue Dec 10, 2024 at 10:05 AM AEST, Michael Kowal wrote: > From: Frederic Barrat <fbar...@linux.ibm.com> > > When a group interrupt cannot be delivered, we need to: > - increment the backlog counter for the group in the NVG table > (if the END is configured to keep a backlog). > - start a broadcast operation to set the LSMFB field on matching CPUs > which can't take the interrupt now because they're running at too > high a priority. > > Signed-off-by: Frederic Barrat <fbar...@linux.ibm.com> > Signed-off-by: Michael Kowal <ko...@linux.ibm.com> > --- > include/hw/ppc/xive.h | 5 ++ > include/hw/ppc/xive2.h | 1 + > hw/intc/pnv_xive2.c | 42 +++++++++++++++++ > hw/intc/xive2.c | 105 +++++++++++++++++++++++++++++++++++------ > hw/ppc/pnv.c | 22 ++++++++- > 5 files changed, 159 insertions(+), 16 deletions(-) > > diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h > index ce4eb9726b..f443a39cf1 100644 > --- a/include/hw/ppc/xive.h > +++ b/include/hw/ppc/xive.h > @@ -442,6 +442,9 @@ struct XivePresenterClass { > uint32_t logic_serv, XiveTCTXMatch *match); > bool (*in_kernel)(const XivePresenter *xptr); > uint32_t (*get_config)(XivePresenter *xptr); > + int (*broadcast)(XivePresenter *xptr, > + uint8_t nvt_blk, uint32_t nvt_idx, > + uint8_t priority); > }; > > int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, > @@ -472,6 +475,8 @@ struct XiveFabricClass { > uint8_t nvt_blk, uint32_t nvt_idx, > bool cam_ignore, uint8_t priority, > uint32_t logic_serv, XiveTCTXMatch *match); > + int (*broadcast)(XiveFabric *xfb, uint8_t nvt_blk, uint32_t nvt_idx, > + uint8_t priority); > }; > > /* > diff --git a/include/hw/ppc/xive2.h b/include/hw/ppc/xive2.h > index 65154f78d8..ebf301bb5b 100644 > --- a/include/hw/ppc/xive2.h > +++ b/include/hw/ppc/xive2.h > @@ -120,6 +120,7 @@ uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, > XiveTCTX *tctx, > void xive2_tm_pull_os_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx, > hwaddr offset, uint64_t value, unsigned size); > bool 
xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority); > +void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority); > void xive2_tm_set_hv_target(XivePresenter *xptr, XiveTCTX *tctx, > hwaddr offset, uint64_t value, unsigned size); > void xive2_tm_pull_phys_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx, > diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c > index 5cdd4fdcc9..41b727d1fb 100644 > --- a/hw/intc/pnv_xive2.c > +++ b/hw/intc/pnv_xive2.c > @@ -705,6 +705,47 @@ static uint32_t > pnv_xive2_presenter_get_config(XivePresenter *xptr) > return cfg; > } > > +static int pnv_xive2_broadcast(XivePresenter *xptr, > + uint8_t nvt_blk, uint32_t nvt_idx, > + uint8_t priority) > +{ > + PnvXive2 *xive = PNV_XIVE2(xptr); > + PnvChip *chip = xive->chip; > + int i, j; > + bool gen1_tima_os = > + xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; > + > + for (i = 0; i < chip->nr_cores; i++) { > + PnvCore *pc = chip->cores[i]; > + CPUCore *cc = CPU_CORE(pc); > + > + for (j = 0; j < cc->nr_threads; j++) { > + PowerPCCPU *cpu = pc->threads[j]; > + XiveTCTX *tctx; > + int ring; > + > + if (!pnv_xive2_is_cpu_enabled(xive, cpu)) { > + continue; > + } > + > + tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); > + > + if (gen1_tima_os) { > + ring = xive_presenter_tctx_match(xptr, tctx, 0, nvt_blk, > + nvt_idx, true, 0); > + } else { > + ring = xive2_presenter_tctx_match(xptr, tctx, 0, nvt_blk, > + nvt_idx, true, 0); > + } > + > + if (ring != -1) { > + xive2_tm_set_lsmfb(tctx, ring, priority); > + } > + } > + } > + return 0; > +} > + > static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr) > { > return pnv_xive2_block_id(PNV_XIVE2(xrtr)); > @@ -2445,6 +2486,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, > void *data) > > xpc->match_nvt = pnv_xive2_match_nvt; > xpc->get_config = pnv_xive2_presenter_get_config; > + xpc->broadcast = pnv_xive2_broadcast; > }; > > static const TypeInfo pnv_xive2_info = { > diff --git a/hw/intc/xive2.c 
b/hw/intc/xive2.c > index cffcf3ff05..05cb17518d 100644 > --- a/hw/intc/xive2.c > +++ b/hw/intc/xive2.c > @@ -62,6 +62,30 @@ static uint32_t xive2_nvgc_get_backlog(Xive2Nvgc *nvgc, > uint8_t priority) > return val; > } > > +static void xive2_nvgc_set_backlog(Xive2Nvgc *nvgc, uint8_t priority, > + uint32_t val) > +{ > + uint8_t *ptr, i; > + uint32_t shift; > + > + if (priority > 7) { > + return; > + } > + > + if (val > 0xFFFFFF) { > + val = 0xFFFFFF; > + }
Could these conditions have asserts or warnings? Seems like we saturate a counter or silently drop an interrupt if these things can happen. Can add something later. > + /* > + * The per-priority backlog counters are 24-bit and the structure > + * is stored in big endian > + */ > + ptr = (uint8_t *)&nvgc->w2 + priority * 3; This fits because nvgc is 32 bytes, so there are 24 bytes from w2 onward, and 8 priorities * 3 bytes each is 24. I just added a bit more comment. Reviewed-by: Nicholas Piggin <npig...@gmail.com> > + for (i = 0; i < 3; i++, ptr++) { > + shift = 8 * (2 - i); > + *ptr = (val >> shift) & 0xFF; > + } > +} > + > void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf) > { > if (!xive2_eas_is_valid(eas)) { > @@ -830,6 +854,19 @@ bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, > uint8_t priority) > return true; > } > > +void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority) > +{ > + uint8_t *regs = &tctx->regs[ring]; > + > + /* > + * Called by the router during a VP-group notification when the > + * thread matches but can't take the interrupt because it's > + * already running at a more favored priority. It then stores the > + * new interrupt priority in the LSMFB field. 
> + */ > + regs[TM_LSMFB] = priority; > +} > + > static void xive2_router_realize(DeviceState *dev, Error **errp) > { > Xive2Router *xrtr = XIVE2_ROUTER(dev); > @@ -962,10 +999,9 @@ static void xive2_router_end_notify(Xive2Router *xrtr, > uint8_t end_blk, > /* > * If no matching NVP is dispatched on a HW thread : > * - specific VP: update the NVP structure if backlog is activated > - * - logical server : forward request to IVPE (not supported) > + * - VP-group: update the backlog counter for that priority in the NVG > */ > if (xive2_end_is_backlog(&end)) { > - uint8_t ipb; > > if (format == 1) { > qemu_log_mask(LOG_GUEST_ERROR, > @@ -974,19 +1010,58 @@ static void xive2_router_end_notify(Xive2Router *xrtr, > uint8_t end_blk, > return; > } > > - /* > - * Record the IPB in the associated NVP structure for later > - * use. The presenter will resend the interrupt when the vCPU > - * is dispatched again on a HW thread. > - */ > - ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) | > - xive_priority_to_ipb(priority); > - nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb); > - xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); > - > - /* > - * On HW, follows a "Broadcast Backlog" to IVPEs > - */ > + if (!xive2_end_is_ignore(&end)) { > + uint8_t ipb; > + /* > + * Record the IPB in the associated NVP structure for later > + * use. The presenter will resend the interrupt when the vCPU > + * is dispatched again on a HW thread. 
> + */ > + ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) | > + xive_priority_to_ipb(priority); > + nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb); > + xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); > + } else { > + Xive2Nvgc nvg; > + uint32_t backlog; > + > + /* For groups, the per-priority backlog counters are in the NVG > */ > + if (xive2_router_get_nvgc(xrtr, false, nvp_blk, nvp_idx, &nvg)) { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVG %x/%x\n", > + nvp_blk, nvp_idx); > + return; > + } > + > + if (!xive2_nvgc_is_valid(&nvg)) { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVG %x/%x is > invalid\n", > + nvp_blk, nvp_idx); > + return; > + } > + > + /* > + * Increment the backlog counter for that priority. > + * For the precluded case, we only call broadcast the > + * first time the counter is incremented. broadcast will > + * set the LSMFB field of the TIMA of relevant threads so > + * that they know an interrupt is pending. > + */ > + backlog = xive2_nvgc_get_backlog(&nvg, priority) + 1; > + xive2_nvgc_set_backlog(&nvg, priority, backlog); > + xive2_router_write_nvgc(xrtr, false, nvp_blk, nvp_idx, &nvg); > + > + if (precluded && backlog == 1) { > + XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb); > + xfc->broadcast(xrtr->xfb, nvp_blk, nvp_idx, priority); > + > + if (!xive2_end_is_precluded_escalation(&end)) { > + /* > + * The interrupt will be picked up when the > + * matching thread lowers its priority level > + */ > + return; > + } > + } > + } > } > > do_escalation: > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c > index f0f0d7567d..7c11143749 100644 > --- a/hw/ppc/pnv.c > +++ b/hw/ppc/pnv.c > @@ -1,7 +1,9 @@ > /* > * QEMU PowerPC PowerNV machine model > * > - * Copyright (c) 2016, IBM Corporation. > + * Copyright (c) 2016-2024, IBM Corporation. 
> + * > + * SPDX-License-Identifier: GPL-2.0-or-later > * > * This library is free software; you can redistribute it and/or > * modify it under the terms of the GNU Lesser General Public > @@ -2639,6 +2641,23 @@ static int pnv10_xive_match_nvt(XiveFabric *xfb, > uint8_t format, > return total_count; > } > > +static int pnv10_xive_broadcast(XiveFabric *xfb, > + uint8_t nvt_blk, uint32_t nvt_idx, > + uint8_t priority) > +{ > + PnvMachineState *pnv = PNV_MACHINE(xfb); > + int i; > + > + for (i = 0; i < pnv->num_chips; i++) { > + Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]); > + XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive); > + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); > + > + xpc->broadcast(xptr, nvt_blk, nvt_idx, priority); > + } > + return 0; > +} > + > static bool pnv_machine_get_big_core(Object *obj, Error **errp) > { > PnvMachineState *pnv = PNV_MACHINE(obj); > @@ -2772,6 +2791,7 @@ static void > pnv_machine_p10_common_class_init(ObjectClass *oc, void *data) > pmc->dt_power_mgt = pnv_dt_power_mgt; > > xfc->match_nvt = pnv10_xive_match_nvt; > + xfc->broadcast = pnv10_xive_broadcast; > > machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB); > }