On Tue Dec 10, 2024 at 10:05 AM AEST, Michael Kowal wrote:
> From: Frederic Barrat <fbar...@linux.ibm.com>
>
> When a group interrupt cannot be delivered, we need to:
> - increment the backlog counter for the group in the NVG table
>   (if the END is configured to keep a backlog).
> - start a broadcast operation to set the LSMFB field on matching CPUs
>   which can't take the interrupt now because they're running at too
>   high a priority.
>
> Signed-off-by: Frederic Barrat <fbar...@linux.ibm.com>
> Signed-off-by: Michael Kowal <ko...@linux.ibm.com>
> ---
>  include/hw/ppc/xive.h  |   5 ++
>  include/hw/ppc/xive2.h |   1 +
>  hw/intc/pnv_xive2.c    |  42 +++++++++++++++++
>  hw/intc/xive2.c        | 105 +++++++++++++++++++++++++++++++++++------
>  hw/ppc/pnv.c           |  22 ++++++++-
>  5 files changed, 159 insertions(+), 16 deletions(-)
>
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index ce4eb9726b..f443a39cf1 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -442,6 +442,9 @@ struct XivePresenterClass {
>                       uint32_t logic_serv, XiveTCTXMatch *match);
>      bool (*in_kernel)(const XivePresenter *xptr);
>      uint32_t (*get_config)(XivePresenter *xptr);
> +    int (*broadcast)(XivePresenter *xptr,
> +                     uint8_t nvt_blk, uint32_t nvt_idx,
> +                     uint8_t priority);
>  };
>  
>  int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
> @@ -472,6 +475,8 @@ struct XiveFabricClass {
>                       uint8_t nvt_blk, uint32_t nvt_idx,
>                       bool cam_ignore, uint8_t priority,
>                       uint32_t logic_serv, XiveTCTXMatch *match);
> +    int (*broadcast)(XiveFabric *xfb, uint8_t nvt_blk, uint32_t nvt_idx,
> +                     uint8_t priority);
>  };
>  
>  /*
> diff --git a/include/hw/ppc/xive2.h b/include/hw/ppc/xive2.h
> index 65154f78d8..ebf301bb5b 100644
> --- a/include/hw/ppc/xive2.h
> +++ b/include/hw/ppc/xive2.h
> @@ -120,6 +120,7 @@ uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
>  void xive2_tm_pull_os_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx,
>                               hwaddr offset, uint64_t value, unsigned size);
>  bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority);
> +void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority);
>  void xive2_tm_set_hv_target(XivePresenter *xptr, XiveTCTX *tctx,
>                              hwaddr offset, uint64_t value, unsigned size);
>  void xive2_tm_pull_phys_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx,
> diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
> index 5cdd4fdcc9..41b727d1fb 100644
> --- a/hw/intc/pnv_xive2.c
> +++ b/hw/intc/pnv_xive2.c
> @@ -705,6 +705,47 @@ static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr)
>      return cfg;
>  }
>  
> +static int pnv_xive2_broadcast(XivePresenter *xptr,
> +                               uint8_t nvt_blk, uint32_t nvt_idx,
> +                               uint8_t priority)
> +{
> +    PnvXive2 *xive = PNV_XIVE2(xptr);
> +    PnvChip *chip = xive->chip;
> +    int i, j;
> +    bool gen1_tima_os =
> +        xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS;
> +
> +    for (i = 0; i < chip->nr_cores; i++) {
> +        PnvCore *pc = chip->cores[i];
> +        CPUCore *cc = CPU_CORE(pc);
> +
> +        for (j = 0; j < cc->nr_threads; j++) {
> +            PowerPCCPU *cpu = pc->threads[j];
> +            XiveTCTX *tctx;
> +            int ring;
> +
> +            if (!pnv_xive2_is_cpu_enabled(xive, cpu)) {
> +                continue;
> +            }
> +
> +            tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc);
> +
> +            if (gen1_tima_os) {
> +                ring = xive_presenter_tctx_match(xptr, tctx, 0, nvt_blk,
> +                                                 nvt_idx, true, 0);
> +            } else {
> +                ring = xive2_presenter_tctx_match(xptr, tctx, 0, nvt_blk,
> +                                                  nvt_idx, true, 0);
> +            }
> +
> +            if (ring != -1) {
> +                xive2_tm_set_lsmfb(tctx, ring, priority);
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
>  static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr)
>  {
>      return pnv_xive2_block_id(PNV_XIVE2(xrtr));
> @@ -2445,6 +2486,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data)
>  
>      xpc->match_nvt  = pnv_xive2_match_nvt;
>      xpc->get_config = pnv_xive2_presenter_get_config;
> +    xpc->broadcast  = pnv_xive2_broadcast;
>  };
>  
>  static const TypeInfo pnv_xive2_info = {
> diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
> index cffcf3ff05..05cb17518d 100644
> --- a/hw/intc/xive2.c
> +++ b/hw/intc/xive2.c
> @@ -62,6 +62,30 @@ static uint32_t xive2_nvgc_get_backlog(Xive2Nvgc *nvgc, uint8_t priority)
>      return val;
>  }
>  
> +static void xive2_nvgc_set_backlog(Xive2Nvgc *nvgc, uint8_t priority,
> +                                   uint32_t val)
> +{
> +    uint8_t *ptr, i;
> +    uint32_t shift;
> +
> +    if (priority > 7) {
> +        return;
> +    }
> +
> +    if (val > 0xFFFFFF) {
> +        val = 0xFFFFFF;
> +    }

Could these conditions have asserts or warnings? As written, it looks
like we would silently drop the update for an out-of-range priority, or
silently saturate the counter, if either of these can actually happen.
Something can be added in a later patch.

> +    /*
> +     * The per-priority backlog counters are 24-bit and the structure
> +     * is stored in big endian
> +     */
> +    ptr = (uint8_t *)&nvgc->w2 + priority * 3;

This fits because nvgc is 32 bytes, which leaves 24 bytes from w2
onwards, and 8 priorities * 3 bytes each is 24. I just added a bit
more to the comment.

Reviewed-by: Nicholas Piggin <npig...@gmail.com>

> +    for (i = 0; i < 3; i++, ptr++) {
> +        shift = 8 * (2 - i);
> +        *ptr = (val >> shift) & 0xFF;
> +    }
> +}
> +
>  void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf)
>  {
>      if (!xive2_eas_is_valid(eas)) {
> @@ -830,6 +854,19 @@ bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority)
>      return true;
>  }
>  
> +void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority)
> +{
> +    uint8_t *regs = &tctx->regs[ring];
> +
> +    /*
> +     * Called by the router during a VP-group notification when the
> +     * thread matches but can't take the interrupt because it's
> +     * already running at a more favored priority. It then stores the
> +     * new interrupt priority in the LSMFB field.
> +     */
> +    regs[TM_LSMFB] = priority;
> +}
> +
>  static void xive2_router_realize(DeviceState *dev, Error **errp)
>  {
>      Xive2Router *xrtr = XIVE2_ROUTER(dev);
> @@ -962,10 +999,9 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
>      /*
>       * If no matching NVP is dispatched on a HW thread :
>       * - specific VP: update the NVP structure if backlog is activated
> -     * - logical server : forward request to IVPE (not supported)
> +     * - VP-group: update the backlog counter for that priority in the NVG
>       */
>      if (xive2_end_is_backlog(&end)) {
> -        uint8_t ipb;
>  
>          if (format == 1) {
>              qemu_log_mask(LOG_GUEST_ERROR,
> @@ -974,19 +1010,58 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
>              return;
>          }
>  
> -        /*
> -         * Record the IPB in the associated NVP structure for later
> -         * use. The presenter will resend the interrupt when the vCPU
> -         * is dispatched again on a HW thread.
> -         */
> -        ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) |
> -            xive_priority_to_ipb(priority);
> -        nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb);
> -        xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
> -
> -        /*
> -         * On HW, follows a "Broadcast Backlog" to IVPEs
> -         */
> +        if (!xive2_end_is_ignore(&end)) {
> +            uint8_t ipb;
> +            /*
> +             * Record the IPB in the associated NVP structure for later
> +             * use. The presenter will resend the interrupt when the vCPU
> +             * is dispatched again on a HW thread.
> +             */
> +            ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) |
> +                xive_priority_to_ipb(priority);
> +            nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb);
> +            xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
> +        } else {
> +            Xive2Nvgc nvg;
> +            uint32_t backlog;
> +
> +            /* For groups, the per-priority backlog counters are in the NVG */
> +            if (xive2_router_get_nvgc(xrtr, false, nvp_blk, nvp_idx, &nvg)) {
> +                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVG %x/%x\n",
> +                              nvp_blk, nvp_idx);
> +                return;
> +            }
> +
> +            if (!xive2_nvgc_is_valid(&nvg)) {
> +                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVG %x/%x is invalid\n",
> +                              nvp_blk, nvp_idx);
> +                return;
> +            }
> +
> +            /*
> +             * Increment the backlog counter for that priority.
> +             * For the precluded case, we only call broadcast the
> +             * first time the counter is incremented. broadcast will
> +             * set the LSMFB field of the TIMA of relevant threads so
> +             * that they know an interrupt is pending.
> +             */
> +            backlog = xive2_nvgc_get_backlog(&nvg, priority) + 1;
> +            xive2_nvgc_set_backlog(&nvg, priority, backlog);
> +            xive2_router_write_nvgc(xrtr, false, nvp_blk, nvp_idx, &nvg);
> +
> +            if (precluded && backlog == 1) {
> +                XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb);
> +                xfc->broadcast(xrtr->xfb, nvp_blk, nvp_idx, priority);
> +
> +                if (!xive2_end_is_precluded_escalation(&end)) {
> +                    /*
> +                     * The interrupt will be picked up when the
> +                     * matching thread lowers its priority level
> +                     */
> +                    return;
> +                }
> +            }
> +        }
>      }
>  
>  do_escalation:
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index f0f0d7567d..7c11143749 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -1,7 +1,9 @@
>  /*
>   * QEMU PowerPC PowerNV machine model
>   *
> - * Copyright (c) 2016, IBM Corporation.
> + * Copyright (c) 2016-2024, IBM Corporation.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
>   *
>   * This library is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU Lesser General Public
> @@ -2639,6 +2641,23 @@ static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format,
>      return total_count;
>  }
>  
> +static int pnv10_xive_broadcast(XiveFabric *xfb,
> +                                uint8_t nvt_blk, uint32_t nvt_idx,
> +                                uint8_t priority)
> +{
> +    PnvMachineState *pnv = PNV_MACHINE(xfb);
> +    int i;
> +
> +    for (i = 0; i < pnv->num_chips; i++) {
> +        Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]);
> +        XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive);
> +        XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
> +
> +        xpc->broadcast(xptr, nvt_blk, nvt_idx, priority);
> +    }
> +    return 0;
> +}
> +
>  static bool pnv_machine_get_big_core(Object *obj, Error **errp)
>  {
>      PnvMachineState *pnv = PNV_MACHINE(obj);
> @@ -2772,6 +2791,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
>      pmc->dt_power_mgt = pnv_dt_power_mgt;
>  
>      xfc->match_nvt = pnv10_xive_match_nvt;
> +    xfc->broadcast = pnv10_xive_broadcast;
>  
>      machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
>  }


Reply via email to