On Wed, Jul 18, 2018 at 06:03:29PM +1000, Benjamin Herrenschmidt wrote: > On Wed, 2018-07-18 at 16:12 +1000, David Gibson wrote: > > On Thu, Jun 28, 2018 at 10:36:32AM +0200, Cédric Le Goater wrote: > > > From: Benjamin Herrenschmidt <b...@kernel.crashing.org> > > Can you trim your replies ? It's really hard to find your comments in > such a long patch... > > > > diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h > > > index 6ac8a9392da6..966a996c2eac 100644 > > > --- a/include/hw/ppc/xics.h > > > +++ b/include/hw/ppc/xics.h > > > @@ -194,6 +194,7 @@ void icp_set_mfrr(ICPState *icp, uint8_t mfrr); > > > uint32_t icp_accept(ICPState *ss); > > > uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr); > > > void icp_eoi(ICPState *icp, uint32_t xirr); > > > +void icp_irq(ICSState *ics, int server, int nr, uint8_t priority); > > > > Hrm... are you sure you need to expose this?
[snip] > > > + limit = pci_config_size(pdev); > > > + if (limit <= cfg_addr) { > > > + /* conventional pci device can be behind pcie-to-pci bridge. > > > + 256 <= addr < 4K has no effects. */ > > > + return ~0ull; > > > + } > > > + val = pci_host_config_read_common(pdev, cfg_addr, limit, size); > > > + switch (size) { > > > + case 1: > > > + return val; > > > + case 2: > > > + return bswap16(val); > > > + case 4: > > > + return bswap32(val); > > > + default: > > > + g_assert_not_reached(); > > > + } > > > +} > > > + > > > +static void pnv_phb3_check_m32(PnvPHB3 *phb) > > > +{ > > > + uint64_t base, start, size; > > > + MemoryRegion *parent; > > > + PnvPBCQState *pbcq = &phb->pbcq; > > > + > > > + if (phb->m32_mapped) { > > > + memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32); > > > + phb->m32_mapped = false; > > > > Could you use memory_region_set_enabled() rather than having to add > > and delete the subregion and keep track of it in this separate > > variable? > > There was a reason for that but it's years old and I forgot... I think > when re-enabled it moves around, among other things. So it's not more > efficient. Hm, ok. It'd be good to know what this is. > > > + } > > > + > > > + /* Disabled ? move on with life ... */ > > > > Trivial: "nothing to do" seems to be the standard way to express this. > > Even trivialler: usual English rules don't put a space before '?' or > > '!' punctuation. > > No, that's the tasteless English rule :-) It shall be overridden by > anybody interested in making things actually readable :-) > > > > + > > > +static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t > > > val) > > > +{ > > > + ICSState *ics = &phb->lsis; > > > + uint8_t server, prio; > > > + > > > + phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER | > > > + IODA2_LXIVT_PRIORITY | > > > + IODA2_LXIVT_NODE_ID); > > > + server = GETFIELD(IODA2_LXIVT_SERVER, val); > > > + prio = GETFIELD(IODA2_LXIVT_PRIORITY, val); > > > + > > > + /* > > > + * The low order 2 bits are the link pointer (Type II interrupts). > > > > I don't think we've currently implemented link pointers in our xics > > code. Do we need to do that? > > Not until somebody uses them (other than pHyp). > > > > + * Shift back to get a valid IRQ server. > > > + */ > > > + server >>= 2; > > > + > > > + ics_simple_write_xive(ics, idx, server, prio, prio); > > > +} > > > + > > > +static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb, > > > + unsigned *out_table, unsigned > > > *out_idx) > > > +{ > > > + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; > > > + unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg); > > > + unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg); > > > + unsigned int mask; > > > + uint64_t *tptr = NULL; > > > + > > > + switch (table) { > > > + case IODA2_TBL_LIST: > > > + tptr = phb->ioda_LIST; > > > + mask = 7; > > > > I'd suggest hex for the masks. > > This is more readable when matched with the documentation but not a big > deal. Matching the docs is a good enough reason to keep decimal. > > > + break; > > > + case IODA2_TBL_LXIVT: > > > + tptr = phb->ioda_LXIVT; > > > + mask = 7; > > > + break; > > > + case IODA2_TBL_IVC_CAM: > > > + case IODA2_TBL_RBA: > > > + mask = 31; > > > + break; > > > + case IODA2_TBL_RCAM: > > > + mask = 63; > > > + break; > > > + case IODA2_TBL_MRT: > > > + mask = 7; > > > + break; > > > + case IODA2_TBL_PESTA: > > > + case IODA2_TBL_PESTB: > > > + mask = 255; > > > + break; > > > + case IODA2_TBL_TVT: > > > + tptr = phb->ioda_TVT; > > > + mask = 511; > > > + break; > > > + case IODA2_TBL_TCAM: > > > + case IODA2_TBL_TDR: > > > + mask = 63; > > > + break; > > > + case IODA2_TBL_M64BT: > > > + tptr = phb->ioda_M64BT; > > > + mask = 15; > > > + break; > > > + case IODA2_TBL_M32DT: > > > + tptr = phb->ioda_MDT; > > > + mask = 255; > > > + break; > > > + case IODA2_TBL_PEEV: > > > + tptr = phb->ioda_PEEV; > > > + mask = 3; > > > + break; > > > + default: > > > + return NULL; > > > + } > > > + index &= mask; > > > > Do we want to silently mask, rather than logging an error if there's > > an access out of bounds for a particular table? > > We could do both, as to behave like the HW but also flag an error. Sounds reasonable. > But > you have to be careful with the auto-increment below. Hm, ok. > > > + if (out_idx) { > > > + *out_idx = index; > > > + } > > > + if (out_table) { > > > + *out_table = table; > > > + } > > > + if (adreg & PHB_IODA_AD_AUTOINC) { > > > + index = (index + 1) & mask; > > > + adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index); > > > + } > > > + if (tptr) { > > > + tptr += index; > > > + } > > > + phb->regs[PHB_IODA_ADDR >> 3] = adreg; > > > + return tptr; > > > +} > > ../.. > > > > + if ((comp & mask) != comp) { > > > + qemu_log_mask(LOG_GUEST_ERROR, > > > + "IRQ compare bits not in mask: comp=0x%x > > > mask=0x%x", > > > + comp, mask); > > > + comp &= mask; > > > + } > > > + /* Setup LSI offset */ > > > + ics->offset = comp + global; > > > > Oh.. changing ICS offset at runtime. I hadn't considered that case.. > > As above, see further down. > > > > + /* Special case configuration data */ > > > + if ((off & 0xfffc) == PHB_CONFIG_DATA) { > > > + pnv_phb3_config_write(phb, off & 0x3, size, val); > > > + return; > > > + } > > > + > > > + /* Other registers are 64-bit only */ > > > + if (size != 8 || off & 0x7) { > > > + qemu_log_mask(LOG_GUEST_ERROR, > > > + "Invalid register access, offset: 0x%"PRIx64" > > > size: %d", > > > + off, size); > > > + return; > > > + } > > > + > > > + /* Handle masking */ > > > + switch (off) { > > > + case PHB_M64_UPPER_BITS: > > > > Couldn't you handle this in the switch below - it should be ok to > > momentarily have the invalid bits set in your reg case, as long as you > > mask them before applying the side-effects, yes? > > That felt easier that way ... > > > That said... shouldn't you filter our invalid or read-only regs before > > updating the cache? > > Well, I had a reason for doing it that way, I do have a vague memory of > that but I can't remember it now :-) > > > > +/* > > > + * MSI/MSIX memory region implementation. > > > + * The handler handles both MSI and MSIX. > > > + */ > > > +static void pnv_phb3_msi_write(void *opaque, hwaddr addr, > > > + uint64_t data, unsigned size) > > > +{ > > > + PnvPhb3DMASpace *ds = opaque; > > > + > > > + /* Resolve PE# */ > > > + if (!pnv_phb3_resolve_pe(ds)) { > > > + qemu_log_mask(LOG_GUEST_ERROR, > > > + "Failed to resolve PE# for bus @%p (%d) devfn > > > 0x%x", > > > + ds->bus, pci_bus_num(ds->bus), ds->devfn); > > > + return; > > > + } > > > + > > > + pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num); > > > +} > > > + > > > +static const MemoryRegionOps pnv_phb3_msi_ops = { > > > + /* There is no .read as the read result is undefined by PCI spec */ > > > > What will qemu do if it hits a NULL read here? The behaviour may be > > undefind, but we'd prefer it didn't crash qemu.. > > Yeah. [snip] > > > + /* > > > + * The low order 2 bits are the link pointer (Type II interrupts). > > > + * Shift back to get a valid IRQ server. > > > + */ > > > + server >>= 2; > > > + > > > + switch (pq) { > > > + case 0: /* 00 */ > > > + if (prio == 0xff) { > > > + /* Masked, set Q */ > > > + phb3_msi_set_q(msi, srcno); > > > + } else { > > > + /* Enabled, set P and send */ > > > + phb3_msi_set_p(msi, srcno, gen); > > > + icp_irq(ics, server, srcno + ics->offset, prio); > > > > Can't you just pulse the right qirq here, which will use the core ICS > > logic to propagate to the ICP? > > Ok, interrupts ... sooooo... > > This code predates a pile of XICS work you guys did to start with. > > Now, this is an ICS subclass, so why shouldn't it directly poke at the > target ICP ? That's ok in theory, but causing it to expose the icp interface to a new module isn't great. > It's an alternate to the normal ICS since it behaves a bit > differently (PQ bits & all). AFAICT the PQ bits are effectively another filtering layer on top of the same ICS logic as elsewhere. I think it's better if we can share that shared logic, rather than replicating it. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature