On 30/05/2019 15:33, David Gibson wrote:
> On Fri, May 24, 2019 at 03:31:54PM +1000, Alexey Kardashevskiy wrote:
>>
>>
>> On 23/05/2019 15:29, David Gibson wrote:
>>> Device nodes for PCI bridges (both host and P2P) describe both the bridge
>>> device itself and the bus hanging off it, handling of this is a bit of a
>>> mess.
>>>
>>> spapr_dt_pci_device() has a few things it only adds for non-bridges, but
>>> always adds #address-cells and #size-cells which should only appear for
>>> bridges. But the walking down the subordinate PCI bus is done in one of
>>> its callers spapr_populate_pci_devices_dt(). The PHB dt creation in
>>> spapr_populate_pci_dt() open codes some similar logic to the bridge case.
>>>
>>> This patch consolidates things in a bunch of ways:
>>> * Bus specific dt info is now created in spapr_dt_pci_bus() used for both
>>> P2P bridges and the host bridge. This includes walking subordinate
>>> devices
>>> * spapr_dt_pci_device() now calls spapr_dt_pci_bus() when called on a
>>> P2P bridge
>>> * We do detection of bridges with the is_bridge field of the device class,
>>> rather than checking PCI config space directly, for consistency with
>>> qemu's core PCI code.
>>> * Several things are renamed for brevity and clarity
>>>
>>> Signed-off-by: David Gibson <da...@gibson.dropbear.id.au>
>>> ---
>>> hw/ppc/spapr.c | 7 +-
>>> hw/ppc/spapr_pci.c | 140 +++++++++++++++++++-----------------
>>> include/hw/pci-host/spapr.h | 4 +-
>>> 3 files changed, 79 insertions(+), 72 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index e2b33e5890..44573adce7 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -1309,8 +1309,7 @@ static void *spapr_build_fdt(SpaprMachineState *spapr)
>>> }
>>>
>>> QLIST_FOREACH(phb, &spapr->phbs, list) {
>>> - ret = spapr_populate_pci_dt(phb, PHANDLE_INTC, fdt,
>>> - spapr->irq->nr_msis, NULL);
>>> + ret = spapr_dt_phb(phb, PHANDLE_INTC, fdt, spapr->irq->nr_msis,
>>> NULL);
>>> if (ret < 0) {
>>> error_report("couldn't setup PCI devices in fdt");
>>> exit(1);
>>> @@ -3917,8 +3916,8 @@ int spapr_phb_dt_populate(SpaprDrc *drc,
>>> SpaprMachineState *spapr,
>>> return -1;
>>> }
>>>
>>> - if (spapr_populate_pci_dt(sphb, intc_phandle, fdt, spapr->irq->nr_msis,
>>> - fdt_start_offset)) {
>>> + if (spapr_dt_phb(sphb, intc_phandle, fdt, spapr->irq->nr_msis,
>>> + fdt_start_offset)) {
>>> error_setg(errp, "unable to create FDT node for PHB %d",
>>> sphb->index);
>>> return -1;
>>> }
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index 4075b433fd..c166df4145 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -1219,6 +1219,60 @@ static const char *dt_name_from_class(uint8_t class,
>>> uint8_t subclass,
>>> static uint32_t spapr_phb_get_pci_drc_index(SpaprPhbState *phb,
>>> PCIDevice *pdev);
>>>
>>> +typedef struct PciWalkFdt {
>>> + void *fdt;
>>> + int offset;
>>> + SpaprPhbState *sphb;
>>> + int err;
>>> +} PciWalkFdt;
>>> +
>>> +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
>>> + void *fdt, int parent_offset);
>>> +
>>> +static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
>>> + void *opaque)
>>> +{
>>> + PciWalkFdt *p = opaque;
>>> + int err;
>>> +
>>> + if (p->err) {
>>> + /* Something's already broken, don't keep going */
>>> + return;
>>> + }
>>> +
>>> + err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset);
>>> + if (err < 0) {
>>> + p->err = err;
>>> + }
>>> +}
>>> +
>>> +/* Augment PCI device node with bridge specific information */
>>> +static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus,
>>> + void *fdt, int offset)
>>> +{
>>> + PciWalkFdt cbinfo = {
>>> + .fdt = fdt,
>>> + .offset = offset,
>>> + .sphb = sphb,
>>> + .err = 0,
>>> + };
>>> +
>>> + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
>>> + RESOURCE_CELLS_ADDRESS));
>>> + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
>>> + RESOURCE_CELLS_SIZE));
>>> +
>>> + if (bus) {
>>> + pci_for_each_device_reverse(bus, pci_bus_num(bus),
>>> + spapr_dt_pci_device_cb, &cbinfo);
>>> + if (cbinfo.err) {
>>> + return cbinfo.err;
>>> + }
>>> + }
>>> +
>>> + return offset;
>>
>>
>> This spapr_dt_pci_bus() returns 0 or an offset or an error.
>>
>> But:
>> spapr_dt_phb() returns 0 or error; and so does spapr_dt_drc().
>>
>> Not extremely consistent.
>
> No, it's not. But the inconsistency is already there between the
> device function which needs to return an offset and the PHB function
> and others which don't.
>
> I have some ideas for how to clean this up, along with a bunch of
> other dt creation stuff, but I don't think it's reasonably in scope
> for this series.
>
>> It looks like spapr_dt_pci_bus() does not need to return @offset as it
>> does not change it and the caller - spapr_dt_pci_device() - can have 1
>> "return offset" in the end.
>
> True, but changing this here just shuffles the inconsistency around
> without really improving it. I've put cleaning this up more widely on
> my list of things to tackle when I have time.
>
>>
>>
>>> +}
>>> +
>>> /* create OF node for pci device and required OF DT properties */
>>> static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
>>> void *fdt, int parent_offset)
>>> @@ -1228,10 +1282,9 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb,
>>> PCIDevice *dev,
>>> char *nodename;
>>> int slot = PCI_SLOT(dev->devfn);
>>> int func = PCI_FUNC(dev->devfn);
>>> + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
>>> ResourceProps rp;
>>> uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
>>> - uint32_t header_type = pci_default_read_config(dev, PCI_HEADER_TYPE,
>>> 1);
>>> - bool is_bridge = (header_type == PCI_HEADER_TYPE_BRIDGE);
>>> uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2);
>>> uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2);
>>> uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID,
>>> 1);
>>> @@ -1268,13 +1321,6 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb,
>>> PCIDevice *dev,
>>> _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin));
>>> }
>>>
>>> - if (!is_bridge) {
>>> - uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1);
>>> - uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT,
>>> 1);
>>> - _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant));
>>> - _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency));
>>> - }
>>> -
>>> if (subsystem_id) {
>>> _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id));
>>> }
>>> @@ -1309,11 +1355,6 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb,
>>> PCIDevice *dev,
>>> _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>>> }
>>>
>>> - _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
>>> - RESOURCE_CELLS_ADDRESS));
>>> - _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
>>> - RESOURCE_CELLS_SIZE));
>>> -
>>> if (msi_present(dev)) {
>>> uint32_t max_msi = msi_nr_vectors_allocated(dev);
>>> if (max_msi) {
>>> @@ -1338,7 +1379,18 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb,
>>> PCIDevice *dev,
>>>
>>> spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb);
>>>
>>> - return offset;
>>> + if (!pc->is_bridge) {
>>> + /* Properties only for non-bridges */
>>> + uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1);
>>> + uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT,
>>> 1);
>>> + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant));
>>> + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency));
>>> + return offset;
>>> + } else {
>>> + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
>>> +
>>> + return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset);
>>> + }
>>> }
>>>
>>> /* Callback to be called during DRC release. */
>>> @@ -2063,44 +2115,6 @@ static const TypeInfo spapr_phb_info = {
>>> }
>>> };
>>>
>>> -typedef struct SpaprFdt {
>>> - void *fdt;
>>> - int node_off;
>>> - SpaprPhbState *sphb;
>>> -} SpaprFdt;
>>> -
>>> -static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
>>> - void *opaque)
>>> -{
>>> - PCIBus *sec_bus;
>>> - SpaprFdt *p = opaque;
>>> - int offset;
>>> - SpaprFdt s_fdt;
>>> -
>>> - offset = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->node_off);
>>> - if (!offset) {
>>> - error_report("Failed to create pci child device tree node");
>>> - return;
>>> - }
>>> -
>>> - if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
>>> - PCI_HEADER_TYPE_BRIDGE)) {
>>> - return;
>>> - }
>>> -
>>> - sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>>> - if (!sec_bus) {
>>> - return;
>>> - }
>>> -
>>> - s_fdt.fdt = p->fdt;
>>> - s_fdt.node_off = offset;
>>> - s_fdt.sphb = p->sphb;
>>> - pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
>>> - spapr_populate_pci_devices_dt,
>>> - &s_fdt);
>>> -}
>>> -
>>> static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
>>> void *opaque)
>>> {
>>> @@ -2138,8 +2152,8 @@ static void spapr_phb_pci_enumerate(SpaprPhbState
>>> *phb)
>>>
>>> }
>>>
>>> -int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void
>>> *fdt,
>>> - uint32_t nr_msis, int *node_offset)
>>> +int spapr_dt_phb(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
>>> + uint32_t nr_msis, int *node_offset)
>>> {
>>> int bus_off, i, j, ret;
>>> uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
>>> @@ -2186,8 +2200,6 @@ int spapr_populate_pci_dt(SpaprPhbState *phb,
>>> uint32_t intc_phandle, void *fdt,
>>> cpu_to_be32(0x0),
>>> cpu_to_be32(phb->numa_node)};
>>> SpaprTceTable *tcet;
>>> - PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>>> - SpaprFdt s_fdt;
>>> SpaprDrc *drc;
>>> Error *errp = NULL;
>>>
>>> @@ -2200,8 +2212,6 @@ int spapr_populate_pci_dt(SpaprPhbState *phb,
>>> uint32_t intc_phandle, void *fdt,
>>> /* Write PHB properties */
>>> _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
>>> _FDT(fdt_setprop_string(fdt, bus_off, "compatible",
>>> "IBM,Logical_PHB"));
>>> - _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
>>> - _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
>>> _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
>>> _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
>>> _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range,
>>> sizeof(bus_range)));
>>> @@ -2266,13 +2276,11 @@ int spapr_populate_pci_dt(SpaprPhbState *phb,
>>> uint32_t intc_phandle, void *fdt,
>>> spapr_phb_pci_enumerate(phb);
>>> _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
>>>
>>> - /* Populate tree nodes with PCI devices attached */
>>> - s_fdt.fdt = fdt;
>>> - s_fdt.node_off = bus_off;
>>> - s_fdt.sphb = phb;
>>> - pci_for_each_device_reverse(bus, pci_bus_num(bus),
>>> - spapr_populate_pci_devices_dt,
>>> - &s_fdt);
>>> + /* Walk the bridge and subordinate buses */
>>> + ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off);
>>> + if (ret) {
>>
>>
>> if (ret < 0)
>>
>> otherwise it returns prematurely and NVLink stuff does not make it to
>> the FDT.
>>
>>
>> Otherwise the whole patchset is a good cleanup and seems working fine.
Just to double check you have not missed this bit :)
--
Alexey