On 08.05.15 02:01, David Gibson wrote: > On Fri, May 08, 2015 at 12:39:20AM +0200, Alexander Graf wrote: >> >> >> On 07.05.15 07:33, David Gibson wrote: >>> From: Michael Roth <mdr...@linux.vnet.ibm.com> >>> >>> This device emulates a firmware abstraction used by pSeries guests to >>> manage hotplug/dynamic-reconfiguration of host-bridges, PCI devices, >>> memory, and CPUs. It is conceptually similar to an SHPC device, >>> complete with LED indicators to identify individual slots to physical >>> physical users and indicate when it is safe to remove a device. In >>> some cases it is also used to manage virtualized resources, such a >>> memory, CPUs, and physical-host bridges, which in the case of pSeries >>> guests are virtualized resources where the physical components are >>> managed by the host. >>> >>> Guests communicate with these DR Connectors using RTAS calls, >>> generally by addressing the unique DRC index associated with a >>> particular connector for a particular resource. For introspection >>> purposes we expose this state initially as QOM properties, and >>> in subsequent patches will introduce the RTAS calls that make use of >>> it. This constitutes to the 'guest' interface. >>> >>> On the QEMU side we provide an attach/detach interface to associate >>> or cleanup a DeviceState with a particular sPAPRDRConnector in >>> response to hotplug/unplug, respectively. This constitutes the >>> 'physical' interface to the DR Connector. >>> >>> Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> >>> Reviewed-by: David Gibson <da...@gibson.dropbear.id.au> >>> Signed-off-by: David Gibson <da...@gibson.dropbear.id.au> >>> --- >>> hw/ppc/Makefile.objs | 2 +- >>> hw/ppc/spapr_drc.c | 588 >>> +++++++++++++++++++++++++++++++++++++++++++++ >>> include/hw/ppc/spapr_drc.h | 199 +++++++++++++++ >>> 3 files changed, 788 insertions(+), 1 deletion(-) >>> create mode 100644 hw/ppc/spapr_drc.c >>> create mode 100644 include/hw/ppc/spapr_drc.h >>> >>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs >>> index 437955d..c8ab06e 100644 >>> --- a/hw/ppc/Makefile.objs >>> +++ b/hw/ppc/Makefile.objs >>> @@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o >>> # IBM pSeries (sPAPR) >>> obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o >>> obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o >>> -obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o >>> +obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o >>> ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) >>> obj-y += spapr_pci_vfio.o >>> endif >>> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c >>> new file mode 100644 >>> index 0000000..047c6c7 >>> --- /dev/null >>> +++ b/hw/ppc/spapr_drc.c >>> @@ -0,0 +1,588 @@ >>> +/* >>> + * QEMU SPAPR Dynamic Reconfiguration Connector Implementation >>> + * >>> + * Copyright IBM Corp. 2014 >>> + * >>> + * Authors: >>> + * Michael Roth <mdr...@linux.vnet.ibm.com> >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or >>> later. >>> + * See the COPYING file in the top-level directory. >>> + */ >>> + >>> +#include "hw/ppc/spapr_drc.h" >>> +#include "qom/object.h" >>> +#include "hw/qdev.h" >>> +#include "qapi/visitor.h" >>> +#include "qemu/error-report.h" >>> + >>> +/* #define DEBUG_SPAPR_DRC */ >>> + >>> +#ifdef DEBUG_SPAPR_DRC >>> +#define DPRINTF(fmt, ...) \ >>> + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) >>> +#define DPRINTFN(fmt, ...) \ >>> + do { DPRINTF(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while (0) >>> +#else >>> +#define DPRINTF(fmt, ...) \ >>> + do { } while (0) >>> +#define DPRINTFN(fmt, ...) \ >>> + do { } while (0) >>> +#endif >>> + >>> +#define DRC_CONTAINER_PATH "/dr-connector" >>> +#define DRC_INDEX_TYPE_SHIFT 28 >>> +#define DRC_INDEX_ID_MASK (~(~0 << DRC_INDEX_TYPE_SHIFT)) >>> + >>> +static sPAPRDRConnectorTypeShift get_type_shift(sPAPRDRConnectorType type) >>> +{ >>> + uint32_t shift = 0; >>> + >>> + /* make sure this isn't SPAPR_DR_CONNECTOR_TYPE_ANY, or some >>> + * other wonky value. >>> + */ >>> + g_assert(is_power_of_2(type)); >>> + >>> + while (type != (1 << shift)) { >>> + shift++; >>> + } >>> + return shift; >>> +} >>> + >>> +static uint32_t get_index(sPAPRDRConnector *drc) >>> +{ >>> + /* no set format for a drc index: it only needs to be globally >>> + * unique. this is how we encode the DRC type on bare-metal >>> + * however, so might as well do that here >>> + */ >>> + return (get_type_shift(drc->type) << DRC_INDEX_TYPE_SHIFT) | >>> + (drc->id & DRC_INDEX_ID_MASK); >>> +} >>> + >>> +static int set_isolation_state(sPAPRDRConnector *drc, >>> + sPAPRDRIsolationState state) >>> +{ >>> + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); >>> + >>> + DPRINTFN("drc: %x, set_isolation_state: %x", get_index(drc), state); >>> + >>> + drc->isolation_state = state; >>> + >>> + if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { >>> + /* if we're awaiting release, but still in an unconfigured state, >>> + * it's likely the guest is still in the process of configuring >>> + * the device and is transitioning the devices to an ISOLATED >>> + * state as a part of that process. so we only complete the >>> + * removal when this transition happens for a device in a >>> + * configured state, as suggested by the state diagram from >>> + * PAPR+ 2.7, 13.4 >>> + */ >>> + if (drc->awaiting_release) { >>> + if (drc->configured) { >>> + DPRINTFN("finalizing device removal"); >>> + drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, >>> + drc->detach_cb_opaque, NULL); >>> + } else { >>> + DPRINTFN("deferring device removal on unconfigured >>> device\n"); >>> + } >>> + } >>> + drc->configured = false; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +static int set_indicator_state(sPAPRDRConnector *drc, >>> + sPAPRDRIndicatorState state) >>> +{ >>> + DPRINTFN("drc: %x, set_indicator_state: %x", get_index(drc), state); >>> + drc->indicator_state = state; >>> + return 0; >>> +} >>> + >>> +static int set_allocation_state(sPAPRDRConnector *drc, >>> + sPAPRDRAllocationState state) >>> +{ >>> + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); >>> + >>> + DPRINTFN("drc: %x, set_allocation_state: %x", get_index(drc), state); >>> + >>> + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) { >>> + drc->allocation_state = state; >>> + if (drc->awaiting_release && >>> + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { >>> + DPRINTFN("finalizing device removal"); >>> + drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, >>> + drc->detach_cb_opaque, NULL); >>> + } >>> + } >>> + return 0; >>> +} >>> + >>> +static uint32_t get_type(sPAPRDRConnector *drc) >>> +{ >>> + return drc->type; >>> +} >>> + >>> +static const char *get_name(sPAPRDRConnector *drc) >>> +{ >>> + return drc->name; >>> +} >>> + >>> +static const void *get_fdt(sPAPRDRConnector *drc, int *fdt_start_offset) >>> +{ >>> + if (fdt_start_offset) { >>> + *fdt_start_offset = drc->fdt_start_offset; >>> + } >>> + return drc->fdt; >>> +} >>> + >>> +static void set_configured(sPAPRDRConnector *drc) >>> +{ >>> + DPRINTFN("drc: %x, set_configured", get_index(drc)); >>> + >>> + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_UNISOLATED) { >>> + /* guest should be not configuring an isolated device */ >>> + DPRINTFN("drc: %x, set_configured: skipping isolated device", >>> + get_index(drc)); >>> + return; >>> + } >>> + drc->configured = true; >>> +} >>> + >>> +/* >>> + * dr-entity-sense sensor value >>> + * returned via get-sensor-state RTAS calls >>> + * as expected by state diagram in PAPR+ 2.7, 13.4 >>> + * based on the current allocation/indicator/power states >>> + * for the DR connector. >>> + */ >>> +static sPAPRDREntitySense entity_sense(sPAPRDRConnector *drc) >>> +{ >>> + sPAPRDREntitySense state; >>> + >>> + if (drc->dev) { >>> + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI && >>> + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { >>> + /* for logical DR, we return a state of UNUSABLE >>> + * iff the allocation state UNUSABLE. >>> + * Otherwise, report the state as USABLE/PRESENT, >>> + * as we would for PCI. >>> + */ >>> + state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; >>> + } else { >>> + /* this assumes all PCI devices are assigned to >>> + * a 'live insertion' power domain, where QEMU >>> + * manages power state automatically as opposed >>> + * to the guest. present, non-PCI resources are >>> + * unaffected by power state. >>> + */ >>> + state = SPAPR_DR_ENTITY_SENSE_PRESENT; >>> + } >>> + } else { >>> + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) { >>> + /* PCI devices, and only PCI devices, use EMPTY >>> + * in cases where we'd otherwise use UNUSABLE >>> + */ >>> + state = SPAPR_DR_ENTITY_SENSE_EMPTY; >>> + } else { >>> + state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; >>> + } >>> + } >>> + >>> + DPRINTFN("drc: %x, entity_sense: %x", get_index(drc), state); >>> + return state; >>> +} >>> + >>> +static void prop_get_index(Object *obj, Visitor *v, void *opaque, >>> + const char *name, Error **errp) >>> +{ >>> + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); >>> + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); >>> + uint32_t value = (uint32_t)drck->get_index(drc); >>> + visit_type_uint32(v, &value, name, errp); >>> +} >>> + >>> +static void prop_get_type(Object *obj, Visitor *v, void *opaque, >>> + const char *name, Error **errp) >>> +{ >>> + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); >>> + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); >>> + uint32_t value = (uint32_t)drck->get_type(drc); >>> + visit_type_uint32(v, &value, name, errp); >>> +} >>> + >>> +static char *prop_get_name(Object *obj, Error **errp) >>> +{ >>> + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); >>> + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); >>> + return g_strdup(drck->get_name(drc)); >>> +} >>> + >>> +static void prop_get_entity_sense(Object *obj, Visitor *v, void *opaque, >>> + const char *name, Error **errp) >>> +{ >>> + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); >>> + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); >>> + uint32_t value = (uint32_t)drck->entity_sense(drc); >>> + visit_type_uint32(v, &value, name, errp); >>> +} >>> + >>> +static void prop_get_fdt(Object *obj, Visitor *v, void *opaque, >>> + const char *name, Error **errp) >>> +{ >>> + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); >>> + int fdt_offset_next, fdt_offset, fdt_depth; >>> + void *fdt; >>> + >>> + if (!drc->fdt) { >>> + return; >>> + } >>> + >>> + fdt = drc->fdt; >>> + fdt_offset = drc->fdt_start_offset; >>> + fdt_depth = 0; >>> + >>> + do { >>> + const char *name = NULL; >>> + const struct fdt_property *prop = NULL; >>> + int prop_len = 0, name_len = 0; >>> + uint32_t tag; >>> + >>> + tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next); >>> + switch (tag) { >>> + case FDT_BEGIN_NODE: >>> + fdt_depth++; >>> + name = fdt_get_name(fdt, fdt_offset, &name_len); >>> + visit_start_struct(v, NULL, NULL, name, 0, NULL); >>> + break; >>> + case FDT_END_NODE: >>> + /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */ >>> + g_assert(fdt_depth > 0); >>> + visit_end_struct(v, NULL); >>> + fdt_depth--; >>> + break; >>> + case FDT_PROP: { >>> + int i; >>> + prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len); >> >> Did this fdt function only get introduced recently? My test system has >> 1.3.0 installed which doesn't contain it apparently. >> >> Please add a configure check to verify it actually exists if it's really >> necessary. > > Erm.. that's odd. That function went in with commit 73dca9ae, which > is included in dtc 1.3.0..
This is indeed very odd. So this is the binary I have on my system: http://download.opensuse.org/ports/ppc/distribution/12.2/repo/oss/suse/ppc64/libfdt1-1.3.0-7.1.6.ppc64.rpm which got build from almost vanilla 1.3.0 sources: https://build.opensuse.org/package/show/openSUSE:12.3/dtc However, the symbols in the file seem to say it's libfdt 1.2 and I don't see the by_offset function exported. It is however present in the source, so I'm slightly confused. $ readelf -a libfdt-1.3.0.so | grep proper 000000016848 002a00000015 R_PPC64_JMP_SLOT 0000000000016290 fdt_get_property_namel + 0 0000000168f0 003f00000015 R_PPC64_JMP_SLOT 00000000000162a8 fdt_get_property + 0 20: 0000000000016440 112 FUNC GLOBAL DEFAULT 22 fdt_nop_property@@LIBFDT_1.2 42: 0000000000016290 228 FUNC GLOBAL DEFAULT 22 fdt_get_property_namelen@@LIBFDT_1.2 51: 0000000000016548 340 FUNC GLOBAL DEFAULT 22 fdt_property@@LIBFDT_1.2 63: 00000000000162a8 88 FUNC GLOBAL DEFAULT 22 fdt_get_property@@LIBFDT_1.2 However, on a newer system with dtc 1.4 I get the function exported, but all symbols still have a 1.2 version tag: $ readelf -a /usr/lib64/libfdt.so| grep property 000000205018 001d00000007 R_X86_64_JUMP_SLO 0000000000001f36 fdt_first_property_off + 0 000000205048 002a00000007 R_X86_64_JUMP_SLO 0000000000001fbf fdt_get_property_namel + 0 000000205098 004100000007 R_X86_64_JUMP_SLO 0000000000002068 fdt_get_property + 0 0000002050a0 002b00000007 R_X86_64_JUMP_SLO 0000000000001f6a fdt_get_property_by_of + 0 0000002050f8 003200000007 R_X86_64_JUMP_SLO 0000000000001f50 fdt_next_property_offs + 0 18: 000000000000270a 45 FUNC GLOBAL DEFAULT 13 fdt_nop_property@@LIBFDT_1.2 29: 0000000000001f36 26 FUNC GLOBAL DEFAULT 13 fdt_first_property_offset@@LIBFDT_1.2 42: 0000000000001fbf 169 FUNC GLOBAL DEFAULT 13 fdt_get_property_namelen@@LIBFDT_1.2 43: 0000000000001f6a 85 FUNC GLOBAL DEFAULT 13 fdt_get_property_by_offse@@LIBFDT_1.2 50: 0000000000001f50 26 FUNC GLOBAL DEFAULT 13 fdt_next_property_offset@@LIBFDT_1.2 53: 00000000000029b9 303 FUNC GLOBAL DEFAULT 13 fdt_property@@LIBFDT_1.2 65: 0000000000002068 31 FUNC GLOBAL DEFAULT 13 fdt_get_property@@LIBFDT_1.2 Either way, I think the commit actually fixing what we see is http://git.qemu.org/?p=dtc.git;a=commitdiff;h=c6fb1d239191daa3323fb6caeff56d48c4777793;hp=317a5d92bc357aba2c993ee78b4c089b7539fcc6 which exports the symbol. So please either find a way around using the function (which I'd prefer) or make sure that configure requires libfdt 1.4 onwards. Alex