[PATCH 3/9] hw/sysbus: Introduce TYPE_DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Some TYPE_SYS_BUS_DEVICEs can be optionally dynamically
plugged on the TYPE_PLATFORM_BUS_DEVICE.
Rather than sometimes noting that with comment around
the 'user_creatable = true' line in each DeviceRealize
handler, introduce an abstract TYPE_DYNAMIC_SYS_BUS_DEVICE
class.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/sysbus.h |  2 ++
 hw/core/sysbus.c| 14 ++
 2 files changed, 16 insertions(+)

diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index c9b1e0e90e3..81bbda10d37 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -19,6 +19,8 @@ DECLARE_INSTANCE_CHECKER(BusState, SYSTEM_BUS,
 OBJECT_DECLARE_TYPE(SysBusDevice, SysBusDeviceClass,
 SYS_BUS_DEVICE)
 
+#define TYPE_DYNAMIC_SYS_BUS_DEVICE "dynamic-sysbus-device"
+
 /**
  * SysBusDeviceClass:
  *
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 306f98406c0..e8d03fd28d9 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -321,6 +321,14 @@ BusState *sysbus_get_default(void)
 return main_system_bus;
 }
 
+static void dynamic_sysbus_device_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *k = DEVICE_CLASS(klass);
+
+k->user_creatable = true;
+k->hotpluggable = false;
+}
+
 static const TypeInfo sysbus_types[] = {
 {
 .name   = TYPE_SYSTEM_BUS,
@@ -336,6 +344,12 @@ static const TypeInfo sysbus_types[] = {
 .class_size = sizeof(SysBusDeviceClass),
 .class_init = sysbus_device_class_init,
 },
+{
+.name   = TYPE_DYNAMIC_SYS_BUS_DEVICE,
+.parent = TYPE_SYS_BUS_DEVICE,
+.class_init = dynamic_sysbus_device_class_init,
+.abstract   = true,
+}
 };
 
 DEFINE_TYPES(sysbus_types)
-- 
2.47.1




[PATCH 6/9] hw/i386: Have X86_IOMMU devices inherit from DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Do not explain why _X86_IOMMU devices are user_creatable,
have them inherit TYPE_DYNAMIC_SYS_BUS_DEVICE, to explicit
they can optionally be plugged on TYPE_PLATFORM_BUS_DEVICE.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/i386/amd_iommu.c   | 2 --
 hw/i386/intel_iommu.c | 2 --
 hw/i386/x86-iommu.c   | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 6b13ce894b1..e8e084c7cf8 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1687,8 +1687,6 @@ static void amdvi_sysbus_class_init(ObjectClass *klass, 
void *data)
 dc->hotpluggable = false;
 dc_class->realize = amdvi_sysbus_realize;
 dc_class->int_remap = amdvi_int_remap;
-/* Supported by the pc-q35-* machine types */
-dc->user_creatable = true;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
 device_class_set_props(dc, amdvi_properties);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f366c223d0e..7fde0603bfe 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4871,8 +4871,6 @@ static void vtd_class_init(ObjectClass *klass, void *data)
 dc->hotpluggable = false;
 x86_class->realize = vtd_realize;
 x86_class->int_remap = vtd_int_remap;
-/* Supported by the pc-q35-* machine types */
-dc->user_creatable = true;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->desc = "Intel IOMMU (VT-d) DMA Remapping device";
 }
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index fed34b2fcfa..5cdd165af0d 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -146,7 +146,7 @@ bool x86_iommu_ir_supported(X86IOMMUState *s)
 
 static const TypeInfo x86_iommu_info = {
 .name  = TYPE_X86_IOMMU_DEVICE,
-.parent= TYPE_SYS_BUS_DEVICE,
+.parent= TYPE_DYNAMIC_SYS_BUS_DEVICE,
 .instance_size = sizeof(X86IOMMUState),
 .class_init= x86_iommu_class_init,
 .class_size= sizeof(X86IOMMUClass),
-- 
2.47.1




[PATCH 2/9] hw/sysbus: Declare QOM types using DEFINE_TYPES() macro

2025-01-25 Thread Philippe Mathieu-Daudé
When multiple QOM types are registered in the same file,
it is simpler to use the the DEFINE_TYPES() macro. In
particular because type array declared with such macro
are easier to review.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/core/sysbus.c | 39 +--
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index f713bbfe04f..306f98406c0 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -80,13 +80,6 @@ static void system_bus_class_init(ObjectClass *klass, void 
*data)
 k->get_fw_dev_path = sysbus_get_fw_dev_path;
 }
 
-static const TypeInfo system_bus_info = {
-.name = TYPE_SYSTEM_BUS,
-.parent = TYPE_BUS,
-.instance_size = sizeof(BusState),
-.class_init = system_bus_class_init,
-};
-
 /* Check whether an IRQ source exists */
 bool sysbus_has_irq(SysBusDevice *dev, int n)
 {
@@ -306,15 +299,6 @@ static void sysbus_device_class_init(ObjectClass *klass, 
void *data)
 k->user_creatable = false;
 }
 
-static const TypeInfo sysbus_device_type_info = {
-.name = TYPE_SYS_BUS_DEVICE,
-.parent = TYPE_DEVICE,
-.instance_size = sizeof(SysBusDevice),
-.abstract = true,
-.class_size = sizeof(SysBusDeviceClass),
-.class_init = sysbus_device_class_init,
-};
-
 static BusState *main_system_bus;
 
 static void main_system_bus_create(void)
@@ -337,10 +321,21 @@ BusState *sysbus_get_default(void)
 return main_system_bus;
 }
 
-static void sysbus_register_types(void)
-{
-type_register_static(&system_bus_info);
-type_register_static(&sysbus_device_type_info);
-}
+static const TypeInfo sysbus_types[] = {
+{
+.name   = TYPE_SYSTEM_BUS,
+.parent = TYPE_BUS,
+.instance_size  = sizeof(BusState),
+.class_init = system_bus_class_init,
+},
+{
+.name   = TYPE_SYS_BUS_DEVICE,
+.parent = TYPE_DEVICE,
+.instance_size  = sizeof(SysBusDevice),
+.abstract   = true,
+.class_size = sizeof(SysBusDeviceClass),
+.class_init = sysbus_device_class_init,
+},
+};
 
-type_init(sysbus_register_types)
+DEFINE_TYPES(sysbus_types)
-- 
2.47.1




[PATCH 1/9] hw/sysbus: Use sizeof(BusState) in main_system_bus_create()

2025-01-25 Thread Philippe Mathieu-Daudé
Rather than using the obscure system_bus_info.instance_size,
directly use sizeof(BusState).

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/core/sysbus.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 9355849ff0a..f713bbfe04f 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -323,8 +323,8 @@ static void main_system_bus_create(void)
  * assign main_system_bus before qbus_init()
  * in order to make "if (bus != sysbus_get_default())" work
  */
-main_system_bus = g_malloc0(system_bus_info.instance_size);
-qbus_init(main_system_bus, system_bus_info.instance_size,
+main_system_bus = g_new0(BusState, 1);
+qbus_init(main_system_bus, sizeof(BusState),
   TYPE_SYSTEM_BUS, NULL, "main-system-bus");
 OBJECT(main_system_bus)->free = g_free;
 }
-- 
2.47.1




[PATCH 4/9] hw/vfio: Have VFIO_PLATFORM devices inherit from DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Do not explain why VFIO_PLATFORM devices are user_creatable,
have them inherit TYPE_DYNAMIC_SYS_BUS_DEVICE, to explicit
they can optionally be plugged on TYPE_PLATFORM_BUS_DEVICE.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/vfio/amd-xgbe.c  | 2 --
 hw/vfio/calxeda-xgmac.c | 2 --
 hw/vfio/platform.c  | 4 +---
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
index 96bd608b8dd..aaa96903db0 100644
--- a/hw/vfio/amd-xgbe.c
+++ b/hw/vfio/amd-xgbe.c
@@ -41,8 +41,6 @@ static void vfio_amd_xgbe_class_init(ObjectClass *klass, void 
*data)
 &vcxc->parent_realize);
 dc->desc = "VFIO AMD XGBE";
 dc->vmsd = &vfio_platform_amd_xgbe_vmstate;
-/* Supported by TYPE_VIRT_MACHINE */
-dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_amd_xgbe_dev_info = {
diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
index 87c382e7361..b016d42b496 100644
--- a/hw/vfio/calxeda-xgmac.c
+++ b/hw/vfio/calxeda-xgmac.c
@@ -41,8 +41,6 @@ static void vfio_calxeda_xgmac_class_init(ObjectClass *klass, 
void *data)
 &vcxc->parent_realize);
 dc->desc = "VFIO Calxeda XGMAC";
 dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate;
-/* Supported by TYPE_VIRT_MACHINE */
-dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_calxeda_xgmac_dev_info = {
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 1070a2113a1..f491f4dc954 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -672,13 +672,11 @@ static void vfio_platform_class_init(ObjectClass *klass, 
void *data)
 dc->desc = "VFIO-based platform device assignment";
 sbc->connect_irq_notifier = vfio_start_irqfd_injection;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-/* Supported by TYPE_VIRT_MACHINE */
-dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_platform_dev_info = {
 .name = TYPE_VFIO_PLATFORM,
-.parent = TYPE_SYS_BUS_DEVICE,
+.parent = TYPE_DYNAMIC_SYS_BUS_DEVICE,
 .instance_size = sizeof(VFIOPlatformDevice),
 .instance_init = vfio_platform_instance_init,
 .class_init = vfio_platform_class_init,
-- 
2.47.1




[PATCH 0/9] hw/sysbus/platform-bus: Introduce TYPE_DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Some SysBus devices can optionally be dynamically plugged onto
the sysbus-platform-bus (then virtual guests are aware of
mmio mapping and IRQs via device tree / ACPI rules).

This series makes these devices explicit by having them implement
the DYNAMIC_SYS_BUS_DEVICE class, which only sets 'user_creatable'
flag to %true but makes the codebase a bit clearer (IMHO, at least
now we can grep for DYNAMIC_SYS_BUS_DEVICE).

Philippe Mathieu-Daudé (9):
  hw/sysbus: Use sizeof(BusState) in main_system_bus_create()
  hw/sysbus: Declare QOM types using DEFINE_TYPES() macro
  hw/sysbus: Introduce TYPE_DYNAMIC_SYS_BUS_DEVICE
  hw/vfio: Have VFIO_PLATFORM devices inherit from
DYNAMIC_SYS_BUS_DEVICE
  hw/display: Have RAMFB device inherit from DYNAMIC_SYS_BUS_DEVICE
  hw/i386: Have X86_IOMMU devices inherit from DYNAMIC_SYS_BUS_DEVICE
  hw/net: Have eTSEC device inherit from DYNAMIC_SYS_BUS_DEVICE
  hw/tpm: Have TPM TIS sysbus device inherit from DYNAMIC_SYS_BUS_DEVICE
  hw/xen: Have legacy Xen backend inherit from DYNAMIC_SYS_BUS_DEVICE

 include/hw/sysbus.h   |  2 ++
 include/hw/xen/xen_pvdev.h|  3 +-
 hw/core/sysbus.c  | 53 ---
 hw/display/ramfb-standalone.c |  3 +-
 hw/i386/amd_iommu.c   |  2 --
 hw/i386/intel_iommu.c |  2 --
 hw/i386/x86-iommu.c   |  2 +-
 hw/net/fsl_etsec/etsec.c  |  4 +--
 hw/tpm/tpm_tis_sysbus.c   |  3 +-
 hw/vfio/amd-xgbe.c|  2 --
 hw/vfio/calxeda-xgmac.c   |  2 --
 hw/vfio/platform.c|  4 +--
 hw/xen/xen-legacy-backend.c   |  7 ++---
 13 files changed, 42 insertions(+), 47 deletions(-)

-- 
2.47.1




[PATCH 7/9] hw/net: Have eTSEC device inherit from DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Because the network eTSEC device can be optionally plugged on the
TYPE_PLATFORM_BUS_DEVICE, have it inherit TYPE_DYNAMIC_SYS_BUS_DEVICE.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/net/fsl_etsec/etsec.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
index 781b9003954..3ce4fa2662d 100644
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c
@@ -425,14 +425,12 @@ static void etsec_class_init(ObjectClass *klass, void 
*data)
 dc->realize = etsec_realize;
 device_class_set_legacy_reset(dc, etsec_reset);
 device_class_set_props(dc, etsec_properties);
-/* Supported by ppce500 machine */
-dc->user_creatable = true;
 }
 
 static const TypeInfo etsec_types[] = {
 {
 .name  = TYPE_ETSEC_COMMON,
-.parent= TYPE_SYS_BUS_DEVICE,
+.parent= TYPE_DYNAMIC_SYS_BUS_DEVICE,
 .instance_size = sizeof(eTSEC),
 .class_init= etsec_class_init,
 .instance_init = etsec_instance_init,
-- 
2.47.1




[PATCH 5/9] hw/display: Have RAMFB device inherit from DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Because the RAM FB device can be optionally plugged on the
TYPE_PLATFORM_BUS_DEVICE, have it inherit TYPE_DYNAMIC_SYS_BUS_DEVICE.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/display/ramfb-standalone.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c
index 6c35028965d..1be106b57f2 100644
--- a/hw/display/ramfb-standalone.c
+++ b/hw/display/ramfb-standalone.c
@@ -72,13 +72,12 @@ static void ramfb_class_initfn(ObjectClass *klass, void 
*data)
 dc->vmsd = &ramfb_dev_vmstate;
 dc->realize = ramfb_realizefn;
 dc->desc = "ram framebuffer standalone device";
-dc->user_creatable = true;
 device_class_set_props(dc, ramfb_properties);
 }
 
 static const TypeInfo ramfb_info = {
 .name  = TYPE_RAMFB_DEVICE,
-.parent= TYPE_SYS_BUS_DEVICE,
+.parent= TYPE_DYNAMIC_SYS_BUS_DEVICE,
 .instance_size = sizeof(RAMFBStandaloneState),
 .class_init= ramfb_class_initfn,
 };
-- 
2.47.1




[PATCH 8/9] hw/tpm: Have TPM TIS sysbus device inherit from DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Because the TPM TIS sysbus device can be optionally plugged on the
TYPE_PLATFORM_BUS_DEVICE, have it inherit TYPE_DYNAMIC_SYS_BUS_DEVICE.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/tpm/tpm_tis_sysbus.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
index ee0bfe9538e..4f187690a28 100644
--- a/hw/tpm/tpm_tis_sysbus.c
+++ b/hw/tpm/tpm_tis_sysbus.c
@@ -133,7 +133,6 @@ static void tpm_tis_sysbus_class_init(ObjectClass *klass, 
void *data)
 dc->vmsd  = &vmstate_tpm_tis_sysbus;
 tc->model = TPM_MODEL_TPM_TIS;
 dc->realize = tpm_tis_sysbus_realizefn;
-dc->user_creatable = true;
 device_class_set_legacy_reset(dc, tpm_tis_sysbus_reset);
 tc->request_completed = tpm_tis_sysbus_request_completed;
 tc->get_version = tpm_tis_sysbus_get_tpm_version;
@@ -142,7 +141,7 @@ static void tpm_tis_sysbus_class_init(ObjectClass *klass, 
void *data)
 
 static const TypeInfo tpm_tis_sysbus_info = {
 .name = TYPE_TPM_TIS_SYSBUS,
-.parent = TYPE_SYS_BUS_DEVICE,
+.parent = TYPE_DYNAMIC_SYS_BUS_DEVICE,
 .instance_size = sizeof(TPMStateSysBus),
 .instance_init = tpm_tis_sysbus_initfn,
 .class_init  = tpm_tis_sysbus_class_init,
-- 
2.47.1




[RFC PATCH 9/9] hw/xen: Have legacy Xen backend inherit from DYNAMIC_SYS_BUS_DEVICE

2025-01-25 Thread Philippe Mathieu-Daudé
Because the legacy Xen backend devices can optionally be plugged on the
TYPE_PLATFORM_BUS_DEVICE, have it inherit TYPE_DYNAMIC_SYS_BUS_DEVICE.
Remove the implicit TYPE_XENSYSDEV instance_size.

Untested, but I'm surprised the legacy devices work because they
had a broken instance size (QDev instead of Sysbus...), so accesses
of XenLegacyDevice fields were overwritting sysbus ones.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/xen/xen_pvdev.h  | 3 ++-
 hw/xen/xen-legacy-backend.c | 7 ++-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index 0c984440476..48950dc2b57 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -32,7 +32,8 @@ struct XenDevOps {
 };
 
 struct XenLegacyDevice {
-DeviceStateqdev;
+SysBusDevice parent_obj;
+
 const char *type;
 intdom;
 intdev;
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 118c571b3a7..4d079e35d83 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -640,16 +640,14 @@ static void xendev_class_init(ObjectClass *klass, void 
*data)
 DeviceClass *dc = DEVICE_CLASS(klass);
 
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-/* xen-backend devices can be plugged/unplugged dynamically */
-dc->user_creatable = true;
 dc->bus_type = TYPE_XENSYSBUS;
 }
 
 static const TypeInfo xendev_type_info = {
 .name  = TYPE_XENBACKEND,
-.parent= TYPE_DEVICE,
+.parent= TYPE_DYNAMIC_SYS_BUS_DEVICE,
 .class_init= xendev_class_init,
-.instance_size = sizeof(struct XenLegacyDevice),
+.instance_size = sizeof(XenLegacyDevice),
 };
 
 static void xen_sysbus_class_init(ObjectClass *klass, void *data)
@@ -672,7 +670,6 @@ static const TypeInfo xensysbus_info = {
 static const TypeInfo xensysdev_info = {
 .name  = TYPE_XENSYSDEV,
 .parent= TYPE_SYS_BUS_DEVICE,
-.instance_size = sizeof(SysBusDevice),
 };
 
 static void xenbe_register_types(void)
-- 
2.47.1




Re: Serious AMD-Vi issue

2025-01-25 Thread Teddy Astie
Hello Elliott,

Le 24/01/2025 à 22:31, Elliott Mitchell a écrit :
> On Fri, Jan 24, 2025 at 03:31:30PM +0100, Roger Pau Monné wrote:
>> On Thu, Jan 25, 2024 at 12:24:53PM -0800, Elliott Mitchell wrote:
>>> Apparently this was first noticed with 4.14, but more recently I've been
>>> able to reproduce the issue:
>>>
>>> https://bugs.debian.org/988477
>>>
>>> The original observation features MD-RAID1 using a pair of Samsung
>>> SATA-attached flash devices.  The main line shows up in `xl dmesg`:
>>>
>>> (XEN) AMD-Vi: IO_PAGE_FAULT: :bb:dd.f d0 addr ff???000 flags 
>>> 0x8 I
>>
>> I think I've figured out the cause for those faults, and posted a fix
>> here:
>>
>> https://lore.kernel.org/xen-devel/20250124120112.56678-1-roger@citrix.com/
>>
>> Fix is patch 5/5, but you likely want to take them all to avoid
>> context conflicts.
>
> I haven't tested yet, but some analysis from looking at the series.

>
> This seems a plausible explanation for the interrupt IOMMU messages.  As
> such I think there is a good chance the reported messages will disappear.
>
> Nothing in here looks plausible for solving the real problem, that of
> RAID1 mirrors diverging (almost certainly getting zeroes during DMA, but
> there is a chance stale data is being read).
>

The message is showing shows that something is going wrong, presumably a
lost interrupt. This can lead to data loss, as it breaks the
expectations of the Dom0's drivers.

If you still observe data loss after these patches, and these messages
have disappeared, it may be due to something else, but these patches are
not looking to hide the fault.

According to AMD-Vi specification, there appears to be a specific case
where interrupt remapping faults are reported as IO_PAGE_FAULT (which
appears to be what's happening).

IG bit (133) of DTE appears to provide an explanation (SupIOPF can set
this behavior globally).

 > IG: ignore unmapped interrupts. 1=Suppress event logging for interrupt
 > messages causing IO_PAGE_FAULT events. 0=creation of event log entries
 > for IO_PAGE_FAULT events is controlled by SupIOPF in the interrupt
 > remapping table entry (see Section 2.2.5 [Interrupt Remapping
 > Tables]).

Note that Xen (and this patch doesn't change this behavior) does set
this bit to 0, which means that faults are reported as IO_PAGE_FAULT events.

> Worse, since it removes the observed messages, the next person will
> almost certainly have severe data loss by the time they realize there is
> a problem.  Notably those messages lead me to Debian #988477, so I was
> able to take action before things got too bad.
>
>
>
> I'm not absolutely certain this is a pure Xen bug.  There is a
> possibility the RAID1 driver is reusing DMA buffers in a fashion which
> violates the DMA interface.  Yet there is also a good chance Xen isn't
> implementing its layer properly either.
>
>
>
> There is one pattern emerging at this point.  Samsung hardware is badly
> effected, other vendors are either uneffected or mildly effected.
> Notably the estimated age of the devices meant to be handed off to
> someone able to diagnose the issue is >10 years.  The uneffected
> Crucial/Micron SATA device *should* drastically outperform these, yet
> instead it is uneffected.  The Crucial/Micron NVMe is very mildly
> effected, yet should be more than an order of magnitude faster.
>
> The simplest explanation is the flash controller on the Samsung devices
> is lower latency than the one used by Micron.
>
>
> Both present reproductions feature AMD processors and ASUS motherboards.
> I'm doubtful of this being an ASUS issue.  This seems more likely a case
> of people who use RAID with flash tending to go with a motherboard vendor
> who reliably support ECC on all their motherboards.
>
> I don't know whether this is confined to AMD processors, or not.  The
> small number of reproductions suggests few people are doing RAID with
> flash storage.  In which case no one may have tried RAID1 with flash on
> Intel processors.  On Intel hardware the referenced message would be
> absent and people might think their problem was distinct from Debian
> #988477.
>
> In fact what seems a likely reproduction on Intel hardware is the Intel
> sound card issue.  I notice that issue occurs when sound *starts*
> playing.  When a sound device starts, its buffers would be empty and the
> first DMA request would be turned around with minimal latency.  In such
> case this matches the Samsung SATA devices handling DMA with low
> latency.
>
>
>> Can you give it a try and see if it fixes the fault messages, plus
>> your issues with the disk devices?
>
> Ick.  I was hoping to avoid reinstalling the known problematic devices
> and simply send them to someone better setup for analyzing x86 problems.
>
> Looking at the series, it seems likely to remove the fault messages and
> turn this into silent data loss.  I doubt any AMD processors have an
> IOMMU, yet omit cmpxchg16b (older system lacked full IOMMU, yet did have
> cmpxc

Re: [PATCH v6 04/15] x86/pvh: Use fixed_percpu_data for early boot GSBASE

2025-01-25 Thread Brian Gerst
On Sat, Jan 25, 2025 at 10:07 AM Borislav Petkov  wrote:
>
>
> On Thu, Jan 23, 2025 at 02:07:36PM -0500, Brian Gerst wrote:
> > Instead of having a private area for the stack canary, use
> > fixed_percpu_data for GSBASE like the native kernel.
> >
> > Signed-off-by: Brian Gerst 
> > Reviewed-by: Ard Biesheuvel 
> > ---
> >  arch/x86/platform/pvh/head.S | 15 +--
> >  1 file changed, 9 insertions(+), 6 deletions(-)
>
> Use ./scripts/get_maintainer.pl pls. I've added Juergen now.
>
> > diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> > index 4733a5f467b8..fa0072e0ca43 100644
> > --- a/arch/x86/platform/pvh/head.S
> > +++ b/arch/x86/platform/pvh/head.S
> > @@ -173,10 +173,15 @@ SYM_CODE_START(pvh_start_xen)
> >  1:
> >   UNWIND_HINT_END_OF_STACK
> >
> > - /* Set base address in stack canary descriptor. */
> > - mov $MSR_GS_BASE,%ecx
> > - leal canary(%rip), %eax
> > - xor %edx, %edx
> > + /*
> > +  * Set up GSBASE.
> > +  * Note that, on SMP, the boot cpu uses init data section until
> > +  * the per cpu areas are set up.
>
> s/cpu/CPU/g
>
> check your whole set pls.

To be fair, this was a copy of an existing comment.  Is there a style
guide where all these grammar rules are documented, so I don't have to
keep resending these patches for trivial typos?


Brian Gerst



Re: [PATCH v6 04/15] x86/pvh: Use fixed_percpu_data for early boot GSBASE

2025-01-25 Thread Borislav Petkov


On Thu, Jan 23, 2025 at 02:07:36PM -0500, Brian Gerst wrote:
> Instead of having a private area for the stack canary, use
> fixed_percpu_data for GSBASE like the native kernel.
> 
> Signed-off-by: Brian Gerst 
> Reviewed-by: Ard Biesheuvel 
> ---
>  arch/x86/platform/pvh/head.S | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)

Use ./scripts/get_maintainer.pl pls. I've added Juergen now.

> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index 4733a5f467b8..fa0072e0ca43 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -173,10 +173,15 @@ SYM_CODE_START(pvh_start_xen)
>  1:
>   UNWIND_HINT_END_OF_STACK
>  
> - /* Set base address in stack canary descriptor. */
> - mov $MSR_GS_BASE,%ecx
> - leal canary(%rip), %eax
> - xor %edx, %edx
> + /*
> +  * Set up GSBASE.
> +  * Note that, on SMP, the boot cpu uses init data section until
> +  * the per cpu areas are set up.

s/cpu/CPU/g

check your whole set pls.

> +  */
> + movl $MSR_GS_BASE,%ecx
> + leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> + movq %edx, %eax
> + shrq $32, %rdx
>   wrmsr





Re: [PATCH v6 04/15] x86/pvh: Use fixed_percpu_data for early boot GSBASE

2025-01-25 Thread Borislav Petkov
On January 25, 2025 5:51:29 PM GMT+01:00, Brian Gerst  wrote:
>To be fair, this was a copy of an existing comment.  Is there a style
>guide where all these grammar rules are documented, so I don't have to
>keep resending these patches for trivial typos?

You don't have to keep resending them for trivial typos - you simply wait 1-2 
weeks to gather review feedback, you incorporate it and send a new version of 
the set. Like it is usually done on lkml. I think you know how the process 
works...


-- 
Sent from a small device: formatting sucks and brevity is inevitable.



RE: [PATCH v1 08/11] x86/cpufreq: add "cpufreq=amd-pstate,active" para

2025-01-25 Thread Penny, Zheng
[AMD Official Use Only - AMD Internal Distribution Only]

Hi,

> -Original Message-
> From: Jan Beulich 
> Sent: Thursday, January 9, 2025 7:24 PM
> To: Penny, Zheng 
> Cc: Stabellini, Stefano ; Huang, Ray
> ; Ragiadakou, Xenia ;
> Andryuk, Jason ; Andrew Cooper
> ; Julien Grall ; Stefano Stabellini
> ; Roger Pau Monné ; xen-
> de...@lists.xenproject.org
> Subject: Re: [PATCH v1 08/11] x86/cpufreq: add "cpufreq=amd-pstate,active" 
> para
>
> On 03.12.2024 09:11, Penny Zheng wrote:
> > From: Penny Zheng 
> >
> > The amd-pstate driver may support multiple working modes, passive and 
> > active.
> >
> > Introduce a new variable to keep track of which mode is currently enabled.
> > This variable will also help to choose which cpufreq driver to be 
> > registered.
> >
> > Signed-off-by: Penny Zheng 
> > ---
> >  docs/misc/xen-command-line.pandoc  |  9 -
> >  xen/arch/x86/acpi/cpufreq/amd-pstate.c | 12 +++-
> >  2 files changed, 19 insertions(+), 2 deletions(-)
> >
> > diff --git a/docs/misc/xen-command-line.pandoc
> > b/docs/misc/xen-command-line.pandoc
> > index 30f855fa18..a9a3e0ef79 100644
> > --- a/docs/misc/xen-command-line.pandoc
> > +++ b/docs/misc/xen-command-line.pandoc
> > @@ -499,7 +499,8 @@ If set, force use of the performance counters for
> > oprofile, rather than detectin  available support.
> >
> >  ### cpufreq
> > -> `= none | {{  | xen } {
> > -> [:[powersave|performance|ondemand|userspace][,[]][,[ > -> eq>]]] } [,verbose]} | dom0-kernel | hwp[:[][,verbose]] |
> > -> amd-pstate[:[verbose]]`
> > +> `= none | {{  | xen } {
> > +> [:[powersave|performance|ondemand|userspace][,[]][,[ > +> eq>]]] }
> > +[,verbose]} | dom0-kernel | hwp[:[][,verbose]] |
> > +amd-pstate[:[active][,verbose]]`
> >
> >  > Default: `xen`
> >
> > @@ -522,6 +523,12 @@ choice of `dom0-kernel` is deprecated and not
> supported by all Dom0 kernels.
> >on supported AMD hardware to provide a finer grained frequency control
> mechanism.
> >The default is disabled. If `amd-pstate` is selected, but hardware 
> > support
> >is not available, Xen will fallback to cpufreq=xen.
> > +* `active` is a boolean to enable amd-pstate driver in active(autonomous) 
> > mode.
> > +  In this mode, users could provide a hint with energy performance
> > +preference
> > +  register to the hardware if they want to bias toward
> > +performance(0x0) or
> > +  energy efficiency(0xff), then CPPC power algorithm will calculate
> > +the runtime
> > +  workload and adjust the realtime cores frequency according to the
> > +power supply
> > +  and themal, core voltage and some other hardware conditions.
>
> Nit: thermal
>
> > --- a/xen/arch/x86/acpi/cpufreq/amd-pstate.c
> > +++ b/xen/arch/x86/acpi/cpufreq/amd-pstate.c
> > @@ -27,6 +27,8 @@
> >  #define amd_pstate_warn(fmt, args...) \
> >  printk(XENLOG_WARNING "AMD_PSTATE: CPU%u warning: " fmt, cpu, ##
> > args)
> >
> > +static bool __ro_after_init opt_cpufreq_active = false;
>
> Pointless initializer.
>
> > @@ -398,5 +407,6 @@ int __init amd_pstate_register_driver(void)
> >  if ( !cpu_has_cppc )
> >  return -ENODEV;
> >
> > -return cpufreq_register_driver(&amd_pstate_cpufreq_driver);
> > +if ( !opt_cpufreq_active )
> > +return cpufreq_register_driver(&amd_pstate_cpufreq_driver);
> >  }
>
> I'm afraid the description is of no help in determining why this is a correct 
> change to
> make (here). How would the user provided hint (see cmdline option 
> description) be
> communicated to hardware when the driver isn't even registered?
>

Maybe I shall combine this commit into the next one about implementing epp 
driver for active mode,
and the changes will be like:
  -return cpufreq_register_driver(&amd_pstate_cpufreq_driver);
 +if ( opt_cpufreq_active )
 +return cpufreq_register_driver(&&amd_cppc_epp_driver);
 +else
 +return cpufreq_register_driver(&amd_cppc_cpufreq_driver);
Now, the description makes more sense.

> Finally I don't think the change above would build, as it leaves a return 
> from the
> function without return value.
>
> Jan

Many thanks
Penny


RE: [PATCH v1 09/11] xen/x86: implement EPP support for the AMD processors

2025-01-25 Thread Penny, Zheng
[AMD Official Use Only - AMD Internal Distribution Only]

Hi,

> -Original Message-
> From: Jan Beulich 
> Sent: Thursday, January 9, 2025 7:38 PM
> To: Penny, Zheng 
> Cc: Stabellini, Stefano ; Huang, Ray
> ; Ragiadakou, Xenia ;
> Andryuk, Jason ; Andrew Cooper
> ; Roger Pau Monné ; Julien
> Grall ; Stefano Stabellini ; xen-
> de...@lists.xenproject.org
> Subject: Re: [PATCH v1 09/11] xen/x86: implement EPP support for the AMD
> processors
>
> On 03.12.2024 09:11, Penny Zheng wrote:
> > +
> > +/* Initial min/max values for CPPC Performance Controls Register */
> > +max_perf = data->hw.highest_perf;
> > +min_perf = data->hw.lowest_perf;
> > +
> > +if ( data->policy == CPUFREQ_POLICY_PERFORMANCE )
> > +min_perf = max_perf;
>
> Why can't this be done ...
>
> > +/* CPPC EPP feature require to set zero to the desire perf bit */
> > +des_perf = 0;
> > +
> > +if ( data->policy == CPUFREQ_POLICY_PERFORMANCE )
> > +/* Force the epp value to be zero for performance policy */
> > +epp = CPPC_ENERGY_PERF_MAX_PERFORMANCE;
>
> ... here as well? And why is there nothing respective for ...

Ack

>
> > +else if ( data->policy == CPUFREQ_POLICY_POWERSAVE )
> > +/* Force the epp value to be 0xff for powersave policy */
> > +epp = CPPC_ENERGY_PERF_MAX_POWERSAVE;
>
> ... this case (e.g. setting max_perf from min_perf)?

If we put max_perf = min_perf = lowest_perf/lowest_nonlinear_perf, then
we are putting core in idle state. That's how we offline the cpu core in Linux
amd pstate epp driver, see 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/cpufreq/amd-pstate.c?h=v6.13#n1643

>
> Jan

Many thanks,
Penny