date:20250221

[PATCH] hw/rtc: Add RTC PCF8563 module

2025-02-21 Thread Ilya Chichkov

Add PCF8563, a real-time clock with calendar and I2C interface.
This commit adds support for interfacing with it and implements
functionality of setting timer, alarm, reading and writing time.

Datasheet: https://www.micros.com.pl/mediaserver/UZPCF8563ts5_0001.pdf

Signed-off-by: Ilya Chichkov 
---
 hw/rtc/Kconfig   |   5 +
 hw/rtc/meson.build   |   1 +
 hw/rtc/pcf8563_rtc.c | 638 +++
 hw/rtc/trace-events  |  11 +
 4 files changed, 655 insertions(+)
 create mode 100644 hw/rtc/pcf8563_rtc.c

diff --git a/hw/rtc/Kconfig b/hw/rtc/Kconfig
index d0d8dda084..fd7bd393bd 100644
--- a/hw/rtc/Kconfig
+++ b/hw/rtc/Kconfig
@@ -30,3 +30,8 @@ config GOLDFISH_RTC
 
 config LS7A_RTC
 bool
+
+config PCF8563_RTC
+bool
+depends on I2C
+default y if I2C_DEVICES
diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build
index 3ea2affe0b..959541a96d 100644
--- a/hw/rtc/meson.build
+++ b/hw/rtc/meson.build
@@ -14,3 +14,4 @@ system_ss.add(when: 'CONFIG_GOLDFISH_RTC', if_true: 
files('goldfish_rtc.c'))
 system_ss.add(when: 'CONFIG_LS7A_RTC', if_true: files('ls7a_rtc.c'))
 system_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-rtc.c'))
 system_ss.add(when: 'CONFIG_MC146818RTC', if_true: files('mc146818rtc.c'))
+system_ss.add(when: 'CONFIG_PCF8563_RTC', if_true: files('pcf8563_rtc.c'))
diff --git a/hw/rtc/pcf8563_rtc.c b/hw/rtc/pcf8563_rtc.c
new file mode 100644
index 00..63d5f95c42
--- /dev/null
+++ b/hw/rtc/pcf8563_rtc.c
@@ -0,0 +1,638 @@
+/*
+ * Real-time clock/calendar PCF8563 with I2C interface.
+ *
+ * Copyright (c) 2024 Ilya Chichkov 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/registerfields.h"
+#include "hw/irq.h"
+#include "qemu/bitops.h"
+#include "hw/qdev-properties.h"
+#include "qemu/timer.h"
+#include "qapi/error.h"
+#include "hw/i2c/i2c.h"
+#include "qemu/bcd.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+#include "sysemu/rtc.h"
+#include "migration/vmstate.h"
+#include "qapi/visitor.h"
+#include "qemu/log.h"
+
+#include "trace.h"
+
+#define TYPE_PCF8563 "pcf8563"
+
+#define PCF8563(obj) \
+OBJECT_CHECK(Pcf8563State, (obj), TYPE_PCF8563)
+
+#define  PCF8563_CS10x00
+#define  PCF8563_CS20x01
+#define  PCF8563_VLS0x02
+#define  PCF8563_MINUTES0x03
+#define  PCF8563_HOURS  0x04
+#define  PCF8563_DAYS   0x05
+#define  PCF8563_WEEKDAYS   0x06
+#define  PCF8563_CENTURY_MONTHS 0x07
+#define  PCF8563_YEARS  0x08
+#define  PCF8563_MINUTE_A   0x09
+#define  PCF8563_HOUR_A 0x0A
+#define  PCF8563_DAY_A  0x0B
+#define  PCF8563_WEEKDAY_A  0x0C
+#define  PCF8563_CLKOUT_CTL 0x0D
+#define  PCF8563_TIMER_CTL  0x0E
+#define  PCF8563_TIMER  0x0F
+
+REG8(PCF8563_CS1, 0x00)
+FIELD(PCF8563_CS1, RSVD0,  0,  3)
+FIELD(PCF8563_CS1, TESTC,  3,  1)
+FIELD(PCF8563_CS1, RSVD1,  4,  1)
+FIELD(PCF8563_CS1, STOP,   5,  1)
+FIELD(PCF8563_CS1, RSVD2,  6,  1)
+FIELD(PCF8563_CS1, TEST1,  7,  1)
+
+REG8(PCF8563_CS2, 0x01)
+FIELD(PCF8563_CS2, TIE,   0,  1)
+FIELD(PCF8563_CS2, AIE,   1,  1)
+FIELD(PCF8563_CS2, TF,2,  1)
+FIELD(PCF8563_CS2, AF,3,  1)
+FIELD(PCF8563_CS2, TI_TP, 4,  1)
+FIELD(PCF8563_CS2, RSVD,  5,  3)
+
+REG8(PCF8563_VLS, 0x02)
+FIELD(PCF8563_VLS, SECONDS,  0,  7)
+FIELD(PCF8563_VLS, VL,   7,  1)
+
+REG8(PCF8563_MINUTES, 0x03)
+FIELD(PCF8563_MINUTES, MINUTES, 0,  7)
+FIELD(PCF8563_MINUTES, RSVD,7,  1)
+
+REG8(PCF8563_HOURS, 0x04)
+FIELD(PCF8563_HOURS, HOURS, 0,  6)
+FIELD(PCF8563_HOURS, RSVD,  6,  2)
+
+REG8(PCF8563_DAYS, 0x05)
+FIELD(PCF8563_DAYS, DAYS, 0,  6)
+FIELD(PCF8563_DAYS, RSVD, 6,  2)
+
+REG8(PCF8563_WEEKDAYS, 0x06)
+FIELD(PCF8563_WEEKDAYS, WEEKDAYS, 0,  3)
+FIELD(PCF8563_WEEKDAYS, RSVD, 3,  5)
+
+REG8(PCF8563_CENTURY_MONTHS, 0x07)
+FIELD(PCF8563_CENTURY_MONTHS, MONTHS,  0,  5)
+FIELD(PCF8563_CENTURY_MONTHS, RSVD,5,  2)
+FIELD(PCF8563_CENTURY_MONTHS, CENTURY, 7,  1)
+
+REG8(PCF8563_YEARS, 0x08)
+FIELD(PCF8563_YEARS, YEARS, 0,  8)
+
+REG8(PCF8563_MINUTE_A, 0x09)
+FIELD(PCF8563_MINUTE_A, MINUTE_A, 0,  7)
+FIELD(PCF8563_MINUTE_A, AE_M, 7,  1)
+
+REG8(PCF8563_HOUR_A, 0x0A)
+FIELD(PCF8563_HOUR_A, HOUR_A, 0,  7)
+FIELD(PCF8563_HOUR_A, AE

[PATCH v3 11/19] memory: Store user data pointer in the IOMMU notifiers

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

This will help developers of svm devices to track a state

Signed-off-by: Clement Mathieu--Drif 
---
 include/exec/memory.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index eee625a9c6..4d240cad1c 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -204,6 +204,7 @@ struct IOMMUNotifier {
 hwaddr start;
 hwaddr end;
 int iommu_idx;
+void *opaque;
 QLIST_ENTRY(IOMMUNotifier) node;
 };
 typedef struct IOMMUNotifier IOMMUNotifier;
-- 
2.48.1

[PATCH v3 16/19] pci: Add a pci-level API for ATS

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

Devices implementing ATS can send translation requests using
pci_ats_request_translation_pasid.

The invalidation events are sent back to the device using the iommu
notifier managed with pci_register_iommu_tlb_event_notifier and
pci_unregister_iommu_tlb_event_notifier

Signed-off-by: Clement Mathieu--Drif 
---
 hw/pci/pci.c | 46 +++
 include/hw/pci/pci.h | 52 
 2 files changed, 98 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index be29c0375f..0ccd0656b7 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2896,6 +2896,52 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
 }
 }
 
+ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
+  bool priv_req, bool exec_req,
+  hwaddr addr, size_t length,
+  bool no_write, IOMMUTLBEntry *result,
+  size_t result_length,
+  uint32_t *err_count)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+
+assert(result_length);
+
+if (!iommu_mr || !pcie_ats_enabled(dev)) {
+return -EPERM;
+}
+return memory_region_iommu_ats_request_translation(iommu_mr, priv_req,
+   exec_req, addr, length,
+   no_write, result,
+   result_length,
+   err_count);
+}
+
+int pci_register_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUNotifier *n)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return -EPERM;
+}
+return memory_region_register_iommu_notifier(MEMORY_REGION(iommu_mr), n,
+ &error_fatal);
+}
+
+int pci_unregister_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return -EPERM;
+}
+memory_region_unregister_iommu_notifier(MEMORY_REGION(iommu_mr), n);
+return 0;
+}
+
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
 /*
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index a11366e08d..592e72aee9 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -461,6 +461,58 @@ bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, 
uint32_t pasid,
IOMMUNotifier *n, IOMMUNotify fn,
void *opaque);
 
+/**
+ * pci_ats_request_translation_pasid: perform an ATS request
+ *
+ * Return the number of translations stored in @result in case of success,
+ * a negative error code otherwise.
+ * -ENOMEM is returned when the result buffer is not large enough to store
+ * all the translations
+ *
+ * @dev: the ATS-capable PCI device
+ * @pasid: the pasid of the address space in which the translation will be made
+ * @priv_req: privileged mode bit (PASID TLP)
+ * @exec_req: execute request bit (PASID TLP)
+ * @addr: start address of the memory range to be translated
+ * @length: length of the memory range in bytes
+ * @no_write: request a read-only access translation (if supported by the 
IOMMU)
+ * @result: buffer in which the TLB entries will be stored
+ * @result_length: result buffer length
+ * @err_count: number of untranslated subregions
+ */
+ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
+  bool priv_req, bool exec_req,
+  hwaddr addr, size_t length,
+  bool no_write, IOMMUTLBEntry *result,
+  size_t result_length,
+  uint32_t *err_count);
+
+/**
+ * pci_register_iommu_tlb_event_notifier: register a notifier for changes to
+ * IOMMU translation entries in a specific address space.
+ *
+ * Returns 0 on success, or a negative errno otherwise.
+ *
+ * @dev: the device that wants to get notified
+ * @pasid: the pasid of the address space to track
+ * @n: the notifier to register
+ */
+int pci_register_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUNotifier *n);
+
+/**
+ * pci_unregister_iommu_tlb_event_notifier:

[PATCH v3 07/19] pcie: Helper function to check if ATS is enabled

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

ats_enabled checks whether the capability is
present or not. If so, we read the configuration space to get
the status of the feature (enabled or not).

Signed-off-by: Clement Mathieu--Drif 
---
 hw/pci/pcie.c | 9 +
 include/hw/pci/pcie.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 8186d64234..3b8fd6f33c 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1247,3 +1247,12 @@ bool pcie_pasid_enabled(const PCIDevice *dev)
 return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
 PCI_PASID_CTRL_ENABLE) != 0;
 }
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 63604ccc6e..7e7b8baa6e 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -158,4 +158,5 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
  bool exec_perm, bool priv_mod);
 
 bool pcie_pasid_enabled(const PCIDevice *dev);
+bool pcie_ats_enabled(const PCIDevice *dev);
 #endif /* QEMU_PCIE_H */
-- 
2.48.1

Re: [PATCH v2 0/2] s390x: support virtio-mem-pci

2025-02-21 Thread David Hildenbrand


On 21.02.25 00:24, Michael S. Tsirkin wrote:

On Tue, Jan 28, 2025 at 07:57:03PM +0100, David Hildenbrand wrote:

This is based-on [1], which adds MSI-X support to virtio-balloon-pci,
but can be applied independently.

Turns out it is fairly easy to get virtio-mem-pci running on s390x. We
only have to add MSI-X support to virtio-mem-pci, and wire-up the
(un)plugging in the machine.

Tried some simple stuff (hotplug/hotunplug/resize/reboot), and all seems
to be working as expected.

The kernel in the VM needs both, CONFIG_VIRTIO_PCI and CONFIG_VIRTIO_MEM
for it to work.

[1] https://lkml.kernel.org/r/20250115161425.246348-1-ar...@linux.ibm.com



Fails CI:

https://gitlab.com/mstredhat/qemu/-/jobs/9202574981


Hm, I thought the CI was happy when Thomas sent it upstream.

CRITICAL: log: qemu-system-i386: -device virtio-mem-pci: can't apply 
global virtio-mem-pci.vectors=0: Property 'virtio-mem-pci.vectors' not found


And the same for the virtio-balloon devices with a similar change.

Trying to run one offending cmdline on current upstream:

./qemu-system-i386 -display none -vga none -S -machine 
pc-q35-8.0,accel=tcg -device virtio-mem-pci
qemu-system-i386: -device virtio-mem-pci: the configuration is not 
prepared for memory devices (e.g., for memory hotplug), consider 
specifying the maxmem option



So I am not sure what is happening here? Is some commit in the pipeline 
breaking this?


--
Cheers,

David / dhildenb

Re: [PATCH 2/3] target/riscv/kvm: use env->sie to read/write 'sie' CSR

2025-02-21 Thread Andrew Jones

On Thu, Feb 20, 2025 at 01:13:12PM -0300, Daniel Henrique Barboza wrote:
> Using env->sie is clearer than using env->mie.

Maybe? Just as sstatus is a subset of mstatus, sip and sie can be
subsets of mip and mie. However, the AIA can change sip/sie so they
no longer alias mip/mie, which is why we have 'mvip' and 'sie' members
in CPURISCVState. In the end, for KVM, it doesn't really matter since
this is just s/r storage. I'd probably just drop this patch and keep
using mie. Otherwise, what about mip?

Thanks,
drew

> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/kvm/kvm-cpu.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
> index 484b6afe7c..fea03f3657 100644
> --- a/target/riscv/kvm/kvm-cpu.c
> +++ b/target/riscv/kvm/kvm-cpu.c
> @@ -610,7 +610,7 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
>  CPURISCVState *env = &RISCV_CPU(cs)->env;
>  
>  KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus);
> -KVM_RISCV_GET_CSR(cs, env, sie, env->mie);
> +KVM_RISCV_GET_CSR(cs, env, sie, env->sie);
>  KVM_RISCV_GET_CSR(cs, env, stvec, env->stvec);
>  KVM_RISCV_GET_CSR(cs, env, sscratch, env->sscratch);
>  KVM_RISCV_GET_CSR(cs, env, sepc, env->sepc);
> @@ -627,7 +627,7 @@ static int kvm_riscv_put_regs_csr(CPUState *cs)
>  CPURISCVState *env = &RISCV_CPU(cs)->env;
>  
>  KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus);
> -KVM_RISCV_SET_CSR(cs, env, sie, env->mie);
> +KVM_RISCV_SET_CSR(cs, env, sie, env->sie);
>  KVM_RISCV_SET_CSR(cs, env, stvec, env->stvec);
>  KVM_RISCV_SET_CSR(cs, env, sscratch, env->sscratch);
>  KVM_RISCV_SET_CSR(cs, env, sepc, env->sepc);
> -- 
> 2.48.1
> 
>

Re: [PATCH 3/3] target/riscv/kvm: reset all available KVM CSRs in kvm_reset()

2025-02-21 Thread Andrew Jones

On Thu, Feb 20, 2025 at 01:13:13PM -0300, Daniel Henrique Barboza wrote:
> Explicitly reset env->mstatus and env->sie.

mie was already getting set to zero, so that should have just been renamed
in the last patch, but I still think we should drop the last patch.

> Add a comment about env->mip
> being read/written into KVM 'sip' CSR.
> 
> We're also not read/writing 'scounteren' which is available in the KVM
> UAPI. Add it in kvm_reset() and get/put_regs_csr().
> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/kvm/kvm-cpu.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
> index fea03f3657..ee7a9295b4 100644
> --- a/target/riscv/kvm/kvm-cpu.c
> +++ b/target/riscv/kvm/kvm-cpu.c
> @@ -618,6 +618,7 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
>  KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
>  KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
>  KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
> +KVM_RISCV_GET_CSR(cs, env, scounteren, env->scounteren);

senvcfg is also missing.

>  
>  return 0;
>  }
> @@ -635,6 +636,7 @@ static int kvm_riscv_put_regs_csr(CPUState *cs)
>  KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
>  KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
>  KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
> +KVM_RISCV_SET_CSR(cs, env, scounteren, env->scounteren);
>  
>  return 0;
>  }
> @@ -1609,6 +1611,10 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
>  env->pc = cpu->env.kernel_addr;
>  env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
>  env->gpr[11] = cpu->env.fdt_addr;  /* a1 */
> +
> +/* sstatus is read/written into mstatus */

How about just a single comment above this function stating that we
reset all registers that we will s/r with csr get/put. Interested
parties can go look at get or put to see the mappings.

> +env->mstatus = 0;
> +env->sie = 0;
>  env->satp = 0;
>  env->mie = 0;
>  env->stvec = 0;
> @@ -1616,7 +1622,9 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
>  env->sepc = 0;
>  env->scause = 0;
>  env->stval = 0;
> +/* sip is read/written into mip */
>  env->mip = 0;
> +env->scounteren = 0;
>  }
>  
>  void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
> -- 
> 2.48.1
> 
>

Thanks,
drew

[PATCH v3 02/19] intel_iommu: Declare supported PASID size

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

PSS field of the ecap register stores the supported PASID size minus 1.
Thus, this commit adds support for 20bits PASIDs.

Signed-off-by: Clement Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 2 +-
 hw/i386/intel_iommu_internal.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 7fde0603bf..1b4aaffedc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4574,7 +4574,7 @@ static void vtd_cap_init(IntelIOMMUState *s)
 }
 
 if (s->pasid) {
-s->ecap |= VTD_ECAP_PASID;
+s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
 }
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e8b211e8b0..238f1f443f 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -192,6 +192,7 @@
 #define VTD_ECAP_SC (1ULL << 7)
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_SRS(1ULL << 31)
+#define VTD_ECAP_PSS(19ULL << 35)
 #define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
 #define VTD_ECAP_SLTS   (1ULL << 46)
-- 
2.48.1

Re: [PATCH 1/3] target/riscv/cpu: ignore TCG init for KVM CPUs in reset_hold

2025-02-21 Thread Andrew Jones

On Thu, Feb 20, 2025 at 01:13:11PM -0300, Daniel Henrique Barboza wrote:
> riscv_cpu_reset_hold() does a lot of TCG-related initializations that
> aren't relevant for KVM, but nevertheless are impacting the reset state
> of KVM vcpus.
> 
> When running a KVM guest, kvm_riscv_reset_vcpu() is called at the end of
> reset_hold(). At that point env->mstatus is initialized to a non-zero
> value, and it will be used to write 'sstatus' in the vcpu
> (kvm_arch_put_registers() then kvm_riscv_put_regs_csr()).
> 
> Do an early exit in riscv_cpu_reset_hold() if we're running KVM. All the
> KVM reset procedure will be centered in kvm_riscv_reset_vcpu().
> 
> While we're at it, remove the kvm_enabled() check in
> kvm_riscv_reset_vcpu() since it's already being gated in
> riscv_cpu_reset_hold().
> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/cpu.c | 9 +
>  target/riscv/kvm/kvm-cpu.c | 3 ---
>  2 files changed, 5 insertions(+), 7 deletions(-)
> 
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 522d6584e4..8e6e629ec4 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1050,6 +1050,11 @@ static void riscv_cpu_reset_hold(Object *obj, 
> ResetType type)
>  mcc->parent_phases.hold(obj, type);
>  }
>  #ifndef CONFIG_USER_ONLY
> +if (kvm_enabled()) {
> +kvm_riscv_reset_vcpu(cpu);
> +return;
> +}
> +
>  env->misa_mxl = mcc->misa_mxl_max;
>  env->priv = PRV_M;
>  env->mstatus &= ~(MSTATUS_MIE | MSTATUS_MPRV);
> @@ -1146,10 +1151,6 @@ static void riscv_cpu_reset_hold(Object *obj, 
> ResetType type)
>  env->rnmip = 0;
>  env->mnstatus = set_field(env->mnstatus, MNSTATUS_NMIE, false);
>  }
> -
> -if (kvm_enabled()) {
> -kvm_riscv_reset_vcpu(cpu);
> -}
>  #endif
>  }
>  
> diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
> index 23ce779359..484b6afe7c 100644
> --- a/target/riscv/kvm/kvm-cpu.c
> +++ b/target/riscv/kvm/kvm-cpu.c
> @@ -1603,9 +1603,6 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
>  CPURISCVState *env = &cpu->env;
>  int i;
>  
> -if (!kvm_enabled()) {
> -return;
> -}
>  for (i = 0; i < 32; i++) {
>  env->gpr[i] = 0;
>  }
> -- 
> 2.48.1
> 
>

Reviewed-by: Andrew Jones

[PATCH v3 09/19] pci: Add IOMMU operations to get memory regions with PASID

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

The region returned by this operation will be used as the input region
for ATS.

Signed-off-by: Clement Mathieu--Drif 
---
 include/hw/pci/pci.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4002bbeebd..644551550b 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -391,6 +391,22 @@ typedef struct PCIIOMMUOps {
  * @devfn: device and function number
  */
 AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+/**
+ * @get_memory_region_pasid: get the iommu memory region for a given
+ * device and pasid
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ *
+ * @pasid: the pasid associated with the requested memory region
+ */
+IOMMUMemoryRegion * (*get_memory_region_pasid)(PCIBus *bus,
+   void *opaque,
+   int devfn,
+   uint32_t pasid);
 /**
  * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
  *
-- 
2.48.1

Re: [PATCH v2 0/2] s390x: support virtio-mem-pci

2025-02-21 Thread David Hildenbrand


On 21.02.25 09:26, David Hildenbrand wrote:

On 21.02.25 00:24, Michael S. Tsirkin wrote:

On Tue, Jan 28, 2025 at 07:57:03PM +0100, David Hildenbrand wrote:

This is based-on [1], which adds MSI-X support to virtio-balloon-pci,
but can be applied independently.

Turns out it is fairly easy to get virtio-mem-pci running on s390x. We
only have to add MSI-X support to virtio-mem-pci, and wire-up the
(un)plugging in the machine.

Tried some simple stuff (hotplug/hotunplug/resize/reboot), and all seems
to be working as expected.

The kernel in the VM needs both, CONFIG_VIRTIO_PCI and CONFIG_VIRTIO_MEM
for it to work.

[1] https://lkml.kernel.org/r/20250115161425.246348-1-ar...@linux.ibm.com



Fails CI:

https://gitlab.com/mstredhat/qemu/-/jobs/9202574981


Hm, I thought the CI was happy when Thomas sent it upstream.

CRITICAL: log: qemu-system-i386: -device virtio-mem-pci: can't apply
global virtio-mem-pci.vectors=0: Property 'virtio-mem-pci.vectors' not found

And the same for the virtio-balloon devices with a similar change.

Trying to run one offending cmdline on current upstream:

./qemu-system-i386 -display none -vga none -S -machine
pc-q35-8.0,accel=tcg -device virtio-mem-pci
qemu-system-i386: -device virtio-mem-pci: the configuration is not
prepared for memory devices (e.g., for memory hotplug), consider
specifying the maxmem option


So I am not sure what is happening here? Is some commit in the pipeline
breaking this?


Note that these two patches, and the virtio-balloon-pci MSI-X
one are already upstream, Thomas included them in his s390 MR. I see 
that you have them included on your branch:


https://gitlab.com/mstredhat/qemu/-/commits/v03b-20-02-2025?ref_type=heads

The following commit in that tree is messed up:

* virtio-mem-pci: Allow setting nvectors, so we can use MSI-X

Likely, you can just drop these two patches from your tree; they are 
already upstream.


--
Cheers,

David / dhildenb

Re: [PATCH v6 2/4] migration: enable multifd and postcopy together

2025-02-21 Thread Prasad Pandit

Hello Fabiano,

On Thu, 20 Feb 2025 at 19:06, Fabiano Rosas  wrote:
> This is more or less the handshake idea. Or at least it could be
> included in that work.
>
> I have parked the handshake idea for now because I'm not seeing an
> immediate need for it and there are more pressing issues to be dealt
> with first such as bugs and coordinating the new features (and their
> possible outcomings) that IMO need to be looked at first.

* I see, okay.

> I'm not opposed to that idea. When I started working with migration I
> had the impression that was the direction and that we could put every
> workload in a pool of multifd threads. Now, knowing the code better, I'm
> not sure that's feasible. Specially the dependence on a "main" channel
> seems difficult to do away with. It's also somewhat convenient to have a
> main thread. But we could still attempt to group extra threads, such as
> what we're doing with the new thread pool in the device state series. At
> least thread management could be done entirely in a separate pool, main
> channel and all.
>

* True. To extend the two QEMUs working in tandem OR the handshake
idea further with the 'main' channel, let's say a user invokes
command:

$ virsh migrate --threads 4 --postcopy --postcopy-after-precopy ...

0) Channel = TCP socket connection between two machines.

1) The 'main' channel is the dedicated _control_ channel; And other
channels are dedicated _data_ channels. So with '--threads 4' option,
QEMU creates a total of 5 (main + 4) channels.

QEMU-A  -> 'main' channel-> QEMU-B
QEMU-A ->  'data' channel-1 -> QEMU-B
QEMU-A ->  'data' channel-2 -> QEMU-B
QEMU-A ->  'data' channel-3 -> QEMU-B
QEMU-A ->  'data' channel-4 -> QEMU-B

* Each channel is used by a thread of its own.

2) All channels are created _before_ the migration starts and stay
till the end of the migration. No asynchronous channels popping up
during migration, like a 'postcopy' channel now.

3) In the beginning source says 'Let's Precopy' to the destination on
the 'main' channel

 QEMU-A  -> main: Let's precopy  -> QEMU-B
 QEMU-A  <- main: Okay  <- QEMU-B

And migration data flows from QEMU-A  -> to -> QEMU-B  on the
'data' channels.

QEMU-A ->  'data' -> -> -> QEMU-B
QEMU-A ->  'data' -> -> -> QEMU-B
QEMU-A ->  'data' -> -> -> QEMU-B
QEMU-A ->  'data' -> -> -> QEMU-B

4) When it's time to switch to Postcopy,  source says 'Let's Postcopy'
to the destination on the 'main' channel

QEMU-A  -> main: Let's postcopy  -> QEMU-B
QEMU-A  <- main: Okay<- QEMU-B

And migration page requests/data use the same 'data' channels.

QEMU-A <- <- 'request/data'  -> -> QEMU-B
QEMU-A <- <- 'request/data'  -> -> QEMU-B
QEMU-A <- <- 'request/data'  -> -> QEMU-B
QEMU-A <- <- 'request/data'  -> -> QEMU-B

5) This way:
 - 'main' channel could be used to co-ordinate actions of two QEMUs.
 - All data channels may be used during Postcopy too, instead of
one channel now.
 - There may not be race conditions while creating channels.
 - No differentiation of precopy/multifd/postcopy/preempt etc. channels.

(thinking out loud if that sounds workable)

Thank you.
---
  - Prasad

Re: [PATCH 3/3] target/riscv/kvm: reset all available KVM CSRs in kvm_reset()

2025-02-21 Thread Andrew Jones

On Fri, Feb 21, 2025 at 09:45:35AM +0100, Andrew Jones wrote:
> On Thu, Feb 20, 2025 at 01:13:13PM -0300, Daniel Henrique Barboza wrote:
> > Explicitly reset env->mstatus and env->sie.
> 
> mie was already getting set to zero, so that should have just been renamed
> in the last patch, but I still think we should drop the last patch.
> 
> > Add a comment about env->mip
> > being read/written into KVM 'sip' CSR.
> > 
> > We're also not read/writing 'scounteren' which is available in the KVM
> > UAPI. Add it in kvm_reset() and get/put_regs_csr().
> > 
> > Signed-off-by: Daniel Henrique Barboza 
> > ---
> >  target/riscv/kvm/kvm-cpu.c | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
> > index fea03f3657..ee7a9295b4 100644
> > --- a/target/riscv/kvm/kvm-cpu.c
> > +++ b/target/riscv/kvm/kvm-cpu.c
> > @@ -618,6 +618,7 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
> >  KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
> >  KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
> >  KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
> > +KVM_RISCV_GET_CSR(cs, env, scounteren, env->scounteren);
> 
> senvcfg is also missing.
> 
> >  
> >  return 0;
> >  }
> > @@ -635,6 +636,7 @@ static int kvm_riscv_put_regs_csr(CPUState *cs)
> >  KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
> >  KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
> >  KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
> > +KVM_RISCV_SET_CSR(cs, env, scounteren, env->scounteren);
> >  
> >  return 0;
> >  }
> > @@ -1609,6 +1611,10 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
> >  env->pc = cpu->env.kernel_addr;
> >  env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
> >  env->gpr[11] = cpu->env.fdt_addr;  /* a1 */
> > +
> > +/* sstatus is read/written into mstatus */
> 
> How about just a single comment above this function stating that we
> reset all registers that we will s/r with csr get/put. Interested
> parties can go look at get or put to see the mappings.
> 
> > +env->mstatus = 0;
> > +env->sie = 0;
> >  env->satp = 0;
> >  env->mie = 0;
> >  env->stvec = 0;
> > @@ -1616,7 +1622,9 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
> >  env->sepc = 0;
> >  env->scause = 0;
> >  env->stval = 0;
> > +/* sip is read/written into mip */
> >  env->mip = 0;
> > +env->scounteren = 0;

It'd be nice to put all the above register assignments in the order of
struct kvm_riscv_csr, like get/put do.

Thanks,
drew


> >  }
> >  
> >  void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
> > -- 
> > 2.48.1
> > 
> >
> 
> Thanks,
> drew

[PATCH v4 08/14] acpi/generic_event_device: add logic to detect if HEST addr is available

2025-02-21 Thread Mauro Carvalho Chehab

Create a new property (x-has-hest-addr) and use it to detect if
the GHES table offsets can be calculated from the HEST address
(QEMU 10.0 and newer) or via the legacy way via an offset obtained
from the hardware_errors firmware file.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
---
 hw/acpi/generic_event_device.c |  1 +
 hw/arm/virt-acpi-build.c   | 18 --
 hw/core/machine.c  |  2 ++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 5346cae573b7..14d8513a5440 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -318,6 +318,7 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
AcpiEventStatusBits ev)
 
 static const Property acpi_ged_properties[] = {
 DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
+DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
ghes_state.use_hest_addr, false),
 };
 
 static const VMStateDescription vmstate_memhp_state = {
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 4439252e1a75..9de51105a513 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -897,6 +897,10 @@ static const AcpiNotificationSourceId hest_ghes_notify[] = 
{
 { ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
 };
 
+static const AcpiNotificationSourceId hest_ghes_notify_9_2[] = {
+{ ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
+};
+
 static
 void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
 {
@@ -950,7 +954,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables 
*tables)
 build_dbg2(tables_blob, tables->linker, vms);
 
 if (vms->ras) {
+static const AcpiNotificationSourceId *notify;
 AcpiGedState *acpi_ged_state;
+unsigned int notify_sz;
 AcpiGhesState *ags;
 
 acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
@@ -959,9 +965,17 @@ void virt_acpi_build(VirtMachineState *vms, 
AcpiBuildTables *tables)
 ags = &acpi_ged_state->ghes_state;
 
 acpi_add_table(table_offsets, tables_blob);
+
+if (!ags->use_hest_addr) {
+notify = hest_ghes_notify_9_2;
+notify_sz = ARRAY_SIZE(hest_ghes_notify_9_2);
+} else {
+notify = hest_ghes_notify;
+notify_sz = ARRAY_SIZE(hest_ghes_notify);
+}
+
 acpi_build_hest(ags, tables_blob, tables->hardware_errors,
-tables->linker, hest_ghes_notify,
-ARRAY_SIZE(hest_ghes_notify),
+tables->linker, notify, notify_sz,
 vms->oem_id, vms->oem_table_id);
 }
 }
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 02cff735b3fb..7a11e0f87b11 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -34,6 +34,7 @@
 #include "hw/virtio/virtio-pci.h"
 #include "hw/virtio/virtio-net.h"
 #include "hw/virtio/virtio-iommu.h"
+#include "hw/acpi/generic_event_device.h"
 #include "audio/audio.h"
 
 GlobalProperty hw_compat_9_2[] = {
@@ -43,6 +44,7 @@ GlobalProperty hw_compat_9_2[] = {
 { "virtio-balloon-pci-non-transitional", "vectors", "0" },
 { "virtio-mem-pci", "vectors", "0" },
 { "migration", "multifd-clean-tls-termination", "false" },
+{ TYPE_ACPI_GED, "x-has-hest-addr", "false" },
 };
 const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
 
-- 
2.48.1

[PATCH v4 13/14] qapi/acpi-hest: add an interface to do generic CPER error injection

2025-02-21 Thread Mauro Carvalho Chehab

Create a QMP command to be used for generic ACPI APEI hardware error
injection (HEST) via GHESv2, and add support for it for ARM guests.

Error injection uses ACPI_HEST_SRC_ID_QMP source ID to be platform
independent. This is mapped at arch virt bindings, depending on the
types supported by QEMU and by the BIOS. So, on ARM, this is supported
via ACPI_GHES_NOTIFY_GPIO notification type.

This patch is co-authored:
- original ghes logic to inject a simple ARM record by Shiju Jose;
- generic logic to handle block addresses by Jonathan Cameron;
- generic GHESv2 error inject by Mauro Carvalho Chehab;

Co-authored-by: Jonathan Cameron 
Co-authored-by: Shiju Jose 
Co-authored-by: Mauro Carvalho Chehab 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Shiju Jose 
Signed-off-by: Mauro Carvalho Chehab 
Acked-by: Igor Mammedov 
Acked-by: Markus Armbruster 
---

Changes since v9:
- ARM source IDs renamed to reflect SYNC/ASYNC;
- command name changed to better reflect what it does;
- some improvements at JSON documentation;
- add a check for QMP source at the notification logic.

Signed-off-by: Mauro Carvalho Chehab 
---
 MAINTAINERS  |  7 +++
 hw/acpi/Kconfig  |  5 +
 hw/acpi/ghes.c   |  2 +-
 hw/acpi/ghes_cper.c  | 38 ++
 hw/acpi/ghes_cper_stub.c | 19 +++
 hw/acpi/meson.build  |  2 ++
 hw/arm/virt-acpi-build.c |  1 +
 hw/arm/virt.c|  7 +++
 include/hw/acpi/ghes.h   |  1 +
 include/hw/arm/virt.h|  1 +
 qapi/acpi-hest.json  | 35 +++
 qapi/meson.build |  1 +
 qapi/qapi-schema.json|  1 +
 13 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 hw/acpi/ghes_cper.c
 create mode 100644 hw/acpi/ghes_cper_stub.c
 create mode 100644 qapi/acpi-hest.json

diff --git a/MAINTAINERS b/MAINTAINERS
index 3848d37a38d2..aed0f4cc62cd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2080,6 +2080,13 @@ F: hw/acpi/ghes.c
 F: include/hw/acpi/ghes.h
 F: docs/specs/acpi_hest_ghes.rst
 
+ACPI/HEST/GHES/ARM processor CPER
+R: Mauro Carvalho Chehab 
+S: Maintained
+F: hw/arm/ghes_cper.c
+F: hw/acpi/ghes_cper_stub.c
+F: qapi/acpi-hest.json
+
 ppc4xx
 L: qemu-...@nongnu.org
 S: Orphan
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 1d4e9f0845c0..daabbe6cd11e 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -51,6 +51,11 @@ config ACPI_APEI
 bool
 depends on ACPI
 
+config GHES_CPER
+bool
+depends on ACPI_APEI
+default y
+
 config ACPI_PCI
 bool
 depends on ACPI && PCI
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 401789259f60..3bea55e2e8e9 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -553,7 +553,7 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void 
*cper, size_t len,
 /* Write the generic error data entry into guest memory */
 cpu_physical_memory_write(cper_addr, cper, len);
 
-notifier_list_notify(&acpi_generic_error_notifiers, NULL);
+notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
 }
 
 int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
diff --git a/hw/acpi/ghes_cper.c b/hw/acpi/ghes_cper.c
new file mode 100644
index ..0a2d95dd8b27
--- /dev/null
+++ b/hw/acpi/ghes_cper.c
@@ -0,0 +1,38 @@
+/*
+ * CPER payload parser for error injection
+ *
+ * Copyright(C) 2024-2025 Huawei LTD.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/base64.h"
+#include "qemu/error-report.h"
+#include "qemu/uuid.h"
+#include "qapi/qapi-commands-acpi-hest.h"
+#include "hw/acpi/ghes.h"
+
+void qmp_inject_ghes_v2_error(const char *qmp_cper, Error **errp)
+{
+AcpiGhesState *ags;
+
+ags = acpi_ghes_get_state();
+if (!ags) {
+return;
+}
+
+uint8_t *cper;
+size_t  len;
+
+cper = qbase64_decode(qmp_cper, -1, &len, errp);
+if (!cper) {
+error_setg(errp, "missing GHES CPER payload");
+return;
+}
+
+ghes_record_cper_errors(ags, cper, len, ACPI_HEST_SRC_ID_QMP, errp);
+}
diff --git a/hw/acpi/ghes_cper_stub.c b/hw/acpi/ghes_cper_stub.c
new file mode 100644
index ..5ebc61970a78
--- /dev/null
+++ b/hw/acpi/ghes_cper_stub.c
@@ -0,0 +1,19 @@
+/*
+ * Stub interface for CPER payload parser for error injection
+ *
+ * Copyright(C) 2024-2025 Huawei LTD.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-acpi-hest.h"
+#include "hw/acpi/ghes.h"
+
+void qmp_inject_ghes_v2_error(const char *cper, Error **errp)
+{
+error_setg(errp, "GHES QMP error inject is not compiled in");
+}
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index 73f02b96912b..56b5d1ec9691 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -34,4 +34,6

[PATCH v4 06/14] acpi/ghes: create an ancillary acpi_ghes_get_state() function

2025-02-21 Thread Mauro Carvalho Chehab

Instead of having a function to check if ACPI is enabled
(acpi_ghes_present), change its logic to be more generic,
returning a pointer to AcpiGhesState.

Such a change allows cleaning up the GHES GED state code, avoiding
reading it multiple times, and simplifying the code.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
Reviewed-by:  Igor Mammedov 
---
 hw/acpi/ghes-stub.c|  7 ---
 hw/acpi/ghes.c | 38 ++
 include/hw/acpi/ghes.h | 14 --
 target/arm/kvm.c   |  7 +--
 4 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
index 7cec1812dad9..40f660c246fe 100644
--- a/hw/acpi/ghes-stub.c
+++ b/hw/acpi/ghes-stub.c
@@ -11,12 +11,13 @@
 #include "qemu/osdep.h"
 #include "hw/acpi/ghes.h"
 
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+uint64_t physical_address)
 {
 return -1;
 }
 
-bool acpi_ghes_present(void)
+AcpiGhesState *acpi_ghes_get_state(void)
 {
-return false;
+return NULL;
 }
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index f2d1cc7369f4..401789259f60 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -425,10 +425,6 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
  uint64_t *cper_addr,
  uint64_t *read_ack_register_addr)
 {
-if (!ghes_addr) {
-return;
-}
-
 /*
  * non-HEST version supports only one source, so no need to change
  * the start offset based on the source ID. Also, we can't validate
@@ -517,27 +513,16 @@ static void get_ghes_source_offsets(uint16_t source_id,
 NotifierList acpi_generic_error_notifiers =
 NOTIFIER_LIST_INITIALIZER(error_device_notifiers);
 
-void ghes_record_cper_errors(const void *cper, size_t len,
+void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
  uint16_t source_id, Error **errp)
 {
 uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
-AcpiGedState *acpi_ged_state;
-AcpiGhesState *ags;
 
 if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
 error_setg(errp, "GHES CPER record is too big: %zd", len);
 return;
 }
 
-acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
-   NULL));
-if (!acpi_ged_state) {
-error_setg(errp, "Can't find ACPI_GED object");
-return;
-}
-ags = &acpi_ged_state->ghes_state;
-
-
 if (!ags->use_hest_addr) {
 get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
  &cper_addr, &read_ack_register_addr);
@@ -546,11 +531,6 @@ void ghes_record_cper_errors(const void *cper, size_t len,
 &cper_addr, &read_ack_register_addr, errp);
 }
 
-if (!cper_addr) {
-error_setg(errp, "can not find Generic Error Status Block");
-return;
-}
-
 cpu_physical_memory_read(read_ack_register_addr,
  &read_ack_register, sizeof(read_ack_register));
 
@@ -576,7 +556,8 @@ void ghes_record_cper_errors(const void *cper, size_t len,
 notifier_list_notify(&acpi_generic_error_notifiers, NULL);
 }
 
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+uint64_t physical_address)
 {
 /* Memory Error Section Type */
 const uint8_t guid[] =
@@ -602,7 +583,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t 
physical_address)
 acpi_ghes_build_append_mem_cper(block, physical_address);
 
 /* Report the error */
-ghes_record_cper_errors(block->data, block->len, source_id, &errp);
+ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
 
 g_array_free(block, true);
 
@@ -614,7 +595,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t 
physical_address)
 return 0;
 }
 
-bool acpi_ghes_present(void)
+AcpiGhesState *acpi_ghes_get_state(void)
 {
 AcpiGedState *acpi_ged_state;
 AcpiGhesState *ags;
@@ -623,11 +604,12 @@ bool acpi_ghes_present(void)
NULL));
 
 if (!acpi_ged_state) {
-return false;
+return NULL;
 }
 ags = &acpi_ged_state->ghes_state;
-if (!ags->hw_error_le && !ags->hest_addr_le)
-return false;
 
-return true;
+if (!ags->hw_error_le && !ags->hest_addr_le) {
+return NULL;
+}
+return ags;
 }
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 219aa7ab4fe0..276f9dc076d9 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -99,15 +99,17 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
  const char *oem_id, con

[PATCH v4 14/14] scripts/ghes_inject: add a script to generate GHES error inject

2025-02-21 Thread Mauro Carvalho Chehab

Using the QMP GHESv2 API requires preparing a raw data array
containing a CPER record.

Add a helper script with subcommands to prepare such data.

Currently, only ARM Processor error CPER record is supported, by
using:
$ ghes_inject.py arm

which produces those warnings on Linux:

[  705.032426] [Firmware Warn]: GHES: Unhandled processor error type 0x02: 
cache error
[  774.866308] {4}[Hardware Error]: Hardware error from APEI Generic Hardware 
Error Source: 1
[  774.866583] {4}[Hardware Error]: event severity: recoverable
[  774.866738] {4}[Hardware Error]:  Error 0, type: recoverable
[  774.866889] {4}[Hardware Error]:   section_type: ARM processor error
[  774.867048] {4}[Hardware Error]:   MIDR: 0x000f0510
[  774.867189] {4}[Hardware Error]:   running state: 0x0
[  774.867321] {4}[Hardware Error]:   Power State Coordination Interface state: 0
[  774.867511] {4}[Hardware Error]:   Error info structure 0:
[  774.867679] {4}[Hardware Error]:   num errors: 2
[  774.867801] {4}[Hardware Error]:error_type: 0x02: cache error
[  774.867962] {4}[Hardware Error]:error_info: 0x0091000f
[  774.868124] {4}[Hardware Error]: transaction type: Data Access
[  774.868280] {4}[Hardware Error]: cache error, operation type: Data write
[  774.868465] {4}[Hardware Error]: cache level: 2
[  774.868592] {4}[Hardware Error]: processor context not corrupted
[  774.868774] [Firmware Warn]: GHES: Unhandled processor error type 0x02: 
cache error

This script allows customizing the error data, making it possible to
change all fields of the record. Please use:

$ ghes_inject.py arm -h

For more details about its usage.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
---
 MAINTAINERS|   3 +
 scripts/arm_processor_error.py | 476 ++
 scripts/ghes_inject.py |  51 +++
 scripts/qmp_helper.py  | 702 +
 4 files changed, 1232 insertions(+)
 create mode 100644 scripts/arm_processor_error.py
 create mode 100755 scripts/ghes_inject.py
 create mode 100755 scripts/qmp_helper.py

diff --git a/MAINTAINERS b/MAINTAINERS
index aed0f4cc62cd..203baee63712 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2086,6 +2086,9 @@ S: Maintained
 F: hw/arm/ghes_cper.c
 F: hw/acpi/ghes_cper_stub.c
 F: qapi/acpi-hest.json
+F: scripts/ghes_inject.py
+F: scripts/arm_processor_error.py
+F: scripts/qmp_helper.py
 
 ppc4xx
 L: qemu-...@nongnu.org
diff --git a/scripts/arm_processor_error.py b/scripts/arm_processor_error.py
new file mode 100644
index ..1dd42e42a877
--- /dev/null
+++ b/scripts/arm_processor_error.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+#
+# pylint: disable=C0301,C0114,R0903,R0912,R0913,R0914,R0915,W0511
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2024-2025 Mauro Carvalho Chehab 
+
+# TODO: current implementation has dummy defaults.
+#
+# For a better implementation, a QMP addition/call is needed to
+# retrieve some data for ARM Processor Error injection:
+#
+#   - ARM registers: power_state, mpidr.
+
+"""
+Generates an ARM processor error CPER, compatible with
+UEFI 2.9A Errata.
+
+Injecting such errors can be done using:
+
+$ ./scripts/ghes_inject.py arm
+Error injected.
+
+Produces a simple CPER register, as detected on a Linux guest:
+
+[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
+[Hardware Error]: event severity: recoverable
+[Hardware Error]:  Error 0, type: recoverable
+[Hardware Error]:   section_type: ARM processor error
+[Hardware Error]:   MIDR: 0x
+[Hardware Error]:   running state: 0x0
+[Hardware Error]:   Power State Coordination Interface state: 0
+[Hardware Error]:   Error info structure 0:
+[Hardware Error]:   num errors: 2
+[Hardware Error]:error_type: 0x02: cache error
+[Hardware Error]:error_info: 0x0091000f
+[Hardware Error]: transaction type: Data Access
+[Hardware Error]: cache error, operation type: Data write
+[Hardware Error]: cache level: 2
+[Hardware Error]: processor context not corrupted
+[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
+
+The ARM Processor Error message can be customized via command line
+parameters. For instance:
+
+$ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \
+--error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \
+-t cache tlb bus micro-arch tlb,micro-arch
+Error injected.
+
+Injects this error, as detected on a Linux guest:
+
+[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
+[Hardware Error]: event severity: recoverable
+[Hardware Error]:  Error 0, type: recoverable
+[Hardware Error]:   section_type: ARM processor error
+[Hardware Error]:   MIDR: 0x
+[Hardware Error]:   Multiprocessor Affinity Register (MPIDR): 
0x
+[Hardware Error]:   error affinity level: 0
+[Hardware Error]:   run

[PATCH v4 11/14] arm/virt: Wire up a GED error device for ACPI / GHES

2025-02-21 Thread Mauro Carvalho Chehab

Adds support to ARM virtualization to allow handling
generic error ACPI Event via GED & error source device.

It is aligned with Linux Kernel patch:
https://lore.kernel.org/lkml/1272350481-27951-8-git-send-email-ying.hu...@intel.com/

Co-authored-by: Mauro Carvalho Chehab 
Co-authored-by: Jonathan Cameron 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Mauro Carvalho Chehab 
Acked-by: Igor Mammedov 

---

Changes from v8:

- Added a call to the function that produces GHES generic
  records, as this is now added earlier in this series.

Signed-off-by: Mauro Carvalho Chehab 
---
 hw/acpi/generic_event_device.c |  2 +-
 hw/arm/virt-acpi-build.c   |  1 +
 hw/arm/virt.c  | 12 +++-
 include/hw/arm/virt.h  |  1 +
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 180eebbce1cd..f5e899155d34 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -331,7 +331,7 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
AcpiEventStatusBits ev)
 
 static const Property acpi_ged_properties[] = {
 DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
-DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
ghes_state.use_hest_addr, false),
+DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
ghes_state.use_hest_addr, true),
 };
 
 static const VMStateDescription vmstate_memhp_state = {
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 9de51105a513..4f174795ed60 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -861,6 +861,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 }
 
 acpi_dsdt_add_power_button(scope);
+aml_append(scope, aml_error_device());
 #ifdef CONFIG_TPM
 acpi_dsdt_add_tpm(scope, vms);
 #endif
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4a5a9666e916..3faf32f900b5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -678,7 +678,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState 
*vms)
 DeviceState *dev;
 MachineState *ms = MACHINE(vms);
 int irq = vms->irqmap[VIRT_ACPI_GED];
-uint32_t event = ACPI_GED_PWR_DOWN_EVT;
+uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_ERROR_EVT;
 
 if (ms->ram_slots) {
 event |= ACPI_GED_MEM_HOTPLUG_EVT;
@@ -1010,6 +1010,13 @@ static void virt_powerdown_req(Notifier *n, void *opaque)
 }
 }
 
+static void virt_generic_error_req(Notifier *n, void *opaque)
+{
+VirtMachineState *s = container_of(n, VirtMachineState, 
generic_error_notifier);
+
+acpi_send_event(s->acpi_dev, ACPI_GENERIC_ERROR);
+}
+
 static void create_gpio_keys(char *fdt, DeviceState *pl061_dev,
  uint32_t phandle)
 {
@@ -2404,6 +2411,9 @@ static void machvirt_init(MachineState *machine)
 
 if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
 vms->acpi_dev = create_acpi_ged(vms);
+vms->generic_error_notifier.notify = virt_generic_error_req;
+notifier_list_add(&acpi_generic_error_notifiers,
+  &vms->generic_error_notifier);
 } else {
 create_gpio_devices(vms, VIRT_GPIO, sysmem);
 }
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index c8e94e6aedc9..f3cf28436770 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -176,6 +176,7 @@ struct VirtMachineState {
 DeviceState *gic;
 DeviceState *acpi_dev;
 Notifier powerdown_notifier;
+Notifier generic_error_notifier;
 PCIBus *bus;
 char *oem_id;
 char *oem_table_id;
-- 
2.48.1

[PATCH v4 05/14] acpi/ghes: add a notifier to notify when error data is ready

2025-02-21 Thread Mauro Carvalho Chehab

Some error injection notify methods are async, like GPIO
notify. Add a notifier to be used when the error record is
ready to be sent to the guest OS.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
---
 hw/acpi/ghes.c | 5 -
 include/hw/acpi/ghes.h | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 4a4ea8f4be90..f2d1cc7369f4 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -514,6 +514,9 @@ static void get_ghes_source_offsets(uint16_t source_id,
 *read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
 }
 
+NotifierList acpi_generic_error_notifiers =
+NOTIFIER_LIST_INITIALIZER(error_device_notifiers);
+
 void ghes_record_cper_errors(const void *cper, size_t len,
  uint16_t source_id, Error **errp)
 {
@@ -570,7 +573,7 @@ void ghes_record_cper_errors(const void *cper, size_t len,
 /* Write the generic error data entry into guest memory */
 cpu_physical_memory_write(cper_addr, cper, len);
 
-return;
+notifier_list_notify(&acpi_generic_error_notifiers, NULL);
 }
 
 int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 51c6b6b33327..219aa7ab4fe0 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -24,6 +24,9 @@
 
 #include "hw/acpi/bios-linker-loader.h"
 #include "qapi/error.h"
+#include "qemu/notify.h"
+
+extern NotifierList acpi_generic_error_notifiers;
 
 /*
  * Values for Hardware Error Notification Type field
-- 
2.48.1

[PATCH v4 07/14] acpi/generic_event_device: Update GHES migration to cover hest addr

2025-02-21 Thread Mauro Carvalho Chehab

The GHES migration logic should now support HEST table location too.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
Reviewed-by: Igor Mammedov 
---
 hw/acpi/generic_event_device.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index c85d97ca3776..5346cae573b7 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -386,6 +386,34 @@ static const VMStateDescription vmstate_ghes_state = {
 }
 };
 
+static const VMStateDescription vmstate_hest = {
+.name = "acpi-hest",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (const VMStateField[]) {
+VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
+VMSTATE_END_OF_LIST()
+},
+};
+
+static bool hest_needed(void *opaque)
+{
+AcpiGedState *s = opaque;
+return s->ghes_state.hest_addr_le;
+}
+
+static const VMStateDescription vmstate_hest_state = {
+.name = "acpi-ged/hest",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = hest_needed,
+.fields = (const VMStateField[]) {
+VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
+   vmstate_hest, AcpiGhesState),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_acpi_ged = {
 .name = "acpi-ged",
 .version_id = 1,
@@ -398,6 +426,7 @@ static const VMStateDescription vmstate_acpi_ged = {
 &vmstate_memhp_state,
 &vmstate_cpuhp_state,
 &vmstate_ghes_state,
+&vmstate_hest_state,
 NULL
 }
 };
-- 
2.48.1

Re: [RFC PATCH v1 01/19] target/i386/hvf: fix a typo in a type name

2025-02-21 Thread Philippe Mathieu-Daudé


On 21/2/25 09:36, Wei Liu wrote:

The prefix x68 is wrong. Change it to x86.

Signed-off-by: Wei Liu 
---
  target/i386/hvf/hvf.c   |  2 +-
  target/i386/hvf/x86.c   |  4 ++--
  target/i386/hvf/x86.h   |  8 
  target/i386/hvf/x86_descr.c |  8 
  target/i386/hvf/x86_descr.h |  6 +++---
  target/i386/hvf/x86_task.c  | 22 +++---
  target/i386/hvf/x86_task.h  |  2 +-
  7 files changed, 26 insertions(+), 26 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH] tests/functional: Bump some arm test timeouts

2025-02-21 Thread Thomas Huth


On 21/02/2025 15.06, Peter Maydell wrote:

On my local machine, for a debug build, sbsaref_alpine takes
nearly 900s:

$ (cd build/x86 && ./pyvenv/bin/meson test --setup thorough --suite 
func-thorough func-aarch64-aarch64_sbsaref_alpine
)

1/1 qemu:func-thorough+func-aarch64-thorough+thorough / 
func-aarch64-aarch64_sbsaref_alpine
   OK 896.90s

arm_aspeed_rainier can also run close to its current timeout:
  6/44 qemu:func-thorough+func-arm-thorough+thorough / 
func-arm-arm_aspeed_rainier
   OK 215.75s

and arm_aspeed_ast2500 and arm_aspeed_ast2600 can go over:
13/44 qemu:func-thorough+func-arm-thorough+thorough / 
func-arm-arm_aspeed_ast2600
   OK 792.94s

27/44 qemu:func-thorough+func-arm-thorough+thorough / 
func-arm-arm_aspeed_ast2500
  TIMEOUT 480.01s

The sx1 test fails not on the overall meson timeout but on the
60 second timeout in some of the subtests.

Bump all these timeouts up a bit.

Signed-off-by: Peter Maydell 
---
This at least gets 'make -j4 check-functional' to passing for me
for an arm/aarch64 debug build, apart from the gpu test hang which
we're discussing in a different thread. Whole thing takes 25 mins...
---
  tests/functional/meson.build | 8 
  tests/functional/test_arm_sx1.py | 6 +++---
  2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index b516d21cba1..effa31701cf 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -15,16 +15,16 @@ test_timeouts = {
'aarch64_raspi4' : 480,
'aarch64_rme_virt' : 1200,
'aarch64_rme_sbsaref' : 1200,
-  'aarch64_sbsaref_alpine' : 720,
+  'aarch64_sbsaref_alpine' : 1200,


I wonder whether we should disable that test with @skipSlowTest() by 
default, since it's really very slow...?


Anyway, for this patch here:
Reviewed-by: Thomas Huth

[PATCH v4 01/14] acpi/ghes: prepare to change the way HEST offsets are calculated

2025-02-21 Thread Mauro Carvalho Chehab

Add a new ags flag to change the way HEST offsets are calculated.
Currently, the offsets needed to store ACPI HEST offsets and read ack
are calculated based on prior knowledge of the logic
which creates the HEST table.

Such logic is not generic: it neither allows easily adding more HEST
entries nor replicates what OSPM does.

As the next patches will be adding a more generic logic, add a
new use_hest_addr, set to false, in preparation for such changes.

Signed-off-by: Mauro Carvalho Chehab 
---
 hw/acpi/ghes.c   | 46 
 hw/arm/virt-acpi-build.c | 15 ++---
 include/hw/acpi/ghes.h   | 14 ++--
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index b709c177cdea..e49a03fdb94e 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -206,7 +206,8 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
  * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg 
blobs.
  * See docs/specs/acpi_hest_ghes.rst for blobs format.
  */
-static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
+static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
+   BIOSLinker *linker)
 {
 int i, error_status_block_offset;
 
@@ -251,13 +252,15 @@ static void build_ghes_error_table(GArray 
*hardware_errors, BIOSLinker *linker)
i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
 }
 
-/*
- * tell firmware to write hardware_errors GPA into
- * hardware_errors_addr fw_cfg, once the former has been initialized.
- */
-bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0,
- sizeof(uint64_t),
- ACPI_HW_ERROR_FW_CFG_FILE, 0);
+if (!ags->use_hest_addr) {
+/*
+ * Tell firmware to write hardware_errors GPA into
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
+ */
+bios_linker_loader_write_pointer(linker, 
ACPI_HW_ERROR_ADDR_FW_CFG_FILE,
+ 0, sizeof(uint64_t),
+ ACPI_HW_ERROR_FW_CFG_FILE, 0);
+}
 }
 
 /* Build Generic Hardware Error Source version 2 (GHESv2) */
@@ -331,14 +334,15 @@ static void build_ghes_v2(GArray *table_data,
 }
 
 /* Build Hardware Error Source Table */
-void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
+void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
+ GArray *hardware_errors,
  BIOSLinker *linker,
  const char *oem_id, const char *oem_table_id)
 {
 AcpiTable table = { .sig = "HEST", .rev = 1,
 .oem_id = oem_id, .oem_table_id = oem_table_id };
 
-build_ghes_error_table(hardware_errors, linker);
+build_ghes_error_table(ags, hardware_errors, linker);
 
 acpi_table_begin(&table, table_data);
 
@@ -357,11 +361,11 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState 
*s,
 fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
 hardware_error->len);
 
-/* Create a read-write fw_cfg file for Address */
-fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
-NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
-
-ags->present = true;
+if (!ags->use_hest_addr) {
+/* Create a read-write fw_cfg file for Address */
+fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
+NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
+}
 }
 
 static void get_hw_error_offsets(uint64_t ghes_addr,
@@ -411,8 +415,11 @@ void ghes_record_cper_errors(const void *cper, size_t len,
 ags = &acpi_ged_state->ghes_state;
 
 assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1);
-get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
- &cper_addr, &read_ack_register_addr);
+
+if (!ags->use_hest_addr) {
+get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
+ &cper_addr, &read_ack_register_addr);
+}
 
 if (!cper_addr) {
 error_setg(errp, "can not find Generic Error Status Block");
@@ -494,5 +501,8 @@ bool acpi_ghes_present(void)
 return false;
 }
 ags = &acpi_ged_state->ghes_state;
-return ags->present;
+if (!ags->hw_error_le)
+return false;
+
+return true;
 }
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 3ac8f8e17861..8ab8d11b6536 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -946,9 +946,18 @@ void virt_acpi_build(VirtMachineState *vms, 
AcpiBuildTables *tables)
 build_dbg2(tables_blob, tables->linker, vms);
 
 if (vms->ras) {
-acpi_add_table(table_offsets, tables_blob);
-acpi_build_hest(tables_blob, tables->har

[PATCH v4 00/14] Change ghes to use HEST-based offsets and add support for error inject

2025-02-21 Thread Mauro Carvalho Chehab

Now that the ghes preparation patches were merged, let's add support
for error injection.

In this series, the first 6 patches change the math used to calculate
offsets in the HEST table and the hardware_error firmware file, together
with its migration code. Migration was tested with both latest QEMU
released kernel and upstream, in both directions.

The next patches add a new QAPI to allow injecting GHESv2 errors, and a script 
using such QAPI
   to inject ARM Processor Error records.

---
v4:
- added an extra comment for AcpiGhesState structure;
- patches reordered;
- no functional changes, just code shift between the patches in this series.

v3:
- addressed more nits;
- hest_addr_le now points to the beginning of the HEST table;
- removed HEST from tests/data/acpi;
- added an extra patch to not use fw_cfg with virt-10.0 for hw_error_le

v2: 
- address some nits;
- improved ags cleanup patch and removed ags.present field;
- added some missing le*_to_cpu() calls;
- update date at copyright for new files to 2024-2025;
- qmp command changed to: inject-ghes-v2-error and since updated to 10.0;
- added HEST and DSDT tables after the changes to make check target happy.
  (two patches: first one whitelisting such tables; second one removing from
   whitelist and updating/adding such tables to tests/data/acpi)



Mauro Carvalho Chehab (14):
  acpi/ghes: prepare to change the way HEST offsets are calculated
  acpi/ghes: add a firmware file with HEST address
  acpi/ghes: Use HEST table offsets when preparing GHES records
  acpi/ghes: don't hard-code the number of sources for HEST table
  acpi/ghes: add a notifier to notify when error data is ready
  acpi/ghes: create an ancillary acpi_ghes_get_state() function
  acpi/generic_event_device: Update GHES migration to cover hest addr
  acpi/generic_event_device: add logic to detect if HEST addr is
available
  acpi/generic_event_device: add an APEI error device
  tests/acpi: virt: allow acpi table changes for a new table: HEST
  arm/virt: Wire up a GED error device for ACPI / GHES
  tests/acpi: virt: add a HEST table to aarch64 virt and update DSDT
  qapi/acpi-hest: add an interface to do generic CPER error injection
  scripts/ghes_inject: add a script to generate GHES error inject

 MAINTAINERS   |  10 +
 hw/acpi/Kconfig   |   5 +
 hw/acpi/aml-build.c   |  10 +
 hw/acpi/generic_event_device.c|  43 ++
 hw/acpi/ghes-stub.c   |   7 +-
 hw/acpi/ghes.c| 231 --
 hw/acpi/ghes_cper.c   |  38 +
 hw/acpi/ghes_cper_stub.c  |  19 +
 hw/acpi/meson.build   |   2 +
 hw/arm/virt-acpi-build.c  |  37 +-
 hw/arm/virt.c |  19 +-
 hw/core/machine.c |   2 +
 include/hw/acpi/acpi_dev_interface.h  |   1 +
 include/hw/acpi/aml-build.h   |   2 +
 include/hw/acpi/generic_event_device.h|   1 +
 include/hw/acpi/ghes.h|  54 +-
 include/hw/arm/virt.h |   2 +
 qapi/acpi-hest.json   |  35 +
 qapi/meson.build  |   1 +
 qapi/qapi-schema.json |   1 +
 scripts/arm_processor_error.py| 476 
 scripts/ghes_inject.py|  51 ++
 scripts/qmp_helper.py | 702 ++
 target/arm/kvm.c  |   7 +-
 tests/data/acpi/aarch64/virt/DSDT | Bin 5196 -> 5240 bytes
 .../data/acpi/aarch64/virt/DSDT.acpihmatvirt  | Bin 5282 -> 5326 bytes
 tests/data/acpi/aarch64/virt/DSDT.memhp   | Bin 6557 -> 6601 bytes
 tests/data/acpi/aarch64/virt/DSDT.pxb | Bin 7679 -> 7723 bytes
 tests/data/acpi/aarch64/virt/DSDT.topology| Bin 5398 -> 5442 bytes
 29 files changed, 1677 insertions(+), 79 deletions(-)
 create mode 100644 hw/acpi/ghes_cper.c
 create mode 100644 hw/acpi/ghes_cper_stub.c
 create mode 100644 qapi/acpi-hest.json
 create mode 100644 scripts/arm_processor_error.py
 create mode 100755 scripts/ghes_inject.py
 create mode 100755 scripts/qmp_helper.py

-- 
2.48.1

[PATCH v4 09/14] acpi/generic_event_device: add an APEI error device

2025-02-21 Thread Mauro Carvalho Chehab

Adds a generic error device to handle generic hardware error
events as specified in the ACPI 6.5 specification, section 18.3.2.7.2:
https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
using HID PNP0C33.

The PNP0C33 device is used to report hardware errors to
the guest via ACPI APEI Generic Hardware Error Source (GHES).

Co-authored-by: Mauro Carvalho Chehab 
Co-authored-by: Jonathan Cameron 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Igor Mammedov 
---
 hw/acpi/aml-build.c| 10 ++
 hw/acpi/generic_event_device.c | 13 +
 include/hw/acpi/acpi_dev_interface.h   |  1 +
 include/hw/acpi/aml-build.h|  2 ++
 include/hw/acpi/generic_event_device.h |  1 +
 5 files changed, 27 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f8f93a9f66c8..e4bd7b611372 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2614,3 +2614,13 @@ Aml *aml_i2c_serial_bus_device(uint16_t address, const 
char *resource_source)
 
 return var;
 }
+
+/* ACPI 5.0b: 18.3.2.6.2 Event Notification For Generic Error Sources */
+Aml *aml_error_device(void)
+{
+Aml *dev = aml_device(ACPI_APEI_ERROR_DEVICE);
+aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C33")));
+aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+
+return dev;
+}
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 14d8513a5440..180eebbce1cd 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -26,6 +26,7 @@ static const uint32_t ged_supported_events[] = {
 ACPI_GED_PWR_DOWN_EVT,
 ACPI_GED_NVDIMM_HOTPLUG_EVT,
 ACPI_GED_CPU_HOTPLUG_EVT,
+ACPI_GED_ERROR_EVT,
 };
 
 /*
@@ -116,6 +117,16 @@ void build_ged_aml(Aml *table, const char *name, 
HotplugHandler *hotplug_dev,
aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE),
   aml_int(0x80)));
 break;
+case ACPI_GED_ERROR_EVT:
+/*
+ * ACPI 5.0b: 5.6.6 Device Object Notifications
+ * Table 5-135 Error Device Notification Values
+ * Defines 0x80 as the value to be used on notifications
+ */
+aml_append(if_ctx,
+   aml_notify(aml_name(ACPI_APEI_ERROR_DEVICE),
+  aml_int(0x80)));
+break;
 case ACPI_GED_NVDIMM_HOTPLUG_EVT:
 aml_append(if_ctx,
aml_notify(aml_name("\\_SB.NVDR"),
@@ -295,6 +306,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
AcpiEventStatusBits ev)
 sel = ACPI_GED_MEM_HOTPLUG_EVT;
 } else if (ev & ACPI_POWER_DOWN_STATUS) {
 sel = ACPI_GED_PWR_DOWN_EVT;
+} else if (ev & ACPI_GENERIC_ERROR) {
+sel = ACPI_GED_ERROR_EVT;
 } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) {
 sel = ACPI_GED_NVDIMM_HOTPLUG_EVT;
 } else if (ev & ACPI_CPU_HOTPLUG_STATUS) {
diff --git a/include/hw/acpi/acpi_dev_interface.h 
b/include/hw/acpi/acpi_dev_interface.h
index 68d9d15f50aa..8294f8f0ccca 100644
--- a/include/hw/acpi/acpi_dev_interface.h
+++ b/include/hw/acpi/acpi_dev_interface.h
@@ -13,6 +13,7 @@ typedef enum {
 ACPI_NVDIMM_HOTPLUG_STATUS = 16,
 ACPI_VMGENID_CHANGE_STATUS = 32,
 ACPI_POWER_DOWN_STATUS = 64,
+ACPI_GENERIC_ERROR = 128,
 } AcpiEventStatusBits;
 
 #define TYPE_ACPI_DEVICE_IF "acpi-device-interface"
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index c18f68134246..f38e12971932 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -252,6 +252,7 @@ struct CrsRangeSet {
 /* Consumer/Producer */
 #define AML_SERIAL_BUS_FLAG_CONSUME_ONLY(1 << 1)
 
+#define ACPI_APEI_ERROR_DEVICE   "GEDD"
 /**
  * init_aml_allocator:
  *
@@ -382,6 +383,7 @@ Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, 
AmlTransferSize sz,
  uint8_t channel);
 Aml *aml_sleep(uint64_t msec);
 Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source);
+Aml *aml_error_device(void);
 
 /* Block AML object primitives */
 Aml *aml_scope(const char *name_format, ...) G_GNUC_PRINTF(1, 2);
diff --git a/include/hw/acpi/generic_event_device.h 
b/include/hw/acpi/generic_event_device.h
index d2dac87b4a9f..1c18ac296fcb 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -101,6 +101,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED)
 #define ACPI_GED_PWR_DOWN_EVT  0x2
 #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4
 #define ACPI_GED_CPU_HOTPLUG_EVT0x8
+#define ACPI_GED_ERROR_EVT  0x10
 
 typedef struct GEDState {
 MemoryRegion evt;
-- 
2.48.1

[PATCH v4 02/14] acpi/ghes: add a firmware file with HEST address

2025-02-21 Thread Mauro Carvalho Chehab

Store the HEST table address (a GPA), placing the address of the start of
the table in the hest_addr_le variable.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
Reviewed-by: Igor Mammedov 
---
 hw/acpi/ghes.c | 22 --
 include/hw/acpi/ghes.h |  7 ++-
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index e49a03fdb94e..ba37be9e7022 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -30,6 +30,7 @@
 
 #define ACPI_HW_ERROR_FW_CFG_FILE   "etc/hardware_errors"
 #define ACPI_HW_ERROR_ADDR_FW_CFG_FILE  "etc/hardware_errors_addr"
+#define ACPI_HEST_ADDR_FW_CFG_FILE  "etc/acpi_table_hest_addr"
 
 /* The max size in bytes for one error block */
 #define ACPI_GHES_MAX_RAW_DATA_LENGTH   (1 * KiB)
@@ -341,6 +342,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
 {
 AcpiTable table = { .sig = "HEST", .rev = 1,
 .oem_id = oem_id, .oem_table_id = oem_table_id };
+uint32_t hest_offset;
+
+hest_offset = table_data->len;
 
 build_ghes_error_table(ags, hardware_errors, linker);
 
@@ -352,6 +356,17 @@ void acpi_build_hest(AcpiGhesState *ags, GArray 
*table_data,
   ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA);
 
 acpi_table_end(linker, &table);
+
+if (ags->use_hest_addr) {
+/*
+ * Tell firmware to write into GPA the address of HEST via fw_cfg,
+ * once initialized.
+ */
+bios_linker_loader_write_pointer(linker,
+ ACPI_HEST_ADDR_FW_CFG_FILE, 0,
+ sizeof(uint64_t),
+ ACPI_BUILD_TABLE_FILE, hest_offset);
+}
 }
 
 void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
@@ -361,7 +376,10 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState 
*s,
 fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
 hardware_error->len);
 
-if (!ags->use_hest_addr) {
+if (ags->use_hest_addr) {
+fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
+NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
+} else {
 /* Create a read-write fw_cfg file for Address */
 fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
 NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
@@ -501,7 +519,7 @@ bool acpi_ghes_present(void)
 return false;
 }
 ags = &acpi_ged_state->ghes_state;
-if (!ags->hw_error_le)
+if (!ags->hw_error_le && !ags->hest_addr_le)
 return false;
 
 return true;
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index a3d62b96584f..454e97b5341c 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -71,9 +71,14 @@ enum {
  * meaning an offset from the etc/hardware_errors firmware address. This
  * is the default on QEMU 9.x.
  *
- * An offset value equal to zero means that GHES is not present.
+ * When use_hest_addr is true, the stored offset is placed at hest_addr_le,
+ * meaning an offset from the HEST table address from etc/acpi/tables firmware.
+ * This is the default for QEMU 10.x and above.
+ *
+ * If both offset values are equal to zero, it means that GHES is not present.
  */
 typedef struct AcpiGhesState {
+uint64_t hest_addr_le;
 uint64_t hw_error_le;
 bool use_hest_addr; /* Currently, always false */
 } AcpiGhesState;
-- 
2.48.1

[RFC PATCH v1 09/19] target/i386/hvf: use emul_ops->read_mem in x86_emu.c

2025-02-21 Thread Wei Liu

No functional change.

Signed-off-by: Wei Liu 
---
 target/i386/hvf/x86_emu.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index d0a8e221ea..f1244640e6 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -183,7 +183,7 @@ void write_val_ext(CPUX86State *env, target_ulong ptr, 
target_ulong val, int siz
 
 uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes)
 {
-vmx_read_mem(env_cpu(env), env->mmio_buf, ptr, bytes);
+emul_ops->read_mem(env_cpu(env), env->mmio_buf, ptr, bytes);
 return env->mmio_buf;
 }
 
@@ -509,8 +509,8 @@ static void exec_outs_single(CPUX86State *env, struct 
x86_decode *decode)
 {
 target_ulong addr = decode_linear_addr(env, decode, RSI(env), R_DS);
 
-vmx_read_mem(env_cpu(env), env->mmio_buf, addr,
- decode->operand_size);
+emul_ops->read_mem(env_cpu(env), env->mmio_buf, addr,
+   decode->operand_size);
 emul_ops->handle_io(env_cpu(env), DX(env), env->mmio_buf, 1,
 decode->operand_size, 1);
 
@@ -619,7 +619,7 @@ static void exec_scas_single(CPUX86State *env, struct 
x86_decode *decode)
 addr = linear_addr_size(env_cpu(env), RDI(env),
 decode->addressing_size, R_ES);
 decode->op[1].type = X86_VAR_IMMEDIATE;
-vmx_read_mem(env_cpu(env), &decode->op[1].val, addr, decode->operand_size);
+emul_ops->read_mem(env_cpu(env), &decode->op[1].val, addr, 
decode->operand_size);
 
 EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false);
 string_increment_reg(env, R_EDI, decode);
@@ -644,7 +644,7 @@ static void exec_lods_single(CPUX86State *env, struct 
x86_decode *decode)
 target_ulong val = 0;
 
 addr = decode_linear_addr(env, decode, RSI(env), R_DS);
-vmx_read_mem(env_cpu(env), &val, addr,  decode->operand_size);
+emul_ops->read_mem(env_cpu(env), &val, addr,  decode->operand_size);
 write_reg(env, R_EAX, val, decode->operand_size);
 
 string_increment_reg(env, R_ESI, decode);
-- 
2.39.5 (Apple Git-154)

[RFC PATCH v1 05/19] target/i386/hvf: remove HVF specific calls from x86_decode.c

2025-02-21 Thread Wei Liu

Use the newly defined emul_ops. This allows the module to be reused
by other accelerators in the future.

No functional change intended.

Signed-off-by: Wei Liu 
---
 target/i386/hvf/x86_decode.c | 19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index d6d5894e54..31285952ad 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -21,6 +21,7 @@
 #include "panic.h"
 #include "x86_decode.h"
 #include "vmx.h"
+#include "x86_emu.h"
 #include "x86_mmu.h"
 #include "x86_descr.h"
 
@@ -74,7 +75,7 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct 
x86_decode *decode,
 break;
 }
 target_ulong va  = linear_rip(env_cpu(env), env->eip) + decode->len;
-vmx_read_mem(env_cpu(env), &val, va, size);
+emul_ops->read_mem(env_cpu(env), &val, va, size);
 decode->len += size;
 
 return val;
@@ -1893,16 +1894,6 @@ static void decode_prefix(CPUX86State *env, struct 
x86_decode *decode)
 }
 }
 
-static struct x86_segment_descriptor get_cs_descriptor(CPUState *s)
-{
-struct vmx_segment vmx_cs;
-x86_segment_descriptor cs;
-vmx_read_segment_descriptor(s, &vmx_cs, R_CS);
-vmx_segment_to_x86_descriptor(s, &vmx_cs, &cs);
-
-return cs;
-}
-
 void set_addressing_size(CPUX86State *env, struct x86_decode *decode)
 {
 decode->addressing_size = -1;
@@ -1914,7 +1905,8 @@ void set_addressing_size(CPUX86State *env, struct 
x86_decode *decode)
 }
 } else if (!x86_is_long_mode(env_cpu(env))) {
 /* protected */
-x86_segment_descriptor cs = get_cs_descriptor(env_cpu(env));
+x86_segment_descriptor cs;
+emul_ops->read_segment_descriptor(env_cpu(env), &cs, R_CS);
 /* check db */
 if (cs.db) {
 if (decode->addr_size_override) {
@@ -1950,7 +1942,8 @@ void set_operand_size(CPUX86State *env, struct x86_decode 
*decode)
 }
 } else if (!x86_is_long_mode(env_cpu(env))) {
 /* protected */
-x86_segment_descriptor cs = get_cs_descriptor(env_cpu(env));
+x86_segment_descriptor cs;
+emul_ops->read_segment_descriptor(env_cpu(env), &cs, R_CS);
 /* check db */
 if (cs.db) {
 if (decode->op_size_override) {
-- 
2.39.5 (Apple Git-154)

Re: [PATCH 00/10] fpu: Remove remaining target ifdefs and build only once

2025-02-21 Thread Philippe Mathieu-Daudé


On 17/2/25 13:50, Peter Maydell wrote:


(1) floatx80 behaviours

Two QEMU targets implement floatx80: x86 and m68k. (PPC also has one
use in the xsrqpxp round-to-80-bit-precision operation, and the
Linux-user NWFPE emulation nominally supports it, but these are
minor.) x86 and m68k disagree about some of the corner cases of
floatx80 where the value has the explicit Integer bit wrongly set.  At
the moment the fpu code defaults to "floatx80 behaves like x86", with
TARGET_M68K ifdefs to get the other option.

The first six patches in this series remove those ifdefs, replacing
them with a floatx80_behaviour field in float_status which can have
various flags set to select the individual behaviours. The default is
"like x86", which allows us to set these only for m68k and not worry
about the minor "technically makes some use of floatx80" cases.




Peter Maydell (10):
   fpu: Make targets specify floatx80 default Inf at runtime
   target/m68k: Avoid using floatx80_infinity global const
   target/i386: Avoid using floatx80_infinity global const


Bothering again, we can add the floatx80_default_inf() refactor as the
first patch:

-- >8 --
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 09a40b43106..afae3906024 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -963,0 +964 @@ extern const floatx80 floatx80_infinity;
+floatx80 floatx80_default_inf(bool zSign, float_status *status);
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index f4fed9bfda9..f56ae886c53 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -5147,3 +5147 @@ floatx80 roundAndPackFloatx80(FloatX80RoundPrec 
roundingPrecision, bool zSign,

-return packFloatx80(zSign,
-floatx80_infinity_high,
-floatx80_infinity_low);
+return floatx80_default_inf(zSign, status);
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index f112c6c6737..741af09f908 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -1835 +1835 @@ void helper_fxtract(CPUX86State *env)
-ST1 = floatx80_infinity;
+ST1 = floatx80_default_inf(0, &env->fp_status);
@@ -2361,3 +2361,2 @@ void helper_fscale(CPUX86State *env)
-ST0 = (floatx80_is_neg(ST0) ?
-   floatx80_chs(floatx80_infinity) :
-   floatx80_infinity);
+ST0 = floatx80_default_inf(floatx80_is_neg(ST0),
+   &env->fp_status);
diff --git a/target/m68k/softfloat.c b/target/m68k/softfloat.c
index 02dcc03d15d..d1f150e641f 100644
--- a/target/m68k/softfloat.c
+++ b/target/m68k/softfloat.c
@@ -145,2 +145 @@ floatx80 floatx80_scale(floatx80 a, floatx80 b, 
float_status *status)

-return packFloatx80(aSign, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(aSign, status);
@@ -248 +247 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status)
-return packFloatx80(0, floatx80_infinity.high, 
floatx80_infinity.low);

+return floatx80_default_inf(0, status);
@@ -258,2 +257 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status)
-return packFloatx80(aSign, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(aSign, status);
@@ -445,2 +443 @@ floatx80 floatx80_logn(floatx80 a, float_status *status)
-return packFloatx80(0, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(0, status);
@@ -455,2 +452 @@ floatx80 floatx80_logn(floatx80 a, float_status *status)
-return packFloatx80(1, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(1, status);
@@ -613,2 +609 @@ floatx80 floatx80_log10(floatx80 a, float_status *status)
-return packFloatx80(0, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(0, status);
@@ -620,2 +615 @@ floatx80 floatx80_log10(floatx80 a, float_status *status)
-return packFloatx80(1, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(1, status);
@@ -671,2 +665 @@ floatx80 floatx80_log2(floatx80 a, float_status *status)
-return packFloatx80(0, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(0, status);
@@ -679,2 +672 @@ floatx80 floatx80_log2(floatx80 a, float_status *status)
-return packFloatx80(1, floatx80_infinity.high,
-floatx80_infinity.low);
+return floatx80_default_inf(1, status);
@@ -743,2 +735 @@ floatx80 floatx80_etox(floatx80 a, float_status *status)
-return packFloatx80(0, floatx80_in

[PATCH v4 03/14] acpi/ghes: Use HEST table offsets when preparing GHES records

2025-02-21 Thread Mauro Carvalho Chehab

There are two pointers that are needed during error injection:

1. The start address of the CPER block to be stored;
2. The address of the ack.

It is preferable to calculate them from the HEST table.  This allows
checking the source ID, the size of the table and the type of the
HEST error block structures.

Yet, keep the old code, as this is needed for migration purposes.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
---
 hw/acpi/ghes.c | 100 +
 include/hw/acpi/ghes.h |   2 +-
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index ba37be9e7022..7efea519f766 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -41,6 +41,12 @@
 /* Address offset in Generic Address Structure(GAS) */
 #define GAS_ADDR_OFFSET 4
 
+/*
+ * ACPI spec 1.0b
+ * 5.2.3 System Description Table Header
+ */
+#define ACPI_DESC_HEADER_OFFSET 36
+
 /*
  * The total size of Generic Error Data Entry
  * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
@@ -61,6 +67,30 @@
  */
 #define ACPI_GHES_GESB_SIZE 20
 
+/*
+ * See the memory layout map at docs/specs/acpi_hest_ghes.rst.
+ */
+
+/*
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
+ * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
+ */
+#define HEST_GHES_V2_ENTRY_SIZE  92
+
+/*
+ * ACPI 6.1: 18.3.2.7: Generic Hardware Error Source
+ * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
+ * Read Ack Register
+ */
+#define GHES_READ_ACK_ADDR_OFF  64
+
+/*
+ * ACPI 6.1: 18.3.2.7: Generic Hardware Error Source
+ * Table 18-341 Generic Hardware Error Source Structure
+ * Error Status Address
+ */
+#define GHES_ERR_STATUS_ADDR_OFF  20
+
 /*
  * Values for error_severity field
  */
@@ -412,6 +442,73 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
 *read_ack_register_addr = ghes_addr + sizeof(uint64_t);
 }
 
+static void get_ghes_source_offsets(uint16_t source_id,
+uint64_t hest_addr,
+uint64_t *cper_addr,
+uint64_t *read_ack_start_addr,
+Error **errp)
+{
+uint64_t hest_err_block_addr, hest_read_ack_addr;
+uint64_t err_source_entry, error_block_addr;
+uint32_t num_sources, i;
+
+hest_addr += ACPI_DESC_HEADER_OFFSET;
+
+cpu_physical_memory_read(hest_addr, &num_sources,
+ sizeof(num_sources));
+num_sources = le32_to_cpu(num_sources);
+
+err_source_entry = hest_addr + sizeof(num_sources);
+
+/*
+ * Currently, HEST error source navigation only supports GHESv2 tables
+ */
+for (i = 0; i < num_sources; i++) {
+uint64_t addr = err_source_entry;
+uint16_t type, src_id;
+
+cpu_physical_memory_read(addr, &type, sizeof(type));
+type = le16_to_cpu(type);
+
+/* For now, we only know the size of GHESv2 table */
+if (type != ACPI_GHES_SOURCE_GENERIC_ERROR_V2) {
+error_setg(errp, "HEST: type %d not supported.", type);
+return;
+}
+
+/* Compare CPER source ID at the GHESv2 structure */
+addr += sizeof(type);
+cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
+if (le16_to_cpu(src_id) == source_id) {
+break;
+}
+
+err_source_entry += HEST_GHES_V2_ENTRY_SIZE;
+}
+if (i == num_sources) {
+error_setg(errp, "HEST: Source %d not found.", source_id);
+return;
+}
+
+/* Navigate through table address pointers */
+hest_err_block_addr = err_source_entry + GHES_ERR_STATUS_ADDR_OFF +
+  GAS_ADDR_OFFSET;
+
+cpu_physical_memory_read(hest_err_block_addr, &error_block_addr,
+ sizeof(error_block_addr));
+error_block_addr = le64_to_cpu(error_block_addr);
+
+cpu_physical_memory_read(error_block_addr, cper_addr,
+ sizeof(*cper_addr));
+*cper_addr = le64_to_cpu(*cper_addr);
+
+hest_read_ack_addr = err_source_entry + GHES_READ_ACK_ADDR_OFF +
+ GAS_ADDR_OFFSET;
+cpu_physical_memory_read(hest_read_ack_addr, read_ack_start_addr,
+ sizeof(*read_ack_start_addr));
+*read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
+}
+
 void ghes_record_cper_errors(const void *cper, size_t len,
  uint16_t source_id, Error **errp)
 {
@@ -437,6 +534,9 @@ void ghes_record_cper_errors(const void *cper, size_t len,
 if (!ags->use_hest_addr) {
 get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
  &cper_addr, &read_ack_register_addr);
+} else {
+get_ghes_source_offsets(source_id, le64_to_cpu(ags->hest_addr_le),
+&cper_addr, &read_ack_register_addr, errp);
 }
 
 if (!cper_addr) {
dif

[PATCH v4 04/14] acpi/ghes: don't hard-code the number of sources for HEST table

2025-02-21 Thread Mauro Carvalho Chehab

The current code is actually dependent on having just one error
structure with a single source, as any change there would cause
migration issues.

As the number of sources should be arch-dependent, as it will depend on
what kind of notifications will exist, and how many errors can be
reported at the same time, change the logic to be more flexible,
allowing the number of sources to be defined when building the
HEST table by the caller.

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
Reviewed-by: Igor Mammedov 
---
 hw/acpi/ghes.c   | 38 +-
 hw/arm/virt-acpi-build.c |  8 +++-
 include/hw/acpi/ghes.h   | 17 -
 3 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 7efea519f766..4a4ea8f4be90 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -238,17 +238,17 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
  * See docs/specs/acpi_hest_ghes.rst for blobs format.
  */
 static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
-   BIOSLinker *linker)
+   BIOSLinker *linker, int num_sources)
 {
 int i, error_status_block_offset;
 
 /* Build error_block_address */
-for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+for (i = 0; i < num_sources; i++) {
 build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
 }
 
 /* Build read_ack_register */
-for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+for (i = 0; i < num_sources; i++) {
 /*
  * Initialize the value of read_ack_register to 1, so GHES can be
  * writable after (re)boot.
@@ -263,13 +263,13 @@ static void build_ghes_error_table(AcpiGhesState *ags, 
GArray *hardware_errors,
 
 /* Reserve space for Error Status Data Block */
 acpi_data_push(hardware_errors,
-ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
+ACPI_GHES_MAX_RAW_DATA_LENGTH * num_sources);
 
 /* Tell guest firmware to place hardware_errors blob into RAM */
 bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE,
  hardware_errors, sizeof(uint64_t), false);
 
-for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+for (i = 0; i < num_sources; i++) {
 /*
  * Tell firmware to patch error_block_address entries to point to
  * corresponding "Generic Error Status Block"
@@ -295,12 +295,14 @@ static void build_ghes_error_table(AcpiGhesState *ags, 
GArray *hardware_errors,
 }
 
 /* Build Generic Hardware Error Source version 2 (GHESv2) */
-static void build_ghes_v2(GArray *table_data,
-  BIOSLinker *linker,
-  enum AcpiGhesNotifyType notify,
-  uint16_t source_id)
+static void build_ghes_v2_entry(GArray *table_data,
+BIOSLinker *linker,
+const AcpiNotificationSourceId *notif_src,
+uint16_t index, int num_sources)
 {
 uint64_t address_offset;
+const uint16_t notify = notif_src->notify;
+const uint16_t source_id = notif_src->source_id;
 
 /*
  * Type:
@@ -331,7 +333,7 @@ static void build_ghes_v2(GArray *table_data,
address_offset + GAS_ADDR_OFFSET,
sizeof(uint64_t),
ACPI_HW_ERROR_FW_CFG_FILE,
-   source_id * sizeof(uint64_t));
+   index * sizeof(uint64_t));
 
 /* Notification Structure */
 build_ghes_hw_error_notification(table_data, notify);
@@ -351,8 +353,7 @@ static void build_ghes_v2(GArray *table_data,
address_offset + GAS_ADDR_OFFSET,
sizeof(uint64_t),
ACPI_HW_ERROR_FW_CFG_FILE,
-   (ACPI_GHES_ERROR_SOURCE_COUNT + source_id)
-   * sizeof(uint64_t));
+   (num_sources + index) * sizeof(uint64_t));
 
 /*
  * Read Ack Preserve field
@@ -368,22 +369,26 @@ static void build_ghes_v2(GArray *table_data,
 void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
  GArray *hardware_errors,
  BIOSLinker *linker,
+ const AcpiNotificationSourceId *notif_source,
+ int num_sources,
  const char *oem_id, const char *oem_table_id)
 {
 AcpiTable table = { .sig = "HEST", .rev = 1,
 .oem_id = oem_id, .oem_table_id = oem_table_id };
 uint32_t hest_offset;
+int i;
 
 hest_offset = table_data->len;
 
-build_ghes_error_table(ags, hardware_errors, linker);
+build_ghes_error_table(ags, hardware_erro

[PATCH v6] hw/arm/virt: Support larger highmem MMIO regions

2025-02-21 Thread Matthew R. Ochs

The MMIO region size required to support virtualized environments with
large PCI BAR regions can exceed the hardcoded limit configured in QEMU.
For example, a VM with multiple NVIDIA Grace-Hopper GPUs passed through
requires more MMIO memory than the amount provided by VIRT_HIGH_PCIE_MMIO
(currently 512GB). Instead of updating VIRT_HIGH_PCIE_MMIO, introduce a
new parameter, highmem-mmio-size, that specifies the MMIO size required
to support the VM configuration.

Example usage with 1TB MMIO region size:
-machine virt,gic-version=3,highmem-mmio-size=1T

Signed-off-by: Matthew R. Ochs 
Reviewed-by: Gavin Shan 
Reviewed-by: Shameer Kolothum 
Reviewed-by: Eric Auger 
Reviewed-by: Nicolin Chen 
---
v6: - Fixed minor coding style nit
v5: - Removed hyphens from power of 2
- Consistently use property name in all error messages
- Use #defines for default high PCIE MMIO size
- Use size_to_str() when printing size values
- Add comment clarifying that highmem-mmio-size will
  update the corresponding value in extended_memmap
v4: - Added default size to highmem-mmio-size description
v3: - Updated highmem-mmio-size description
v2: - Add unit suffix to example in commit message
- Use existing "high memory region" terminology
- Resolve minor braces nit

 docs/system/arm/virt.rst |  4 
 hw/arm/virt.c| 52 +++-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 0c9c2ce0351c..adf446c0a295 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -144,6 +144,10 @@ highmem-mmio
   Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
   The default is ``on``.
 
+highmem-mmio-size
+  Set the high memory region size for PCI MMIO. Must be a power of 2 and
+  greater than or equal to the default size (512G).
+
 gic-version
   Specify the version of the Generic Interrupt Controller (GIC) to provide.
   Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4a5a9666e916..ee69081ef421 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -53,6 +53,7 @@
 #include "hw/loader.h"
 #include "qapi/error.h"
 #include "qemu/bitops.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "hw/pci-host/gpex.h"
@@ -192,6 +193,10 @@ static const MemMapEntry base_memmap[] = {
 [VIRT_MEM] ={ GiB, LEGACY_RAMLIMIT_BYTES },
 };
 
+/* Update the docs for highmem-mmio-size when changing this default */
+#define DEFAULT_HIGH_PCIE_MMIO_SIZE_GB 512
+#define DEFAULT_HIGH_PCIE_MMIO_SIZE (DEFAULT_HIGH_PCIE_MMIO_SIZE_GB * GiB)
+
 /*
  * Highmem IO Regions: This memory map is floating, located after the RAM.
  * Each MemMapEntry base (GPA) will be dynamically computed, depending on the
@@ -207,13 +212,16 @@ static const MemMapEntry base_memmap[] = {
  * PA space for one specific region is always reserved, even if the region
  * has been disabled or doesn't fit into the PA space. However, the PA space
  * for the region won't be reserved in these circumstances with compact layout.
+ *
+ * Note that the highmem-mmio-size property will update the high PCIE MMIO size
+ * field in this array.
  */
 static MemMapEntry extended_memmap[] = {
 /* Additional 64 MB redist region (can contain up to 512 redistributors) */
 [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
 [VIRT_HIGH_PCIE_ECAM] = { 0x0, 256 * MiB },
 /* Second PCIe window */
-[VIRT_HIGH_PCIE_MMIO] = { 0x0, 512 * GiB },
+[VIRT_HIGH_PCIE_MMIO] = { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },
 };
 
 static const int a15irqmap[] = {
@@ -2550,6 +2558,40 @@ static void virt_set_highmem_mmio(Object *obj, bool 
value, Error **errp)
 vms->highmem_mmio = value;
 }
 
+static void virt_get_highmem_mmio_size(Object *obj, Visitor *v,
+   const char *name, void *opaque,
+   Error **errp)
+{
+uint64_t size = extended_memmap[VIRT_HIGH_PCIE_MMIO].size;
+
+visit_type_size(v, name, &size, errp);
+}
+
+static void virt_set_highmem_mmio_size(Object *obj, Visitor *v,
+   const char *name, void *opaque,
+   Error **errp)
+{
+uint64_t size;
+
+if (!visit_type_size(v, name, &size, errp)) {
+return;
+}
+
+if (!is_power_of_2(size)) {
+error_setg(errp, "highmem-mmio-size is not a power of 2");
+return;
+}
+
+if (size < DEFAULT_HIGH_PCIE_MMIO_SIZE) {
+char *sz = size_to_str(DEFAULT_HIGH_PCIE_MMIO_SIZE);
+error_setg(errp, "highmem-mmio-size cannot be set to a lower value "
+ "than the default (%s)", sz);
+g_free(sz);
+return;
+}
+
+extended_memmap[VIRT_HIGH_PCIE_MMIO].size = size;
+}
 
 static bool virt_get_its(Object *obj, Error **errp)
 {
@@ -3207,6 +3249,14 @@ static void virt_machine_class_in

[PATCH v4 12/14] tests/acpi: virt: add a HEST table to aarch64 virt and update DSDT

2025-02-21 Thread Mauro Carvalho Chehab

--- a/DSDT.dsl2025-01-28 09:38:15.155347858 +0100
+++ b/DSDT.dsl2025-01-28 09:39:01.684836954 +0100
@@ -9,9 +9,9 @@
  *
  * Original Table Header:
  * Signature"DSDT"
- * Length   0x1516 (5398)
+ * Length   0x1542 (5442)
  * Revision 0x02
- * Checksum 0x0F
+ * Checksum 0xE9
  * OEM ID   "BOCHS "
  * OEM Table ID "BXPC"
  * OEM Revision 0x0001 (1)
@@ -1931,6 +1931,11 @@
 {
 Notify (PWRB, 0x80) // Status Change
 }
+
+If (((Local0 & 0x10) == 0x10))
+{
+Notify (GEDD, 0x80) // Status Change
+}
 }
 }

@@ -1939,6 +1944,12 @@
 Name (_HID, "PNP0C0C" /* Power Button Device */)  // _HID: 
Hardware ID
 Name (_UID, Zero)  // _UID: Unique ID
 }
+
+Device (GEDD)
+{
+Name (_HID, "PNP0C33" /* Error Device */)  // _HID: Hardware ID
+Name (_UID, Zero)  // _UID: Unique ID
+}
 }
 }

Signed-off-by: Mauro Carvalho Chehab 
Reviewed-by: Jonathan Cameron 
---
 tests/data/acpi/aarch64/virt/DSDT | Bin 5196 -> 5240 bytes
 .../data/acpi/aarch64/virt/DSDT.acpihmatvirt  | Bin 5282 -> 5326 bytes
 tests/data/acpi/aarch64/virt/DSDT.memhp   | Bin 6557 -> 6601 bytes
 tests/data/acpi/aarch64/virt/DSDT.pxb | Bin 7679 -> 7723 bytes
 tests/data/acpi/aarch64/virt/DSDT.topology| Bin 5398 -> 5442 bytes
 tests/qtest/bios-tables-test-allowed-diff.h   |   1 -
 6 files changed, 1 deletion(-)

diff --git a/tests/data/acpi/aarch64/virt/DSDT 
b/tests/data/acpi/aarch64/virt/DSDT
index 
36d3e5d5a5e47359b6dcb3706f98b4f225677591..a182bd9d7182dccdf63c650d048c58f18505d001
 100644
GIT binary patch
delta 109
zcmX@3@k4{lCD}G*h$dM)euOOwJsW4+;nC=*7E+g>V+Q2D|zsED)Gn
zoxsJ!z{S)S5FX^j)c_F?VBivHb9Z%dnXE4&D;?b=31V}^dw9C=2KWUSI2#)?aKwjt
Hx-b9$X;vI^

delta 64
zcmeyNaYlp7CDV+Q2D|zsED)Gn
UoxsJ!z{S)S5FX?-*+E1W06%jPR{#J2

diff --git a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt 
b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt
index 
e6154d0355f84fdcc51387b4db8f9ee63acae4e9..af1f2b0eb0b77a80c5bd74f201d24f71e486627f
 100644
GIT binary patch
delta 110
zcmZ3ac}|ndCDc(oCIR8`a+lGdXii78eO-)SH|wBICY5U~+W=mjDBo
yK%2X(iwjpnbdzL2c#soEyoaX?Z-8HbfwO@#14n$Qrwc=LlO#wDl9aJAR0;r(tsHj%

delta 66
zcmX@7xk!`CCDE%Q&X{O%5jp|7vOwJsWyG4Q-^(NmJk>Ot;Fu6K`OMrn(
opv~RY#bxqO5n1WzCP@&RBi_T)g*U)2z`)tqn1Lfc)YF9l01l28

Re: [PATCH 01/10] fpu: Make targets specify floatx80 default Inf at runtime

2025-02-21 Thread Philippe Mathieu-Daudé


On 17/2/25 13:50, Peter Maydell wrote:

Currently we hardcode at compile time whether the floatx80 default
Infinity value has the explicit integer bit set or not (x86 sets it;
m68k does not).  To be able to compile softfloat once for all targets
we'd like to move this setting to runtime.

Define a new FloatX80Behaviour enum which is a set of flags that
define the target's floatx80 handling.  Initially we define just one
flag, for whether the default Infinity has the Integer bit set or
not, but we will expand this in future commits to cover the other
floatx80 target specifics that we currently make compile-time
settings.

Define a new function floatx80_default_inf() which returns the
appropriate default Infinity value of the given sign, and use it in
the code that was previously directly using the compile-time constant
floatx80_infinity_{low,high} values when packing an infinity into a
floatx80.

Since floatx80 is highly unlikely to be supported in any new
architecture, and the existing code is generally written as "default
to like x87, with an ifdef for m68k", we make the default value for
the floatx80 behaviour flags be "what x87 does".  This means we only
need to change the m68k target to specify the behaviour flags.

(Other users of floatx80 are the Arm NWFPE emulation, which is
obsolete and probably not actually doing the right thing anyway, and
the PPC xsrqpxp insn.  Making the default be "like x87" avoids our
needing to review and test for behaviour changes there.)

We will clean up the remaining uses of the floatx80_infinity global
constant in subsequent commits.

Signed-off-by: Peter Maydell 
---
  include/fpu/softfloat-helpers.h | 12 
  include/fpu/softfloat-types.h   | 13 +
  include/fpu/softfloat.h |  1 +
  fpu/softfloat.c |  7 +++
  target/m68k/cpu.c   |  6 ++
  fpu/softfloat-specialize.c.inc  | 10 ++
  6 files changed, 45 insertions(+), 4 deletions(-)




diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index f4fed9bfda9..b12ad2b42a9 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1860,7 +1860,8 @@ static floatx80 
floatx80_round_pack_canonical(FloatParts128 *p,
  
  case float_class_inf:

  /* x86 and m68k differ in the setting of the integer bit. */
-frac = floatx80_infinity_low;
+frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
+0 : (1ULL << 63);


Indent off, otherwise:

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 00/14] Change ghes to use HEST-based offsets and add support for error inject

2025-02-21 Thread Mauro Carvalho Chehab

Em Fri, 21 Feb 2025 13:23:06 +0100
Mauro Carvalho Chehab  escreveu:

> Em Fri, 21 Feb 2025 10:21:27 +
> Jonathan Cameron  escreveu:
> 
> > On Fri, 21 Feb 2025 07:38:23 +0100
> > Mauro Carvalho Chehab  wrote:
> >   
> > > Em Mon, 3 Feb 2025 16:22:36 +0100
> > > Igor Mammedov  escreveu:
> > > 
> > > > On Mon, 3 Feb 2025 11:09:34 +
> > > > Jonathan Cameron  wrote:
> > > >   
> > > > > On Fri, 31 Jan 2025 18:42:41 +0100
> > > > > Mauro Carvalho Chehab  wrote:
> > > > > 
> > > > > > Now that the ghes preparation patches were merged, let's add support
> > > > > > for error injection.
> > > > > > 
> > > > > > > On this series, the first 6 patches change the math used to 
> > > > > > calculate offsets at HEST
> > > > > > table and hardware_error firmware file, together with its migration 
> > > > > > code. Migration tested
> > > > > > with both latest QEMU released kernel and upstream, on both 
> > > > > > directions.
> > > > > > 
> > > > > > The next patches add a new QAPI to allow injecting GHESv2 errors, 
> > > > > > and a script using such QAPI
> > > > > >to inject ARM Processor Error records.
> > > > > > 
> > > > > > If I'm counting well, this is the 19th submission of my error 
> > > > > > inject patches.  
> > > > > 
> > > > > Looks good to me. All remaining trivial things are in the category
> > > > > of things to consider only if you are doing another spin.  The code
> > > > > ends up how I'd like it at the end of the series anyway, just
> > > > > a question of the precise path to that state!
> > > > 
> > > > if you look at series as a whole it's more or less fine (I guess you
> > > > and me got used to it)
> > > > 
> > > > however if you take it patch by patch (as if you've never seen it)
> > > > ordering is messed up (the same would apply to everyone after a while
> > > > when it's forgotten)
> > > > 
> > > > So I'd strongly suggest to restructure the series (especially 2-6/14).
> > > > re sum up my comments wrt ordering:
> > > > 
> > > > 0  add testcase for HEST table with current HEST as expected blob
> > > >(currently missing), so that we can be sure that we haven't messed
> > > >existing tables during refactoring.  
> > 
> > To potentially save time I think Igor is asking that before you do anything
> > at all you plug the existing test hole which is that we don't test HEST
> > at all.   Even after this series I think we don't test HEST.   
> 
> On a previous review (v2, I guess), Igor requested me to do the DSDT
> test just before and after the patch which is actually changing its
> content (patch 11). The HEST table is inside DSDT firmware, and it is
> already tested.
> 
> > You add
> > a stub hest and exclusion but then in patch 12 the HEST stub is deleted 
> > whereas
> > it should be replaced with the example data for the test.  
> 
> This was actually a misinterpretation from my side: patch 10 adds the
> etc/hardware_errors table (mistakenly naming it as HEST), but this
> was never tested. For the next submission, I'll drop etc/hardware_errors
> table from patches 10 and 12.
> 
> > That indeed doesn't address testing the error data storage which would be
> > a different problem.  
> > > 
> > > Not sure if I got this one. The HEST table is part of etc/acpi/tables,
> > > which is already tested, as you pointed at the previous reviews. Doing
> > > changes there is already detected. That's basically why we added patches
> > > 10 and 12:
> > > 
> > >   [PATCH v3 10/14] tests/acpi: virt: allow acpi table changes for a new 
> > > table: HEST
> > >   [PATCH v3 12/14] tests/acpi: virt: add a HEST table to aarch64 virt and 
> > > update DSDT
> > > 
> > > What tests don't have is a check for etc/hardware_errors firmware inside 
> > > tests/data/acpi/aarch64/virt/, but, IMO, we shouldn't add it there.
> > > 
> > > See, hardware_errors table contains only some skeleton space to
> > > store:
> > > 
> > >   - 1 or more error block address offsets;
> > >   - 1 or more read ack register;
> > >   - 1 or more HEST source entries containing CPER blocks.
> > > 
> > > There's nothing there to be actually checked: it is just some
> > > empty spaces with a variable number of fields.
> > > 
> > > With the new code, the actual number of CPER blocks and their
> > > corresponding offsets and read ack registers can be different on 
> > > different architectures. So, for instance, when we add x86 support,
> > > we'll likely start with just one error source entry, while arm will
> > > have two after this changeset.
> > > 
> > > Also, one possibility to address the issues reported by Gavin Shan at
> > > https://lore.kernel.org/qemu-devel/20250214041635.608012-1-gs...@redhat.com/
> > > would be to have one entry per each CPU. So, the size of such firmware
> > > could be dependent on the number of CPUs.
> > > 
> > > So, adding any validation to it would just cause pain and probably
> > > won't detect any problems.
> > 
> > If we did do this the test would use a fixed

Re: [PATCH v2 2/3] target/riscv/cpu.c: create flag for ziccrse

2025-02-21 Thread Andrew Jones

On Fri, Feb 21, 2025 at 11:18:33AM -0300, Daniel Henrique Barboza wrote:
> At this moment ziccrse is a TCG always enabled named feature for
> priv_ver > 1.11 that has no exclusive flag. In the next patch we'll make
> the KVM driver update ziccrse as well, turning it on/off depending on
> host settings, but for that we'll need an ext_ziccrse flag in the CPU
> state.
> 
> Create an exclusive flag for it like we do with other named features.
> As with any named features we already have, it won't be exposed to
> users. TCG will keep the same restriction for it (always enabled if
> has_priv_1_11 is true) and KVM will be free to turn it on/off as
> required.

Reading this as "KVM can choose" makes it sound wrong, since KVM can't
choose. However, KVM will turn it on/off depending on whether this
extension is/isn't present. So reading it as "TCG always has it on, but
KVM will turn it off when the extension isn't available", makes more
sense.

> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/cpu.c | 3 ++-
>  target/riscv/cpu_cfg.h | 3 +++
>  target/riscv/tcg/tcg-cpu.c | 2 ++
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 522d6584e4..fc4632ce36 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -105,7 +105,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
>  ISA_EXT_DATA_ENTRY(ziccamoa, PRIV_VERSION_1_11_0, has_priv_1_11),
>  ISA_EXT_DATA_ENTRY(ziccif, PRIV_VERSION_1_11_0, has_priv_1_11),
>  ISA_EXT_DATA_ENTRY(zicclsm, PRIV_VERSION_1_11_0, has_priv_1_11),
> -ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, has_priv_1_11),
> +ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, ext_ziccrse),
>  ISA_EXT_DATA_ENTRY(zicfilp, PRIV_VERSION_1_12_0, ext_zicfilp),
>  ISA_EXT_DATA_ENTRY(zicfiss, PRIV_VERSION_1_13_0, ext_zicfiss),
>  ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
> @@ -1749,6 +1749,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] 
> = {
>  MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true),
>  MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true),
>  MULTI_EXT_CFG_BOOL("sha", ext_sha, true),
> +MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true),
>  
>  { },
>  };
> diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
> index 3f3c1118c0..8a843482cc 100644
> --- a/target/riscv/cpu_cfg.h
> +++ b/target/riscv/cpu_cfg.h
> @@ -166,6 +166,9 @@ struct RISCVCPUConfig {
>  bool has_priv_1_12;
>  bool has_priv_1_11;
>  
> +/* Always enabled for TCG if has_priv_1_11 */
> +bool ext_ziccrse;
> +
>  /* Vendor-specific custom extensions */
>  bool ext_xtheadba;
>  bool ext_xtheadbb;
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index ea8d77d06a..c93612b1da 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -360,6 +360,8 @@ static void riscv_cpu_update_named_features(RISCVCPU *cpu)
>  
>  cpu->cfg.ext_sha = riscv_has_ext(&cpu->env, RVH) &&
> cpu->cfg.ext_ssstateen;
> +
> +cpu->cfg.ext_ziccrse = cpu->cfg.has_priv_1_11;
>  }
>  
>  static void riscv_cpu_validate_g(RISCVCPU *cpu)
> -- 
> 2.48.1
>

Other than my hangup on the commit message,

Reviewed-by: Andrew Jones

Re: [PATCH v3 01/28] hw/intc/aspeed: Support setting different memory and register size

2025-02-21 Thread Cédric Le Goater


On 2/20/25 06:45, Jamin Lin wrote:

Hi Cedric,


Subject: Re: [PATCH v3 01/28] hw/intc/aspeed: Support setting different
memory and register size

Hello Jamin,

On 2/13/25 04:35, Jamin Lin wrote:

According to the AST2700 datasheet, the INTC(CPU DIE) controller has
16KB
(0x4000) of register space, and the INTCIO (I/O DIE) controller has
1KB (0x400) of register space.

Introduced a new class attribute "mem_size" to set different memory
sizes for the INTC models in AST2700.

Introduced a new class attribute "reg_size" to set different register
sizes for the INTC models in AST2700.


Shouldn't that be multiple patches ?



I will add one patch for reg_size and another for mem_size.


Signed-off-by: Jamin Lin 
---
   hw/intc/aspeed_intc.c | 17 +
   include/hw/intc/aspeed_intc.h |  4 
   2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c index
126b711b94..316885a27a 100644
--- a/hw/intc/aspeed_intc.c
+++ b/hw/intc/aspeed_intc.c
@@ -117,10 +117,11 @@ static void aspeed_intc_set_irq(void *opaque, int

irq, int level)

   static uint64_t aspeed_intc_read(void *opaque, hwaddr offset, unsigned

int size)

   {
   AspeedINTCState *s = ASPEED_INTC(opaque);
+AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
   uint32_t addr = offset >> 2;
   uint32_t value = 0;

-if (addr >= ASPEED_INTC_NR_REGS) {


Side note, ASPEED_INTC_NR_REGS is defined as

#define ASPEED_INTC_NR_REGS (0x2000 >> 2)

and the register array as:

uint32_t regs[ASPEED_INTC_NR_REGS];

The number of regs looks pretty big for me. Are the registers covering the
whole MMIO aperture ?


According to the datasheet, the entire register address space size of INTC (CPU 
DIE) is 16KB
(0x12100000-0x12103FFF). Therefore, I set the memory size to 0x4000.
Currently, we need to use the "GICINT192-201 raw status and clear" register 
INTC1B04.
Thus, an array size of 0x2000 is sufficient.


yes but we are only using these regs :

REG32(GICINT128_EN, 0x1000)
REG32(GICINT128_STATUS, 0x1004)
REG32(GICINT129_EN, 0x1100)
REG32(GICINT129_STATUS, 0x1104)
REG32(GICINT130_EN, 0x1200)
REG32(GICINT130_STATUS, 0x1204)
REG32(GICINT131_EN, 0x1300)
REG32(GICINT131_STATUS, 0x1304)
REG32(GICINT132_EN, 0x1400)
REG32(GICINT132_STATUS, 0x1404)
REG32(GICINT133_EN, 0x1500)
REG32(GICINT133_STATUS, 0x1504)
REG32(GICINT134_EN, 0x1600)
REG32(GICINT134_STATUS, 0x1604)
REG32(GICINT135_EN, 0x1700)
REG32(GICINT135_STATUS, 0x1704)
REG32(GICINT136_EN, 0x1800)
REG32(GICINT136_STATUS, 0x1804)
REG32(GICINT192_201_EN, 0x1B00)
REG32(GICINT192_201_STATUS, 0x1B04)

so the first 0x1000 are unused.




However, we are going to increase the size to 0x3000 to support the 
co-processors SSP and TSP
in another patch series.
We also need to include the following register definitions:

/* SSP INTC Registers */
REG32(SSPINT128_EN, 0x2000)
REG32(SSPINT128_STATUS, 0x2004)
REG32(SSPINT129_EN, 0x2100)
REG32(SSPINT129_STATUS, 0x2104)
REG32(SSPINT130_EN, 0x2200)
REG32(SSPINT130_STATUS, 0x2204)
REG32(SSPINT131_EN, 0x2300)
REG32(SSPINT131_STATUS, 0x2304)
REG32(SSPINT132_EN, 0x2400)
REG32(SSPINT132_STATUS, 0x2404)
REG32(SSPINT133_EN, 0x2500)
REG32(SSPINT133_STATUS, 0x2504)
REG32(SSPINT134_EN, 0x2600)
REG32(SSPINT134_STATUS, 0x2604)
REG32(SSPINT135_EN, 0x2700)
REG32(SSPINT135_STATUS, 0x2704)
REG32(SSPINT136_EN, 0x2800)
REG32(SSPINT136_STATUS, 0x2804)
REG32(SSPINT137_EN, 0x2900)
REG32(SSPINT137_STATUS, 0x2904)
REG32(SSPINT138_EN, 0x2A00)
REG32(SSPINT138_STATUS, 0x2A04)
REG32(SSPINT160_169_EN, 0x2B00)
REG32(SSPINT160_169_STATUS, 0x2B04)




+if (offset >= aic->reg_size) {


This is dead code since the MMIO aperture has the same size. You could
remove the check.


Will remove.



   qemu_log_mask(LOG_GUEST_ERROR,
 "%s: Out-of-bounds read at offset 0x%"

HWADDR_PRIx "\n",

 __func__, offset); @@ -143,7 +144,7 @@

static

void aspeed_intc_write(void *opaque, hwaddr offset, uint64_t data,
   uint32_t change;
   uint32_t irq;

-if (addr >= ASPEED_INTC_NR_REGS) {
+if (offset >= aic->reg_size) {
   qemu_log_mask(LOG_GUEST_ERROR,
 "%s: Out-of-bounds write at offset 0x%"

HWADDR_PRIx "\n",

 __func__, offset); @@ -302,10 +303,16 @@
static void aspeed_intc_realize(DeviceState *dev, Error **errp)
   AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
   int i;

+memory_region_init(&s->iomem_container, OBJECT(s),
+TYPE_ASPEED_INTC ".container", aic->mem_size);
+
+sysbus_init_mmio(sbd, &s->iomem_container);


Why introduce a c

Re: [PATCH 01/10] fpu: Make targets specify floatx80 default Inf at runtime

2025-02-21 Thread Peter Maydell

On Fri, 21 Feb 2025 at 14:42, Philippe Mathieu-Daudé  wrote:
>
> On 17/2/25 13:50, Peter Maydell wrote:
> > Currently we hardcode at compile time whether the floatx80 default
> > Infinity value has the explicit integer bit set or not (x86 sets it;
> > m68k does not).  To be able to compile softfloat once for all targets
> > we'd like to move this setting to runtime.
> >
> > Define a new FloatX80Behaviour enum which is a set of flags that
> > define the target's floatx80 handling.  Initially we define just one
> > flag, for whether the default Infinity has the Integer bit set or
> > not, but we will expand this in future commits to cover the other
> > floatx80 target specifics that we currently make compile-time
> > settings.
> >
> > Define a new function floatx80_default_inf() which returns the
> > appropriate default Infinity value of the given sign, and use it in
> > the code that was previously directly using the compile-time constant
> > floatx80_infinity_{low,high} values when packing an infinity into a
> > floatx80.
> >
> > Since floatx80 is highly unlikely to be supported in any new
> > architecture, and the existing code is generally written as "default
> > to like x87, with an ifdef for m68k", we make the default value for
> > the floatx80 behaviour flags be "what x87 does".  This means we only
> > need to change the m68k target to specify the behaviour flags.
> >
> > (Other users of floatx80 are the Arm NWFPE emulation, which is
> > obsolete and probably not actually doing the right thing anyway, and
> > the PPC xsrqpxp insn.  Making the default be "like x87" avoids our
> > needing to review and test for behaviour changes there.)
> >
> > We will clean up the remaining uses of the floatx80_infinity global
> > constant in subsequent commits.
> >
> > Signed-off-by: Peter Maydell 
> > ---
> >   include/fpu/softfloat-helpers.h | 12 
> >   include/fpu/softfloat-types.h   | 13 +
> >   include/fpu/softfloat.h |  1 +
> >   fpu/softfloat.c |  7 +++
> >   target/m68k/cpu.c   |  6 ++
> >   fpu/softfloat-specialize.c.inc  | 10 ++
> >   6 files changed, 45 insertions(+), 4 deletions(-)
>
>
> > diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> > index f4fed9bfda9..b12ad2b42a9 100644
> > --- a/fpu/softfloat.c
> > +++ b/fpu/softfloat.c
> > @@ -1860,7 +1860,8 @@ static floatx80 
> > floatx80_round_pack_canonical(FloatParts128 *p,
> >
> >   case float_class_inf:
> >   /* x86 and m68k differ in the setting of the integer bit. */
> > -frac = floatx80_infinity_low;
> > +frac = s->floatx80_behaviour & 
> > floatx80_default_inf_int_bit_is_zero ?
> > +0 : (1ULL << 63);
>
> Indent off

This is the indent emacs uses here, and it's the usual
"4 spaces in for an expression continued onto the next line"
I think.

-- PMM

Re: [PULL 15/50] hw/char: sifive_uart: Print uart characters async

2025-02-21 Thread Clément Chigot

On Fri, Feb 14, 2025 at 1:52 PM Clément Chigot  wrote:
>
> Hi Alistair,
>
> I've an issue following this patch. When the system is reset (e.g
> using HTIF syscalls), the fifo might not be empty and thus some
> characters are lost.
> I discovered it on a Windows host. But by extending
> "TX_INTERRUPT_TRIGGER_DELAY_NS" to a huge value, I'm able to reproduce
> on Linux as well.

The root cause of my issue was unrelated to these early shutdowns. On
Windows, the character device behind `-serial mon:stdio`
(char-win-stdio) doesn't provide an `add_watch` method. Therefore,
`qemu_chr_fe_add_watch` calls always result in an error, flushing the
fifo. I saw in @Philippe Mathieu-Daudé patch about pl011 that
`G_SOURCE_CONTINUE` is returned instead of calling it and it does
work. @Alistair Francis  do you remember if there was a reason for
calling `add_watch` ?

> I've tried to flush within an unrealized function but it didn't work.
> Any suggestions ?

FTR, I still have found a solution here using
qemu_register_shutdown_notifier. Though I'm wondering if this is
useful: the cases where a shutdown occurs between two "fifo_update"
seems really narrow, but they could happen.
 @Philippe Mathieu-Daudé AFAICT, the new pl011 and other char devices
implementing write fifo have the same issue. Thus, pinging you here to
get your advice.

Thanks,
Clément

> >  static void sifive_uart_reset_enter(Object *obj, ResetType type)
> >  {
> > ...
> > +fifo8_create(&s->tx_fifo, SIFIVE_UART_TX_FIFO_SIZE);
>
> I'm also wondering if that part could not lead to memory leak.
> `fifo8_destroy` is never called and AFAIK, there are ways to reset a
> device dynamically (e.g snapshot, though not sure if it's supported
> here).
>
> Thanks, Clément

Re: [PATCH] tests/functional: Bump some arm test timeouts

2025-02-21 Thread Alex Bennée

Thomas Huth  writes:

> On 21/02/2025 15.06, Peter Maydell wrote:
>> On my local machine, for a debug build, sbsaref_alpine takes
>> nearly 900s:
>> $ (cd build/x86 && ./pyvenv/bin/meson test --setup thorough --suite
>> func-thorough func-aarch64-aarch64_sbsaref_alpine
>> )
>> 1/1 qemu:func-thorough+func-aarch64-thorough+thorough /
>> func-aarch64-aarch64_sbsaref_alpine
>>OK 896.90s
>> arm_aspeed_rainier can also run close to its current timeout:
>>   6/44 qemu:func-thorough+func-arm-thorough+thorough / 
>> func-arm-arm_aspeed_rainier
>>OK 215.75s
>> and arm_aspeed_ast2500 and arm_aspeed_ast2600 can go over:
>> 13/44 qemu:func-thorough+func-arm-thorough+thorough / 
>> func-arm-arm_aspeed_ast2600
>>OK 792.94s
>> 27/44 qemu:func-thorough+func-arm-thorough+thorough /
>> func-arm-arm_aspeed_ast2500
>>   TIMEOUT 480.01s
>> The sx1 test fails not on the overall meson timeout but on the
>> 60 second timeout in some of the subtests.
>> Bump all these timeouts up a bit.
>> Signed-off-by: Peter Maydell 
>> ---
>> This at least gets 'make -j4 check-functional' to passing for me
>> for an arm/aarch64 debug build, apart from the gpu test hang which
>> we're discussing in a different thread. Whole thing takes 25 mins...
>> ---
>>   tests/functional/meson.build | 8 
>>   tests/functional/test_arm_sx1.py | 6 +++---
>>   2 files changed, 7 insertions(+), 7 deletions(-)
>> diff --git a/tests/functional/meson.build
>> b/tests/functional/meson.build
>> index b516d21cba1..effa31701cf 100644
>> --- a/tests/functional/meson.build
>> +++ b/tests/functional/meson.build
>> @@ -15,16 +15,16 @@ test_timeouts = {
>> 'aarch64_raspi4' : 480,
>> 'aarch64_rme_virt' : 1200,
>> 'aarch64_rme_sbsaref' : 1200,
>> -  'aarch64_sbsaref_alpine' : 720,
>> +  'aarch64_sbsaref_alpine' : 1200,
>
> I wonder whether we should disable that test with @skipSlowTest() by
> default, since it's really very slow...?

The pauth subtest is marked as @skipSlowTest() - but I think we can drop
test_sbsaref_alpine_linux_cortex_a57 and
test_sbsaref_alpine_linux_max_pauth_off because we don't actually verify
anything so it's wasted cycles booting something that we know already
boots.

>
> Anyway, for this patch here:
> Reviewed-by: Thomas Huth 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

[PATCH v3 2/3] target/riscv/cpu.c: create flag for ziccrse

2025-02-21 Thread Daniel Henrique Barboza

At this moment ziccrse is a TCG always enabled named feature for
priv_ver > 1.11 that has no exclusive flag. In the next patch we'll make
the KVM driver turn ziccrse off if the extension isn't available in the
host, and we'll need an ext_ziccrse flag in the CPU state for that.

Create an exclusive flag for it like we do with other named features.
As with any named features we already have, it won't be exposed to
users.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 
---
 target/riscv/cpu.c | 3 ++-
 target/riscv/cpu_cfg.h | 3 +++
 target/riscv/tcg/tcg-cpu.c | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 522d6584e4..fc4632ce36 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -105,7 +105,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(ziccamoa, PRIV_VERSION_1_11_0, has_priv_1_11),
 ISA_EXT_DATA_ENTRY(ziccif, PRIV_VERSION_1_11_0, has_priv_1_11),
 ISA_EXT_DATA_ENTRY(zicclsm, PRIV_VERSION_1_11_0, has_priv_1_11),
-ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, has_priv_1_11),
+ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, ext_ziccrse),
 ISA_EXT_DATA_ENTRY(zicfilp, PRIV_VERSION_1_12_0, ext_zicfilp),
 ISA_EXT_DATA_ENTRY(zicfiss, PRIV_VERSION_1_13_0, ext_zicfiss),
 ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
@@ -1749,6 +1749,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = 
{
 MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true),
 MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true),
 MULTI_EXT_CFG_BOOL("sha", ext_sha, true),
+MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true),
 
 { },
 };
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index 3f3c1118c0..8a843482cc 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -166,6 +166,9 @@ struct RISCVCPUConfig {
 bool has_priv_1_12;
 bool has_priv_1_11;
 
+/* Always enabled for TCG if has_priv_1_11 */
+bool ext_ziccrse;
+
 /* Vendor-specific custom extensions */
 bool ext_xtheadba;
 bool ext_xtheadbb;
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index ea8d77d06a..c93612b1da 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -360,6 +360,8 @@ static void riscv_cpu_update_named_features(RISCVCPU *cpu)
 
 cpu->cfg.ext_sha = riscv_has_ext(&cpu->env, RVH) &&
cpu->cfg.ext_ssstateen;
+
+cpu->cfg.ext_ziccrse = cpu->cfg.has_priv_1_11;
 }
 
 static void riscv_cpu_validate_g(RISCVCPU *cpu)
-- 
2.48.1

[PATCH v3 0/3] target/riscv/kvm: update to Linux 6.14-rc3

2025-02-21 Thread Daniel Henrique Barboza

Hi,

In this series we made changes in the commit msg in patch 2 to make it
less ambiguous what the KVM driver will do with ziccrse. 

No other changes made. Patches based on alistair/riscv_to_apply.next.

Changes from v2:
- patch 2:
  - reworded commit message
- v2 link: 
https://lore.kernel.org/qemu-riscv/20250221141834.626722-1-dbarb...@ventanamicro.com/


Daniel Henrique Barboza (3):
  linux-headers: Update to Linux v6.14-rc3
  target/riscv/cpu.c: create flag for ziccrse
  target/riscv/kvm: add extensions after 6.14-rc3 update

 include/standard-headers/linux/ethtool.h  |  4 +
 include/standard-headers/linux/fuse.h | 76 ++-
 .../linux/input-event-codes.h |  1 +
 include/standard-headers/linux/pci_regs.h | 16 ++--
 include/standard-headers/linux/virtio_pci.h   | 14 
 linux-headers/asm-arm64/kvm.h |  3 -
 linux-headers/asm-loongarch/kvm_para.h|  1 +
 linux-headers/asm-riscv/kvm.h |  7 +-
 linux-headers/asm-x86/kvm.h   |  1 +
 linux-headers/linux/iommufd.h | 35 ++---
 linux-headers/linux/kvm.h |  8 +-
 linux-headers/linux/stddef.h  | 13 +++-
 linux-headers/linux/vduse.h   |  2 +-
 target/riscv/cpu.c|  3 +-
 target/riscv/cpu_cfg.h|  3 +
 target/riscv/kvm/kvm-cpu.c|  3 +
 target/riscv/tcg/tcg-cpu.c|  2 +
 17 files changed, 156 insertions(+), 36 deletions(-)

-- 
2.48.1

[PATCH v3 1/3] linux-headers: Update to Linux v6.14-rc3

2025-02-21 Thread Daniel Henrique Barboza

Update headers to retrieve the latest KVM caps for RISC-V.

Signed-off-by: Daniel Henrique Barboza 
---
 include/standard-headers/linux/ethtool.h  |  4 +
 include/standard-headers/linux/fuse.h | 76 ++-
 .../linux/input-event-codes.h |  1 +
 include/standard-headers/linux/pci_regs.h | 16 ++--
 include/standard-headers/linux/virtio_pci.h   | 14 
 linux-headers/asm-arm64/kvm.h |  3 -
 linux-headers/asm-loongarch/kvm_para.h|  1 +
 linux-headers/asm-riscv/kvm.h |  7 +-
 linux-headers/asm-x86/kvm.h   |  1 +
 linux-headers/linux/iommufd.h | 35 ++---
 linux-headers/linux/kvm.h |  8 +-
 linux-headers/linux/stddef.h  | 13 +++-
 linux-headers/linux/vduse.h   |  2 +-
 13 files changed, 146 insertions(+), 35 deletions(-)

diff --git a/include/standard-headers/linux/ethtool.h 
b/include/standard-headers/linux/ethtool.h
index 67c47912e5..e83382531c 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -681,6 +681,8 @@ enum ethtool_link_ext_substate_module {
  * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
  * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
  * @ETH_SS_STATS_RMON: names of RMON statistics
+ * @ETH_SS_STATS_PHY: names of PHY(dev) statistics
+ * @ETH_SS_TS_FLAGS: hardware timestamping flags
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -706,6 +708,8 @@ enum ethtool_stringset {
ETH_SS_STATS_ETH_MAC,
ETH_SS_STATS_ETH_CTRL,
ETH_SS_STATS_RMON,
+   ETH_SS_STATS_PHY,
+   ETH_SS_TS_FLAGS,
 
/* add new constants above here */
ETH_SS_COUNT
diff --git a/include/standard-headers/linux/fuse.h 
b/include/standard-headers/linux/fuse.h
index 889e12ad15..d303effb2a 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -220,6 +220,15 @@
  *
  *  7.41
  *  - add FUSE_ALLOW_IDMAP
+ *  7.42
+ *  - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
+ *structures:
+ *- struct fuse_uring_ent_in_out
+ *- struct fuse_uring_req_header
+ *- struct fuse_uring_cmd_req
+ *- FUSE_URING_IN_OUT_HEADER_SZ
+ *- FUSE_URING_OP_IN_OUT_SZ
+ *- enum fuse_uring_cmd
  */
 
 #ifndef _LINUX_FUSE_H
@@ -251,7 +260,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 41
+#define FUSE_KERNEL_MINOR_VERSION 42
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -421,6 +430,7 @@ struct fuse_file_lock {
  * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high 
bit
  * of the request ID indicates resend requests
  * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
+ * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  */
 #define FUSE_ASYNC_READ(1 << 0)
 #define FUSE_POSIX_LOCKS   (1 << 1)
@@ -467,6 +477,7 @@ struct fuse_file_lock {
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX   FUSE_DIRECT_IO_ALLOW_MMAP
 #define FUSE_ALLOW_IDMAP   (1ULL << 40)
+#define FUSE_OVER_IO_URING (1ULL << 41)
 
 /**
  * CUSE INIT request/reply flags
@@ -1202,4 +1213,67 @@ struct fuse_supp_groups {
uint32_tgroups[];
 };
 
+/**
+ * Size of the ring buffer header
+ */
+#define FUSE_URING_IN_OUT_HEADER_SZ 128
+#define FUSE_URING_OP_IN_OUT_SZ 128
+
+/* Used as part of the fuse_uring_req_header */
+struct fuse_uring_ent_in_out {
+   uint64_t flags;
+
+   /*
+* commit ID to be used in a reply to a ring request (see also
+* struct fuse_uring_cmd_req)
+*/
+   uint64_t commit_id;
+
+   /* size of user payload buffer */
+   uint32_t payload_sz;
+   uint32_t padding;
+
+   uint64_t reserved;
+};
+
+/**
+ * Header for all fuse-io-uring requests
+ */
+struct fuse_uring_req_header {
+   /* struct fuse_in_header / struct fuse_out_header */
+   char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
+
+   /* per op code header */
+   char op_in[FUSE_URING_OP_IN_OUT_SZ];
+
+   struct fuse_uring_ent_in_out ring_ent_in_out;
+};
+
+/**
+ * sqe commands to the kernel
+ */
+enum fuse_uring_cmd {
+   FUSE_IO_URING_CMD_INVALID = 0,
+
+   /* register the request buffer and fetch a fuse request */
+   FUSE_IO_URING_CMD_REGISTER = 1,
+
+   /* commit fuse request result and fetch next request */
+   FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
+};
+
+/**
+ * In the 80B command area of the SQE.
+ */
+struct fuse_uring_cmd_req {
+   uint64_t flags;
+
+   /* entry identifier for commits */
+   uint64_t commit_id;
+
+   /* queue the command is for (queue index) */
+   uint16_t qid;
+   uint8_t padding[6];
+};
+
 #endif /* _LINUX_FUSE_H */
diff --git a/include/standard-headers/linux/input-event-codes.h 
b/

[PATCH v3 3/3] target/riscv/kvm: add extensions after 6.14-rc3 update

2025-02-21 Thread Daniel Henrique Barboza

Expose ziccrse, zabha and svvptc.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 
---
 target/riscv/kvm/kvm-cpu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 23ce779359..471fd554b3 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -274,6 +274,7 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, 
CPUState *cs)
 static KVMCPUConfig kvm_multi_ext_cfgs[] = {
 KVM_EXT_CFG("zicbom", ext_zicbom, KVM_RISCV_ISA_EXT_ZICBOM),
 KVM_EXT_CFG("zicboz", ext_zicboz, KVM_RISCV_ISA_EXT_ZICBOZ),
+KVM_EXT_CFG("ziccrse", ext_ziccrse, KVM_RISCV_ISA_EXT_ZICCRSE),
 KVM_EXT_CFG("zicntr", ext_zicntr, KVM_RISCV_ISA_EXT_ZICNTR),
 KVM_EXT_CFG("zicond", ext_zicond, KVM_RISCV_ISA_EXT_ZICOND),
 KVM_EXT_CFG("zicsr", ext_zicsr, KVM_RISCV_ISA_EXT_ZICSR),
@@ -283,6 +284,7 @@ static KVMCPUConfig kvm_multi_ext_cfgs[] = {
 KVM_EXT_CFG("zihpm", ext_zihpm, KVM_RISCV_ISA_EXT_ZIHPM),
 KVM_EXT_CFG("zimop", ext_zimop, KVM_RISCV_ISA_EXT_ZIMOP),
 KVM_EXT_CFG("zcmop", ext_zcmop, KVM_RISCV_ISA_EXT_ZCMOP),
+KVM_EXT_CFG("zabha", ext_zabha, KVM_RISCV_ISA_EXT_ZABHA),
 KVM_EXT_CFG("zacas", ext_zacas, KVM_RISCV_ISA_EXT_ZACAS),
 KVM_EXT_CFG("zawrs", ext_zawrs, KVM_RISCV_ISA_EXT_ZAWRS),
 KVM_EXT_CFG("zfa", ext_zfa, KVM_RISCV_ISA_EXT_ZFA),
@@ -325,6 +327,7 @@ static KVMCPUConfig kvm_multi_ext_cfgs[] = {
 KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL),
 KVM_EXT_CFG("svnapot", ext_svnapot, KVM_RISCV_ISA_EXT_SVNAPOT),
 KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT),
+KVM_EXT_CFG("svvptc", ext_svvptc, KVM_RISCV_ISA_EXT_SVVPTC),
 };
 
 static void *kvmconfig_get_cfg_addr(RISCVCPU *cpu, KVMCPUConfig *kvmcfg)
-- 
2.48.1

[PATCH 13/15] rust: memory: wrap MemoryRegion with Opaque<>

2025-02-21 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 rust/qemu-api/src/bindings.rs |  3 ---
 rust/qemu-api/src/memory.rs   | 30 --
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs
index b791ca6d87f..26cc8de0cf2 100644
--- a/rust/qemu-api/src/bindings.rs
+++ b/rust/qemu-api/src/bindings.rs
@@ -34,9 +34,6 @@ unsafe impl Sync for CharBackend {}
 unsafe impl Send for Chardev {}
 unsafe impl Sync for Chardev {}
 
-unsafe impl Send for MemoryRegion {}
-unsafe impl Sync for MemoryRegion {}
-
 unsafe impl Send for ObjectClass {}
 unsafe impl Sync for ObjectClass {}
 
diff --git a/rust/qemu-api/src/memory.rs b/rust/qemu-api/src/memory.rs
index 713c494ca2e..fdb1ea11fcf 100644
--- a/rust/qemu-api/src/memory.rs
+++ b/rust/qemu-api/src/memory.rs
@@ -6,9 +6,8 @@
 
 use std::{
 ffi::{CStr, CString},
-marker::{PhantomData, PhantomPinned},
+marker::PhantomData,
 os::raw::{c_uint, c_void},
-ptr::addr_of,
 };
 
 pub use bindings::{hwaddr, MemTxAttrs};
@@ -16,6 +15,7 @@
 use crate::{
 bindings::{self, device_endian, memory_region_init_io},
 callbacks::FnCall,
+cell::Opaque,
 prelude::*,
 zeroable::Zeroable,
 };
@@ -132,13 +132,13 @@ fn default() -> Self {
 }
 }
 
-/// A safe wrapper around [`bindings::MemoryRegion`].  Compared to the
-/// underlying C struct it is marked as pinned because the QOM tree
-/// contains a pointer to it.
-pub struct MemoryRegion {
-inner: bindings::MemoryRegion,
-_pin: PhantomPinned,
-}
+/// A safe wrapper around [`bindings::MemoryRegion`].
+#[repr(transparent)]
+#[derive(qemu_api_macros::Wrapper)]
+pub struct MemoryRegion(Opaque<bindings::MemoryRegion>);
+
+unsafe impl Send for MemoryRegion {}
+unsafe impl Sync for MemoryRegion {}
 
 impl MemoryRegion {
 // inline to ensure that it is not included in tests, which only
@@ -174,13 +174,15 @@ pub fn init_io>(
 size: u64,
 ) {
 unsafe {
-Self::do_init_io(&mut self.inner, owner.cast::(), &ops.0, 
name, size);
+Self::do_init_io(
+self.0.as_mut_ptr(),
+owner.cast::(),
+&ops.0,
+name,
+size,
+);
 }
 }
-
-pub(crate) const fn as_mut_ptr(&self) -> *mut bindings::MemoryRegion {
-addr_of!(self.inner) as *mut _
-}
 }
 
 unsafe impl ObjectType for MemoryRegion {
-- 
2.48.1

Re: [PATCH rfcv2 13/20] intel_iommu: Add PASID cache management infrastructure

2025-02-21 Thread Eric Auger





On 2/19/25 9:22 AM, Zhenzhong Duan wrote:
> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
> pasid entry and track PASID usage and future PASID tagged DMA address
> translation support in vIOMMU.
>
> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and
> never freed. For other pasid, VTDAddressSpace instance is created/destroyed
> per the guest pasid entry set up/destroy for passthrough devices. While for
> emulated devices, VTDAddressSpace instance is created in the PASID tagged DMA
> translation and be destroyed per guest PASID cache invalidation. This focuses
> on the PASID cache management for passthrough devices as there is no PASID
> capable emulated devices yet.
>
> When guest modifies a PASID entry, QEMU will capture the guest pasid selective
> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per 
> the
> invalidation reasons:
>
> *) a present pasid entry moved to non-present
> *) a present pasid entry to be a present entry
> *) a non-present pasid entry moved to present
>
> vIOMMU emulator could figure out the reason by fetching latest guest pasid 
> entry
> and compare it with the PASID cache.
>
> Signed-off-by: Yi Liu 
> Signed-off-by: Yi Sun 
> Signed-off-by: Zhenzhong Duan 
> ---
>  hw/i386/intel_iommu_internal.h |  29 ++
>  include/hw/i386/intel_iommu.h  |   6 +
>  hw/i386/intel_iommu.c  | 484 -
Is there any way to split this patch? It has a huge change set and
is really heavy to digest at once (at least for me).
>  hw/i386/trace-events   |   4 +
>  4 files changed, 513 insertions(+), 10 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 18bc22fc72..632fda2853 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason {
>* request while disabled */
>  VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>  
> +VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>  /* PASID directory entry access failure */
>  VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>  /* The Present(P) field of pasid directory entry is 0 */
> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc;
>  #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff0f1c0ULL
>  #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
>  
> +#define VTD_INV_DESC_PASIDC_G  (3ULL << 4)
> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfULL)
> +#define VTD_INV_DESC_PASIDC_DID(val)   (((val) >> 16) & VTD_DOMAIN_ID_MASK)
> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0  0xfff0f1c0ULL
> +
> +#define VTD_INV_DESC_PASIDC_DSI (0ULL << 4)
> +#define VTD_INV_DESC_PASIDC_PASID_SI   (1ULL << 4)
> +#define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4)
> +
>  /* Information about page-selective IOTLB invalidate */
>  struct VTDIOTLBPageInvInfo {
>  uint16_t domain_id;
> @@ -548,10 +558,28 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_CTX_ENTRY_LEGACY_SIZE 16
>  #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>  
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(val)  (((val) >> 9) & 0x7)
>  #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xf
>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1  0xffe0ULL
>  
> +typedef enum VTDPCInvType {
> +/* force reset all */
> +VTD_PASID_CACHE_FORCE_RESET = 0,
> +/* pasid cache invalidation rely on guest PASID entry */
> +VTD_PASID_CACHE_GLOBAL_INV,
> +VTD_PASID_CACHE_DOMSI,
> +VTD_PASID_CACHE_PASIDSI,
> +} VTDPCInvType;
> +
> +typedef struct VTDPASIDCacheInfo {
> +VTDPCInvType type;
> +uint16_t domain_id;
> +uint32_t pasid;
> +PCIBus *bus;
> +uint16_t devfn;
> +} VTDPASIDCacheInfo;
> +
>  /* PASID Table Related Definitions */
>  #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>  #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
> @@ -563,6 +591,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
>  #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)
>  #define VTD_PASID_ENTRY_FPD   (1ULL << 1) /* Fault Processing 
> Disable */
> +#define VTD_PASID_TBL_ENTRY_NUM   (1ULL << 6)
>  
>  /* PASID Granular Translation Type Mask */
>  #define VTD_PASID_ENTRY_P  1ULL
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 50f9b27a45..fbc9da903a 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>  uint64_t val[8];
>  };
>  
> +typedef struct VTDPASIDCacheEntry {
> +struct VTDPASIDEntry pasid_entry;
> +bool cache_filled;
> +} VTDPASIDCacheEntry;
> +
>  struct VTDAddressSpace {
>  PCIBus *bus;
>  uint8_t devfn;
> @@ -107,6 +112,7 @@ s

[PATCH 3/4] target/arm: Move softfloat specific FPCR/FPSR handling to tcg/

2025-02-21 Thread Peter Maydell

The softfloat (i.e. TCG) specific handling for the FPCR
and FPSR is abstracted behind five functions:
 arm_set_default_fp_behaviours
 arm_set_ah_fp_behaviours
 vfp_get_fpsr_from_host
 vfp_clear_float_status_exc_flags
 vfp_set_fpcr_to_host

Currently we rely on the first two calling softfloat functions that
work even in a KVM-only compile because they're defined as inline in
the softfloat header file, and we provide stub versions of the last
three in arm/vfp_helper.c if CONFIG_TCG isn't defined.

Move the softfloat-specific versions of these functions to
tcg/vfp_helper.c, and provide the non-TCG stub versions in
tcg-stubs.c.

This lets us drop the softfloat header include and the last
set of CONFIG_TCG ifdefs from arm/vfp_helper.c.

Signed-off-by: Peter Maydell 
---
 target/arm/internals.h  |   9 ++
 target/arm/tcg-stubs.c  |  22 
 target/arm/tcg/vfp_helper.c | 228 +
 target/arm/vfp_helper.c | 248 
 4 files changed, 259 insertions(+), 248 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index b3187341456..a6ff228f9fd 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1833,5 +1833,14 @@ int alle1_tlbmask(CPUARMState *env);
 void arm_set_default_fp_behaviours(float_status *s);
 /* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
 void arm_set_ah_fp_behaviours(float_status *s);
+/* Read the float_status info and return the appropriate FPSR value */
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env);
+/* Clear the exception status flags from all float_status fields */
+void vfp_clear_float_status_exc_flags(CPUARMState *env);
+/*
+ * Update float_status fields to handle the bits of the FPCR
+ * specified by mask changing to the values in val.
+ */
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask);
 
 #endif
diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c
index f3f45d54f28..93a15cad610 100644
--- a/target/arm/tcg-stubs.c
+++ b/target/arm/tcg-stubs.c
@@ -30,3 +30,25 @@ void assert_hflags_rebuild_correctly(CPUARMState *env)
 void define_tlb_insn_regs(ARMCPU *cpu)
 {
 }
+
+/* With KVM, we never use float_status, so these can be no-ops */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+}
+
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+}
+
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+{
+return 0;
+}
+
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
+{
+}
+
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+}
diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c
index cd6e0d0edab..b32e2f4e27c 100644
--- a/target/arm/tcg/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -25,6 +25,234 @@
 #include "fpu/softfloat.h"
 #include "qemu/log.h"
 
+/*
+ * Set the float_status behaviour to match the Arm defaults:
+ *  * tininess-before-rounding
+ *  * 2-input NaN propagation prefers SNaN over QNaN, and then
+ *operand A over operand B (see FPProcessNaNs() pseudocode)
+ *  * 3-input NaN propagation prefers SNaN over QNaN, and then
+ *operand C over A over B (see FPProcessNaNs3() pseudocode,
+ *but note that for QEMU muladd is a * b + c, whereas for
+ *the pseudocode function the arguments are in the order c, a, b.
+ *  * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
+ *and the input NaN if it is signalling
+ *  * Default NaN has sign bit clear, msb frac bit set
+ */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+set_float_detect_tininess(float_tininess_before_rounding, s);
+set_float_ftz_detection(float_ftz_before_rounding, s);
+set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
+set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
+set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
+set_float_default_nan_pattern(0b0100, s);
+}
+
+/*
+ * Set the float_status behaviour to match the FEAT_AFP
+ * FPCR.AH=1 requirements:
+ *  * tininess-after-rounding
+ *  * 2-input NaN propagation prefers the first NaN
+ *  * 3-input NaN propagation prefers a over b over c
+ *  * 0 * Inf + NaN always returns the input NaN and doesn't
+ *set Invalid for a QNaN
+ *  * default NaN has sign bit set, msb frac bit set
+ */
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+set_float_detect_tininess(float_tininess_after_rounding, s);
+set_float_ftz_detection(float_ftz_after_rounding, s);
+set_float_2nan_prop_rule(float_2nan_prop_ab, s);
+set_float_3nan_prop_rule(float_3nan_prop_abc, s);
+set_float_infzeronan_rule(float_infzeronan_dnan_never |
+  float_infzeronan_suppress_invalid, s);
+set_float_default_nan_pattern(0b1100, s);
+}
+
+/* Convert host exception flags to vfp form.  */
+static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
+{
+uint32_t target_bits = 0;
+
+if (host_bits & float_flag_invalid) {
+target_bits |= FPS

Re: [RFC PATCH v1 00/19] Factor out HVF's instruction emulator

2025-02-21 Thread Wei Liu

On Fri, Feb 21, 2025 at 04:53:26PM +, Peter Maydell wrote:
> On Fri, 21 Feb 2025 at 14:02, Wei Liu  wrote:
> >
> > Hi,
> >
> > Microsoft's Linux Systems Group developed a Linux driver for the Microsoft
> > Hypervisor (MSHV for short). The driver is being upstreamed. The first
> > supported VMM is Cloud Hypervisor. QEMU will be the second supported
> > VMM.
> >
> > The plan is to write an mshv accelerator in QEMU. The accelerator is still 
> > in
> > the works.
> >
> > MSHV doesn't emulate instructions. VMMs are supposed to bring their own
> > instruction emulator. The path we've chosen is to reuse what's already in 
> > QEMU.
> > The instruction emulator in HVF looks good for what we need.
> >
> > This patch series attempts to make the instruction emulator in HVF a common
> > component for the i386 target. It removes HVF specific code by either using 
> > a
> > set of hooks or moving it to better locations. The new incoming MSHV
> > accelerator will implement the hooks, and where necessary, enhance the 
> > emulator
> > and / or add new hooks.
> 
> If you want to make the hvf decoder more widely used you might want
> to look at this old patch to it that was never applied (issues in
> code review not addressed by the submitter):
> 
> https://lore.kernel.org/qemu-devel/CAFEAcA8yaBOD3KXc-DY94oqzC5wkCENPkePgVCybqR=9nmd...@mail.gmail.com/
> 
> which is trying to fix a problem where an overlong string of
> prefix bytes causes the decoder to misbehave.
> 

Thanks for the information.

> (PS: if in the future you should ever find yourself wanting to do an
> equivalent "decode loads/stores the hypervisor doesn't handle"
> for Arm, use decodetree, not a hand-rolled decoder...)
> 

Noted. Yep, we have plans to add ARM64 support in the future.

Thanks,
Wei.

> thanks
> -- PMM

Re: [PATCH v3 3/9] target/arm: Make CNTPS_* UNDEF from Secure EL1 when Secure EL2 is enabled

2025-02-21 Thread Peter Maydell

On Fri, 21 Feb 2025 at 18:02, Alex Bennée  wrote:
>
> Peter Maydell  writes:
>
> > When we added Secure EL2 support, we missed that this needs an update
> > to the access code for the EL3 physical timer registers.  These are
> > supposed to UNDEF from Secure EL1 when Secure EL2 is enabled.
> >
> > Cc: qemu-sta...@nongnu.org
> > Signed-off-by: Peter Maydell 
> > ---
> >  target/arm/helper.c | 3 +++
> >  1 file changed, 3 insertions(+)
> >
> > diff --git a/target/arm/helper.c b/target/arm/helper.c
> > index ac8cb428925..7ec1e6cfaab 100644
> > --- a/target/arm/helper.c
> > +++ b/target/arm/helper.c
> > @@ -2387,6 +2387,9 @@ static CPAccessResult gt_stimer_access(CPUARMState 
> > *env,
> >  if (!arm_is_secure(env)) {
> >  return CP_ACCESS_UNDEFINED;
> >  }
>
> Hmm this failed to apply as b4d3978c2f (target-arm: Add the AArch64 view
> of the Secure physical timer) has the above as CP_ACCESS_TRAP. I guess
> because I didn't apply 20250130182309.717346-1-peter.mayd...@linaro.org.
> I guess this needs fixing up for stable.

There is a Based-on: tag in the cover letter which will tell you
what this series should be based on if you want to apply it.

Yes, we'll need to either tweak this commit for stable
(i.e. use CP_ACCESS_TRAP_UNCATEGORIZED instead of UNDEFINED)
or else pull in the refactoring patches it depends on.

thanks
-- PMM

Re: [PATCH 1/1] [RISC-V/RVV] Generate strided vector loads/stores with tcg nodes.

2025-02-21 Thread Daniel Henrique Barboza





On 2/11/25 3:20 PM, Paolo Savini wrote:

This commit improves the performance of QEMU when emulating strided vector
loads and stores by substituting the call for the helper function with the
generation of equivalend TCG operations.


s/equivalend/equivalent



Signed-off-by: Paolo Savini 
---
  target/riscv/insn_trans/trans_rvv.c.inc | 294 
  1 file changed, 244 insertions(+), 50 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index b9883a5d32..01798b0f7f 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -802,32 +802,257 @@ GEN_VEXT_TRANS(vlm_v, MO_8, vlm_v, ld_us_mask_op, 
ld_us_mask_check)
  GEN_VEXT_TRANS(vsm_v, MO_8, vsm_v, st_us_mask_op, st_us_mask_check)
  
  /*

- *** stride load and store
+ * MAXSZ returns the maximum vector size can be operated in bytes,
+ * which is used in GVEC IR when vl_eq_vlmax flag is set to true
+ * to accelerate vector operation.
   */
-typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv,
-TCGv, TCGv_env, TCGv_i32);
+static inline uint32_t MAXSZ(DisasContext *s)
+{
+int max_sz = s->cfg_ptr->vlenb << 3;
+return max_sz >> (3 - s->lmul);
+}
+
+static inline uint32_t get_log2(uint32_t a)
+{
+uint32_t i = 0;
+for (; a > 0;) {
+a >>= 1;
+i++;
+}
+return i;
+}
+
+typedef void gen_tl_store(TCGv, TCGv_ptr, tcg_target_long);
+typedef void gen_tl_load(TCGv, TCGv_ptr, tcg_target_long);
  
  static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,

-  uint32_t data, gen_helper_ldst_stride *fn,
-  DisasContext *s)
+  uint32_t data, DisasContext *s, bool is_load)
  {
-TCGv_ptr dest, mask;
-TCGv base, stride;
-TCGv_i32 desc;
+if (!s->vstart_eq_zero) {
+return false;
+}
  
-dest = tcg_temp_new_ptr();

-mask = tcg_temp_new_ptr();
-base = get_gpr(s, rs1, EXT_NONE);
-stride = get_gpr(s, rs2, EXT_NONE);
-desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
-  s->cfg_ptr->vlenb, data));
+TCGv addr = tcg_temp_new();
+TCGv base = get_gpr(s, rs1, EXT_NONE);
+TCGv stride = get_gpr(s, rs2, EXT_NONE);
+TCGv dest = tcg_temp_new();
+TCGv mask = tcg_temp_new();
+
+uint32_t nf = FIELD_EX32(data, VDATA, NF);
+uint32_t vm = FIELD_EX32(data, VDATA, VM);
+uint32_t max_elems = MAXSZ(s) >> s->sew;
+uint32_t max_elems_log2 = get_log2(max_elems);
+uint32_t esz = 1 << s->sew;
+
+TCGv i = tcg_temp_new();
+TCGv i_esz = tcg_temp_new();
+TCGv k = tcg_temp_new();
+TCGv k_esz = tcg_temp_new();
+TCGv k_max = tcg_temp_new();
+TCGv vreg = tcg_temp_new();
+TCGv dest_offs = tcg_temp_new();
+TCGv stride_offs = tcg_temp_new();
+TCGv mask_offs = tcg_temp_new();
+TCGv mask_offs_64 = tcg_temp_new();
+TCGv mask_elem = tcg_temp_new();
+TCGv mask_offs_rem = tcg_temp_new();
+TCGv tail_cnt = tcg_temp_new();
+TCGv tail_tot = tcg_temp_new();
+TCGv tail_addr = tcg_temp_new();
+
+TCGLabel *start = gen_new_label();
+TCGLabel *end = gen_new_label();
+TCGLabel *start_k = gen_new_label();
+TCGLabel *inc_k = gen_new_label();
+TCGLabel *end_k = gen_new_label();
+TCGLabel *start_tail = gen_new_label();
+TCGLabel *end_tail = gen_new_label();
+TCGLabel *start_tail_st = gen_new_label();
+TCGLabel *end_tail_st = gen_new_label();


The code LGTM but IMO there's too much going on in the same function, as is
noticeable from the number of labels and tcg temps being created right off
the bat.

I would divide this function into at least 2 helpers:

- static void gen_ldst_main_loop():  all the tcgops that implement the main 
strided
load/store loop, and all the relevant logic that is required to do so (e.g. 
atomicity)

- static void gen_ldst_tail_bytes(): the tcgops code in the end where the tail 
is set
to 1


And then ldst_stride_trans() would use the helpers. This makes the code easier
to follow. Thanks,

Daniel



+
+/* Destination register and mask register */
+tcg_gen_addi_tl(dest, (TCGv)tcg_env, vreg_ofs(s, vd));
+tcg_gen_addi_tl(mask, (TCGv)tcg_env, vreg_ofs(s, 0));
+
+MemOp atomicity = MO_ATOM_NONE;
+if (s->sew == 0) {
+atomicity = MO_ATOM_NONE;
+} else {
+atomicity = MO_ATOM_IFALIGN_PAIR;
+}
  
-tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));

-tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));
+/*
+ * Select the appropriate load/store to retrieve data from the vector
+ * register given a specific sew.
+ */
+static gen_tl_load * const ld_fns[4] = {
+tcg_gen_ld8u_tl, tcg_gen_ld16u_tl,
+tcg_gen_ld32u_tl, tcg_gen_ld_tl
+};
+gen_tl_load *ld_fn = ld_fns[s->sew];
+
+static gen_tl_store * const st_fns[4] = {
+

Re: [PATCH 00/42] docs: add sphinx-domain rST generator to qapidoc

2025-02-21 Thread John Snow

On Fri, Feb 21, 2025 at 1:42 AM Markus Armbruster  wrote:

> John Snow  writes:
>
> > On Wed, Feb 19, 2025 at 8:22 AM Markus Armbruster 
> wrote:
> >
> >> John Snow  writes:
> >>
> >> > "The text handler you add looks just like the existing latex handler.
> Does
> >> > LaTeX output lack "little headings", too?"
> >> >
> >> > Yes, almost certainly. Can you let me know which output formats we
> actually
> >> > "care about"? I'll have to test them all.
> >>
> >> As far as I can tell, our build system runs sphinx-build -b html and -b
> >> man.
> >>
> >> I run it with -b text manually all the time to hunt for and review
> >> changes in output.  I'd prefer to keep it working if practical.
> >>
> >> For what it's worth, there is a bit of LaTeX configuration in
> >> docs/conf.py.
> >>
> >> >   In the meantime, I upgraded
> my
> >> > patch so that the text translator properly handles branches with
> headings
> >> > that delineate the different branches so that the text output is fully
> >> > reasonable. I will need to do the same for any format we care about.
> >> >
> >> > I've re-pushed as of "about 30 minutes before I wrote this email" --
> >> > https://gitlab.com/jsnow/qemu/-/commits/sphinx-domain-blergh2
> >> >
> >> > This branch includes the text generator fixes (which technically
> belong
> >> > with the predecessor series we skipped, but I'll refactor that later.)
> >> > it also includes fixes to the branch inliner, generated return
> statements,
> >> > and generated out-of-band feature sections.
> >>
> >> I'll fetch it, thanks!
> >>
> >> > (Long story short: inserting new sections in certain spots was broken
> >> > because of cache. Oops. We can discuss more why I wrote that part of
> the
> >> > code like I did in review for the patch that introduced that problem.
> It's
> >> > the "basic inliner" patch.)
> >> >
> >> > Below, I'm going to try a new communication approach where I
> explicitly say
> >> > if I have added something to my tasklist or not so that it's clear to
> you
> >> > what I believe is actionable (and what I am agreeing to change) and
> what I
> >> > believe needs stronger input from you before I do anything. Apologies
> if it
> >> > seems a little robotic, just trying new things O:-)
> >> >
> >> > On that note: not added to tasklist: do we need the LaTeX handler? Do
> we
> >> > need any others? Please confirm O:-)
> >>
> >> See above.
> >>
> >
> > I've got html and text working, text wasn't hard. I will give it a good
> > college try on the LaTeX and man formats. Might be easy. The issue here
> is
> > the custom node I introduced for the collapsible details sections which
> has
> > no default handler in the generators. I'll have to learn more about that
> > part of the API, I haven't interfaced with it much yet.
>
> Understand.
>
> Have you considered cutting the series in half before the inliner?
> First part emits "The members of ..." like the old doc generator.
> Second part replaces that with inlined material.
>
> We could totally release with just the first half!  Inlining is great,
> but even without it, your work looks so much better and is so much more
> usable.
>

I may indeed just do that... though we still need to solve "where to put
the ifcond data?" question. The documentation culling also must be held
back in this case too, which I am fine with.

Let me fork my work (again) and see how complicated an inlinerless version
would be... maybe that's a great way to flush the queue. maybe.


>
> >> > On Fri, Feb 14, 2025 at 7:05 AM Markus Armbruster 
> wrote:
> >> >
> >> >> I started to eyeball old and new generated output side by side.
> >> >>
> >> >> New table of contents shows one level, old two.  No objection; the
> >> >> navigation thingie on the left is more useful anyway.
> >> >>
> >> >
> >> > Unintentional, but if you like it, it's fine by me. Nothing added to
> my
> >> > tasklist.
> >>
> >> Mention in a commit message.
> >>
> >
> > Sure. I... just need to figure out which commit to mention it in. Added
> to
> > my list, anyway.
>

It turns out this happens in the "example" doc patch, it's just a setting
in index.rst. I didn't even intend to commit that patch anyway. So this is
a nothing-burger.


> >
> >
> >>
> >> >> The new generator elides unreferenced types.  Generally good, but two
> >> >> observations:
> >> >>
> >> >> * QapiErrorClass is unreferenced, but its members are mentioned in
> >> >>   Errors sections.  QapiErrorClass serves as better than nothing
> error
> >> >>   code documentation, but it's gone in the new doc.  So this is a
> minor
> >> >>   regression.  We can figure out what to do about it later.
> >> >>
> >> >
> >> > Right. I debated making the members references to that class, but
> recalled
> >> > that you disliked this class and figured you'd not like such a
> change, so I
> >> > just left it alone. I do not have cross-references for individual
> members
> >> > of objects at all yet anyway, so this is definitely

Re: [RFC PATCH v1 00/19] Factor out HVF's instruction emulator

2025-02-21 Thread Paolo Bonzini


On 2/21/25 09:36, Wei Liu wrote:

This patch series attempts to make the instruction emulator in HVF a common
component for the i386 target. It removes HVF specific code by either using a
set of hooks or moving it to better locations. The new incoming MSHV
accelerator will implement the hooks, and where necessary, enhance the emulator
and / or add new hooks.


Good!


This patch series is in RFC state. The patches have been lightly tested by
running a Linux VM on an Intel-based Mac.  We hope to get some feedback on the
overall approach, and let the community bikeshed a bit about names and
location.


For the bikeshedding my only suggestion is to replace mmio_buf with 
emu_mmio_buf, and replace x86-insn-emul with just "emulate" or 
something like that.  That is, no need to repeat x86 inside the 
target/i386 directory, especially since the filenames also start with x86.



First two patches fix issues in the existing code. They can be applied
regardless of the discussion around the overall approach.


These four can also be applied:

 target/i386/hvf: use x86_segment in x86_decode.c
 target/i386/hvf: move and rename {load, store}_regs
 target/i386/hvf: move and rename simulate_{rdmsr, wrmsr}
 target/i386/hvf: drop some dead code


The checkpatch script complains about a few things. Some are from the original
code I didn't touch. For the code I changed or moved, it complains that some
lines are long (>80). Seeing that the rule was not followed strictly in the old
code base, I held off fixing that class of issues. The other thing it complains
about is that there is no entry for the new directory in MAINTAINERS. We can fix these
issues if they are deemed important.


Yes, no problem.  The new directory thing is just a warning but I think 
you could add a new entry with both MSHV and HVF people on it.



Please let us know what you think. The alternative is to duplicate the
instruction emulator code in the mshv accelerator. That looks to be a worse
option.

Yes, definitely.

Paolo

[Stable-9.2.2 01/14] rust: add --rust-target option for bindgen

2025-02-21 Thread Michael Tokarev

From: Paolo Bonzini 

Without it, recent bindgen will give an error

   error: extern block cannot be declared unsafe

if rustc is not new enough to support the "unsafe extern" construct.

Cc: qemu-r...@nongnu.org
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
Message-ID: <20250206111514.2134895-1-pbonz...@redhat.com>
Signed-off-by: Stefan Hajnoczi 
(cherry picked from commit 131c58469f6fb68c89b38fee6aba8bbb20c7f4bf)
Signed-off-by: Michael Tokarev 

diff --git a/meson.build b/meson.build
index 7a3faca61d..7f6f638676 100644
--- a/meson.build
+++ b/meson.build
@@ -4050,6 +4050,9 @@ if have_rust
   bindgen_args += ['--formatter', 'none']
 endif
   endif
+  if bindgen.version().version_compare('>=0.66.0')
+bindgen_args += ['--rust-target', '1.59']
+  endif
   if bindgen.version().version_compare('<0.61.0')
 # default in 0.61+
 bindgen_args += ['--size_t-is-usize']
-- 
2.39.5

[PATCH 08/15] rust: timer: wrap QEMUTimer with Opaque<>

2025-02-21 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 meson.build|  7 ---
 rust/qemu-api/src/timer.rs | 24 +---
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/meson.build b/meson.build
index 8ed10b6624e..16c76c493f3 100644
--- a/meson.build
+++ b/meson.build
@@ -4087,13 +4087,6 @@ if have_rust
   foreach enum : c_bitfields
 bindgen_args += ['--bitfield-enum', enum]
   endforeach
-  c_nocopy = [
-'QEMUTimer',
-  ]
-  # Used to customize Drop trait
-  foreach struct : c_nocopy
-bindgen_args += ['--no-copy', struct]
-  endforeach
 
   # TODO: Remove this comment when the clang/libclang mismatch issue is solved.
   #
diff --git a/rust/qemu-api/src/timer.rs b/rust/qemu-api/src/timer.rs
index a593538917a..0305a0385ad 100644
--- a/rust/qemu-api/src/timer.rs
+++ b/rust/qemu-api/src/timer.rs
@@ -7,10 +7,23 @@
 use crate::{
 bindings::{self, qemu_clock_get_ns, timer_del, timer_init_full, timer_mod, 
QEMUClockType},
 callbacks::FnCall,
+cell::Opaque,
 };
 
-pub type Timer = bindings::QEMUTimer;
-pub type TimerListGroup = bindings::QEMUTimerListGroup;
+/// A safe wrapper around [`bindings::QEMUTimer`].
+#[repr(transparent)]
+#[derive(Debug, Default, qemu_api_macros::Wrapper)]
+pub struct Timer(Opaque<bindings::QEMUTimer>);
+
+unsafe impl Send for Timer {}
+unsafe impl Sync for Timer {}
+
+#[repr(transparent)]
+#[derive(qemu_api_macros::Wrapper)]
+pub struct TimerListGroup(Opaque<bindings::QEMUTimerListGroup>);
+
+unsafe impl Send for TimerListGroup {}
+unsafe impl Sync for TimerListGroup {}
 
 impl Timer {
 pub const MS: u32 = bindings::SCALE_MS;
@@ -21,10 +34,6 @@ pub fn new() -> Self {
 Default::default()
 }
 
-const fn as_mut_ptr(&self) -> *mut Self {
-self as *const Timer as *mut _
-}
-
 pub fn init_full<'timer, 'opaque: 'timer, T, F>(
 &'timer mut self,
 timer_list_group: Option<&TimerListGroup>,
@@ -51,7 +60,7 @@ pub fn init_full<'timer, 'opaque: 'timer, T, F>(
 // SAFETY: the opaque outlives the timer
 unsafe {
 timer_init_full(
-self,
+self.as_mut_ptr(),
 if let Some(g) = timer_list_group {
 g as *const TimerListGroup as *mut _
 } else {
@@ -75,6 +84,7 @@ pub fn delete(&self) {
 }
 }
 
+// FIXME: use something like PinnedDrop from the pinned_init crate
 impl Drop for Timer {
 fn drop(&mut self) {
 self.delete()
-- 
2.48.1

Re: [RFC PATCH] hw/display: add blocklist for known bad drivers

2025-02-21 Thread Philippe Mathieu-Daudé


On 21/2/25 17:01, Alex Bennée wrote:

While running the new GPU tests it was noted that the proprietary
nVidia driver barfed when run under the sanitiser:

   2025-02-20 11:13:08,226: [11:13:07.782] Output 'headless' attempts
   EOTF mode SDR and colorimetry mode default.
   2025-02-20 11:13:08,227: [11:13:07.784] Output 'headless' using color
   profile: stock sRGB color profile

   and that's the last thing it outputs.

   The sanitizer reports that when the framework sends the SIGTERM
   because of the timeout we get a write to a NULL pointer (but
   interesting not this time in an atexit callback):

   UndefinedBehaviorSanitizer:DEADLYSIGNAL
   ==471863==ERROR: UndefinedBehaviorSanitizer: SEGV on unknown address
   0x (pc 0x7a18ceaafe80 bp 0x sp 0x7ffe8e3ff6d0
   T471863)
   ==471863==The signal is caused by a WRITE memory access.
   ==471863==Hint: address points to the zero page.
   #0 0x7a18ceaafe80
   (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x16afe80)
   (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
   #1 0x7a18ce9e72c0
   (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x15e72c0)
   (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
   #2 0x7a18ce9f11bb
   (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x15f11bb)
   (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
   #3 0x7a18ce6dc9d1
   (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x12dc9d1)
   (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
   #4 0x7a18e7d15326 in vrend_renderer_create_fence
   
/usr/src/virglrenderer-1.0.0-1ubuntu2/obj-x86_64-linux-gnu/../src/vrend_renderer.c:10883:26
   #5 0x55bfb6621871 in virtio_gpu_virgl_process_cmd

The #dri-devel channel confirmed:

stsquad: nv driver is known to not work with venus, don't use
   it for testing

So let's implement a blocklist to stop users from starting a known bad
setup.

Reported-by: Peter Maydell 
Signed-off-by: Alex Bennée 
Cc: Dmitry Osipenko 
---
  meson.build   |   4 +
  include/qemu/host-gpu.h   |  23 +
  hw/display/virtio-gpu.c   |   4 +
  stubs/host-gpu.c  |  17 
  util/host-gpu.c   | 102 ++
  stubs/meson.build |   4 +
  tests/functional/test_aarch64_virt_gpu.py |   2 +
  util/meson.build  |   2 +
  8 files changed, 158 insertions(+)
  create mode 100644 include/qemu/host-gpu.h
  create mode 100644 stubs/host-gpu.c
  create mode 100644 util/host-gpu.c

diff --git a/meson.build b/meson.build
index 4588bfd864..8f4a431445 100644
--- a/meson.build
+++ b/meson.build
@@ -1373,12 +1373,16 @@ if not get_option('qatzip').auto() or have_system
  endif
  
  virgl = not_found

+vulkan = not_found
  
  have_vhost_user_gpu = have_tools and host_os == 'linux' and pixman.found()

  if not get_option('virglrenderer').auto() or have_system or 
have_vhost_user_gpu
virgl = dependency('virglrenderer',
   method: 'pkg-config',
   required: get_option('virglrenderer'))
+  vulkan = dependency('vulkan',
+  method: 'pkg-config',
+  required: get_option('virglrenderer'))
  endif
  rutabaga = not_found
  if not get_option('rutabaga_gfx').auto() or have_system or have_vhost_user_gpu
diff --git a/include/qemu/host-gpu.h b/include/qemu/host-gpu.h
new file mode 100644
index 00..45053c2f77
--- /dev/null
+++ b/include/qemu/host-gpu.h
@@ -0,0 +1,23 @@
+/*
+ * Utility functions to probe host GPU
+ *
+ * Copyright (c) 2025 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HOST_GPU_H
+#define HOST_GPU_H
+
+#include "qapi/error.h"
+
+/**
+ * validate_vulkan_backend() - verify working backend
+ *
+ * errp: error pointer
+ *
+ * If the system vulkan implementation is known to not work return
+ * false otherwise true.
+ */
+bool validate_vulkan_backend(Error **errp);
+
+#endif /* HOST_GPU_H */
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 11a7a85750..816eedf838 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -32,6 +32,7 @@
  #include "qemu/module.h"
  #include "qapi/error.h"
  #include "qemu/error-report.h"
+#include "qemu/host-gpu.h"
  
  #define VIRTIO_GPU_VM_VERSION 1
  
@@ -1498,6 +1499,9 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)

  error_setg(errp, "venus requires enabled blob and hostmem 
options");
  return;
  }


Why don't we check VIRTIO_GPU_FLAG_VENUS_ENABLED in 
virtio_gpu_gl_device_realize()?



+if (!validate_vulkan_backend(errp)) {
+return;
+}
  #else
  error_setg(errp, "old virglrenderer, venus unsupported");
  return;
diff --git a/stubs/host-gpu.c b/stubs/host-gpu.c
new file mode 100644
index 00..7bf76ee4f6
--- /dev/null
+++ b/stubs/host-gpu.c
@@ -0,0

[PATCH 09/15] rust: irq: wrap IRQState with Opaque<>

2025-02-21 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 rust/qemu-api/src/irq.rs| 15 ++-
 rust/qemu-api/src/sysbus.rs |  1 +
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/rust/qemu-api/src/irq.rs b/rust/qemu-api/src/irq.rs
index d1c9dc96eff..aec2825b2f9 100644
--- a/rust/qemu-api/src/irq.rs
+++ b/rust/qemu-api/src/irq.rs
@@ -9,10 +9,16 @@
 
 use crate::{
 bindings::{self, qemu_set_irq},
+cell::Opaque,
 prelude::*,
 qom::ObjectClass,
 };
 
+/// An opaque wrapper around [`bindings::IRQState`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct IRQState(Opaque);
+
 /// Interrupt sources are used by devices to pass changes to a value (typically
 /// a boolean).  The interrupt sink is usually an interrupt controller or
 /// GPIO controller.
@@ -22,8 +28,7 @@
 /// method sends a `true` value to the sink.  If the guest has to see a
 /// different polarity, that change is performed by the board between the
 /// device and the interrupt controller.
-pub type IRQState = bindings::IRQState;
-
+///
 /// Interrupts are implemented as a pointer to the interrupt "sink", which has
 /// type [`IRQState`].  A device exposes its source as a QOM link property 
using
 /// a function such as [`SysBusDeviceMethods::init_irq`], and
@@ -41,7 +46,7 @@ pub struct InterruptSource
 where
 c_int: From,
 {
-cell: BqlCell<*mut IRQState>,
+cell: BqlCell<*mut bindings::IRQState>,
 _marker: PhantomData,
 }
 
@@ -80,11 +85,11 @@ pub fn set(&self, level: T) {
 }
 }
 
-pub(crate) const fn as_ptr(&self) -> *mut *mut IRQState {
+pub(crate) const fn as_ptr(&self) -> *mut *mut bindings::IRQState {
 self.cell.as_ptr()
 }
 
-pub(crate) const fn slice_as_ptr(slice: &[Self]) -> *mut *mut IRQState {
+pub(crate) const fn slice_as_ptr(slice: &[Self]) -> *mut *mut 
bindings::IRQState {
 assert!(!slice.is_empty());
 slice[0].as_ptr()
 }
diff --git a/rust/qemu-api/src/sysbus.rs b/rust/qemu-api/src/sysbus.rs
index 04821a2b9b3..48803a655f9 100644
--- a/rust/qemu-api/src/sysbus.rs
+++ b/rust/qemu-api/src/sysbus.rs
@@ -79,6 +79,7 @@ fn mmio_map(&self, id: u32, addr: u64) {
 fn connect_irq(&self, id: u32, irq: &Owned) {
 assert!(bql_locked());
 let id: i32 = id.try_into().unwrap();
+let irq: &IRQState = irq;
 unsafe {
 bindings::sysbus_connect_irq(self.as_mut_ptr(), id, 
irq.as_mut_ptr());
 }
-- 
2.48.1

[Stable-9.2.2 00/14] Patch Round-up for stable 9.2.2, freeze on 2025-02-23

2025-02-21 Thread Michael Tokarev

The following patches are queued for QEMU stable v9.2.2:

  https://gitlab.com/qemu-project/qemu/-/commits/staging-9.2

Patch freeze is 2025-02-23, and the release is planned for 2025-02-24:

  https://wiki.qemu.org/Planning/9.2

This is a short-cycle release to fix issues with the uploaded 9.2.1
tarball - due to some caching on the site and two versions of the
9.2.1 tarball, there are some issues with signature verification.
Since there were a few important patches already queued up, I'm
including these too.

Please respond here or CC qemu-sta...@nongnu.org on any additional patches
you think should (or shouldn't) be included in the release.

The changes which are staging for inclusion, with the original commit hash
from master branch, are given below the bottom line.

Thanks!

/mjt

--
01 131c58469f6f Paolo Bonzini:
   rust: add --rust-target option for bindgen
02 23ea425c14d3 Fabiano Rosas:
   block: Fix leak in send_qmp_error_event
03 107c551de0d7 Peter Krempa:
   block-backend: Fix argument order when calling 
   'qapi_event_send_block_io_error()'
04 27a8d899c7a1 Khem Raj:
   linux-user: Do not define struct sched_attr if libc headers do
05 1e3d4d9a1a32 Laurent Vivier:
   qmp: update vhost-user protocol feature maps
06 66a1b4991c32 Thomas Huth:
   gitlab-ci.d/cirrus: Update the FreeBSD job to v14.2
07 7b3d5b84cbd7 Zhenzhong Duan:
   vfio/iommufd: Fix SIGSEV in iommufd_cdev_attach()
08 4dafba778aa3 Volker Rümelin:
   ui/sdl2: reenable the SDL2 Windows keyboard hook procedure
09 b79b05d1a06a Michael Roth:
   make-release: don't rely on $CWD when excluding subproject directories
10 937df81af675 Peter Maydell:
   hw/net/smc91c111: Ignore attempt to pop from empty RX fifo
11 4b7b20a3b72c Fabiano Rosas:
   elfload: Fix alignment when unmapping excess reservation
12 807c3ebd1e3f Mikael Szreder:
   target/sparc: Fix register selection for all F*TOx and FxTO* instructions
13 7a74e468089a Mikael Szreder:
   target/sparc: Fix gdbstub incorrectly handling registers f32-f62
14 f141caa270af Michael Tokarev:
   net/slirp: libslirp 4.9.0 compatibility

[Stable-9.2.2 05/14] qmp: update vhost-user protocol feature maps

2025-02-21 Thread Michael Tokarev

From: Laurent Vivier 

Add VHOST_USER_PROTOCOL_F_SHARED_OBJECT and
VHOST_USER_PROTOCOL_F_DEVICE_STATE protocol feature maps to
the virtio introspection.

Cc: jonah.pal...@oracle.com
Fixes: 160947666276 ("vhost-user: add shared_object msg")
Cc: aest...@redhat.com
Fixes: cda83adc62b6 ("vhost-user: Interface for migration state transfer")
Cc: hre...@redhat.com
Signed-off-by: Laurent Vivier 
Signed-off-by: Michael Tokarev 
(cherry picked from commit 1e3d4d9a1a32ac6835f0d295a5117851c421fb5d)
Signed-off-by: Michael Tokarev 

diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c
index 6fe761..8a32a3b105 100644
--- a/hw/virtio/virtio-qmp.c
+++ b/hw/virtio/virtio-qmp.c
@@ -121,6 +121,12 @@ static const qmp_virtio_feature_map_t 
vhost_user_protocol_map[] = {
 FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_STATUS, \
 "VHOST_USER_PROTOCOL_F_STATUS: Querying and notifying back-end "
 "device status supported"),
+FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SHARED_OBJECT, \
+"VHOST_USER_PROTOCOL_F_SHARED_OBJECT: Backend shared object "
+"supported"),
+FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_DEVICE_STATE, \
+"VHOST_USER_PROTOCOL_F_DEVICE_STATE: Backend device state transfer 
"
+"supported"),
 { -1, "" }
 };
 
-- 
2.39.5

[Stable-9.2.2 09/14] make-release: don't rely on $CWD when excluding subproject directories

2025-02-21 Thread Michael Tokarev

From: Michael Roth 

The current logic scans qemu.git/subprojects/ from *.wrap files to
determine whether or not to include the associated directories in the
release tarballs. However, the script assumes that it is being run from
the top-level of the source directory, which may not always be the case.
In particular, when generating releases via, e.g.:

  make qemu-9.2.1.tar.xz

the $CWD will either be an arbitrary external build directory, or
qemu.git/build, and the exclusions will not be processed as expected.
Fix this by using the $src parameter passed to the script as the root
directory for the various subproject/ paths referenced by this logic.

Also, the error case at the beginning of the subproject_dir() will not
result in the error message being printed, and will instead produce an
error message about "error" not being a valid command. Fix this by using
basic shell commands.

Fixes: be27b5149c86 ("make-release: only leave tarball of wrap-file 
subprojects")
Cc: Paolo Bonzini 
Cc: Michael Tokarev 
Cc: qemu-sta...@nongnu.org
Signed-off-by: Michael Roth 
Signed-off-by: Michael Tokarev 
(cherry picked from commit b79b05d1a06a013447ea93b81c07612766b735f2)
Signed-off-by: Michael Tokarev 

diff --git a/scripts/make-release b/scripts/make-release
index 2885e87210..1b89b3423a 100755
--- a/scripts/make-release
+++ b/scripts/make-release
@@ -11,8 +11,9 @@
 # See the COPYING file in the top-level directory.
 
 function subproject_dir() {
-if test ! -f "subprojects/$1.wrap"; then
-  error "scripts/archive-source.sh should only process wrap subprojects"
+if test ! -f "$src/subprojects/$1.wrap"; then
+  echo "scripts/archive-source.sh should only process wrap subprojects"
+  exit 1
 fi
 
 # Print the directory key of the wrap file, defaulting to the
@@ -26,7 +27,7 @@ function subproject_dir() {
   -e's///p' \
   -e'q' \
   -e '}' \
-  "subprojects/$1.wrap")
+  "$src/subprojects/$1.wrap")
 
 echo "${dir:-$1}"
 }
@@ -76,7 +77,7 @@ popd
 exclude=(--exclude=.git)
 # include the tarballs in subprojects/packagecache but not their expansion
 for sp in $SUBPROJECTS; do
-if grep -xqF "[wrap-file]" subprojects/$sp.wrap; then
+if grep -xqF "[wrap-file]" $src/subprojects/$sp.wrap; then
   exclude+=(--exclude=subprojects/"$(subproject_dir $sp)")
 fi
 done
-- 
2.39.5

[Stable-9.2.2 07/14] vfio/iommufd: Fix SIGSEV in iommufd_cdev_attach()

2025-02-21 Thread Michael Tokarev

From: Zhenzhong Duan 

When iommufd_cdev_ram_block_discard_disable() fails for whatever reason,
errp should be set or else SIGSEGV is triggered in vfio_realize() when
error_prepend() is called.

While at it, use the same error message for both the legacy and
iommufd backends.

Fixes: 5ee3dc7af785 ("vfio/iommufd: Implement the iommufd backend")
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Link: https://lore.kernel.org/r/20250116102307.260849-1-zhenzhong.d...@intel.com
Signed-off-by: Cédric Le Goater 
(cherry picked from commit 7b3d5b84cbd742356a1afc6b0fa489d0663f235d)
Signed-off-by: Michael Tokarev 

diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index e7bece4ea1..a60c899dc6 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -515,8 +515,8 @@ static bool iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev,
 } else {
 ret = iommufd_cdev_ram_block_discard_disable(true);
 if (ret) {
-error_setg(errp,
-  "Cannot set discarding of RAM broken (%d)", ret);
+error_setg_errno(errp, -ret,
+ "Cannot set discarding of RAM broken");
 goto err_discard_disable;
 }
 goto found_container;
@@ -544,6 +544,7 @@ static bool iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev,
 
 ret = iommufd_cdev_ram_block_discard_disable(true);
 if (ret) {
+error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
 goto err_discard_disable;
 }
 
-- 
2.39.5

[Stable-9.2.2 12/14] target/sparc: Fix register selection for all F*TOx and FxTO* instructions

2025-02-21 Thread Michael Tokarev

From: Mikael Szreder 

A bug was introduced in commit 0bba7572d40d which causes the fdtox
and fqtox instructions to incorrectly select the destination registers.
More information and a test program can be found in issue #2802.

Cc: qemu-sta...@nongnu.org
Fixes: 0bba7572d40d ("target/sparc: Perform DFPREG/QFPREG in decodetree")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2802
Signed-off-by: Mikael Szreder 
Acked-by: Artyom Tarasenko 
[rth: Squash patches together, since the second fixes a typo in the first.]
Signed-off-by: Richard Henderson 
Message-ID: <20250205090333.19626-3-...@miszr.win>
(cherry picked from commit 807c3ebd1e3fc2a1be6cdfc702ccea3fa0d2d9b2)
Signed-off-by: Michael Tokarev 

diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode
index 989c20b44a..cfcdf6690e 100644
--- a/target/sparc/insns.decode
+++ b/target/sparc/insns.decode
@@ -321,12 +321,12 @@ FdMULq  10 . 110100 . 0 0110 1110 .   
 @q_d_d
 FNHADDs 10 . 110100 . 0 0111 0001 .@r_r_r
 FNHADDd 10 . 110100 . 0 0111 0010 .@d_d_d
 FNsMULd 10 . 110100 . 0 0111 1001 .@d_r_r
-FsTOx   10 . 110100 0 0 1000 0001 .@r_r2
-FdTOx   10 . 110100 0 0 1000 0010 .@r_d2
-FqTOx   10 . 110100 0 0 1000 0011 .@r_q2
-FxTOs   10 . 110100 0 0 1000 0100 .@r_r2
-FxTOd   10 . 110100 0 0 1000 1000 .@d_r2
-FxTOq   10 . 110100 0 0 1000 1100 .@q_r2
+FsTOx   10 . 110100 0 0 1000 0001 .@d_r2
+FdTOx   10 . 110100 0 0 1000 0010 .@d_d2
+FqTOx   10 . 110100 0 0 1000 0011 .@d_q2
+FxTOs   10 . 110100 0 0 1000 0100 .@r_d2
+FxTOd   10 . 110100 0 0 1000 1000 .@d_d2
+FxTOq   10 . 110100 0 0 1000 1100 .@q_d2
 FiTOs   10 . 110100 0 0 1100 0100 .@r_r2
 FdTOs   10 . 110100 0 0 1100 0110 .@r_d2
 FqTOs   10 . 110100 0 0 1100 0111 .@r_q2
-- 
2.39.5

[Stable-9.2.2 02/14] block: Fix leak in send_qmp_error_event

2025-02-21 Thread Michael Tokarev

From: Fabiano Rosas 

ASAN detected a leak when running the ahci-test
/ahci/io/dma/lba28/retry:

Direct leak of 35 byte(s) in 1 object(s) allocated from:
#0 in malloc
#1 in __vasprintf_internal
#2 in vasprintf
#3 in g_vasprintf
#4 in g_strdup_vprintf
#5 in g_strdup_printf
#6 in object_get_canonical_path ../qom/object.c:2096:19
#7 in blk_get_attached_dev_id_or_path ../block/block-backend.c:1033:12
#8 in blk_get_attached_dev_path ../block/block-backend.c:1047:12
#9 in send_qmp_error_event ../block/block-backend.c:2140:36
#10 in blk_error_action ../block/block-backend.c:2172:9
#11 in ide_handle_rw_error ../hw/ide/core.c:875:5
#12 in ide_dma_cb ../hw/ide/core.c:894:13
#13 in dma_complete ../system/dma-helpers.c:107:9
#14 in dma_blk_cb ../system/dma-helpers.c:129:9
#15 in blk_aio_complete ../block/block-backend.c:1552:9
#16 in blk_aio_write_entry ../block/block-backend.c:1619:5
#17 in coroutine_trampoline ../util/coroutine-ucontext.c:175:9

Plug the leak by freeing the device path string.

Signed-off-by: Fabiano Rosas 
Reviewed-by: Philippe Mathieu-Daudé 
Message-ID: <2024145214.8261-1-faro...@suse.de>
[PMD: Use g_autofree]
Signed-off-by: Philippe Mathieu-Daudé 
Message-ID: <2024170333.43833-3-phi...@linaro.org>
Signed-off-by: Kevin Wolf 
(cherry picked from commit 23ea425c14d3b89a002e0127b17456eee3102ab7)
Signed-off-by: Michael Tokarev 

diff --git a/block/block-backend.c b/block/block-backend.c
index 85bcdedcef..0c28091ef1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2134,10 +2134,10 @@ static void send_qmp_error_event(BlockBackend *blk,
 {
 IoOperationType optype;
 BlockDriverState *bs = blk_bs(blk);
+g_autofree char *path = blk_get_attached_dev_path(blk);
 
 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-qapi_event_send_block_io_error(blk_name(blk),
-   blk_get_attached_dev_path(blk),
+qapi_event_send_block_io_error(blk_name(blk), path,
bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error));
-- 
2.39.5

[Stable-9.2.2 11/14] elfload: Fix alignment when unmapping excess reservation

2025-02-21 Thread Michael Tokarev

From: Fabiano Rosas 

When complying with the alignment requested in the ELF and unmapping
the excess reservation, having align_end not aligned to the guest page
causes the unmap to be rejected by the alignment check at
target_munmap and later brk adjustments hit an EEXIST.

Fix by aligning the start of region to be unmapped.

Fixes: c81d1fafa6 ("linux-user: Honor elf alignment when placing images")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1913
Signed-off-by: Fabiano Rosas 
[rth: Align load_end as well.]
Signed-off-by: Richard Henderson 
Message-ID: <20250213143558.10504-1-faro...@suse.de>
(cherry picked from commit 4b7b20a3b72c5000ea71bef505c16e6e628268b6)
Signed-off-by: Michael Tokarev 

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 471a384b22..aa3607f3ac 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -3349,8 +3349,8 @@ static void load_elf_image(const char *image_name, const 
ImageSource *src,
 
 if (align_size != reserve_size) {
 abi_ulong align_addr = ROUND_UP(load_addr, align);
-abi_ulong align_end = align_addr + reserve_size;
-abi_ulong load_end = load_addr + align_size;
+abi_ulong align_end = TARGET_PAGE_ALIGN(align_addr + reserve_size);
+abi_ulong load_end = TARGET_PAGE_ALIGN(load_addr + align_size);
 
 if (align_addr != load_addr) {
 target_munmap(load_addr, align_addr - load_addr);
-- 
2.39.5

[Stable-9.2.2 03/14] block-backend: Fix argument order when calling 'qapi_event_send_block_io_error()'

2025-02-21 Thread Michael Tokarev

From: Peter Krempa 

Commit 7452162adec25c10 introduced 'qom-path' argument to BLOCK_IO_ERROR
event but when the event is instantiated in 'send_qmp_error_event()' the
arguments for 'device' and 'qom_path' in
qapi_event_send_block_io_error() were reversed:

Generated code for sending event:

  void qapi_event_send_block_io_error(const char *qom_path,
  const char *device,
  const char *node_name,
  IoOperationType operation,
  [...]

Call inside send_qmp_error_event():

 qapi_event_send_block_io_error(blk_name(blk),
blk_get_attached_dev_path(blk),
bs ? bdrv_get_node_name(bs) : NULL, optype,
[...]

This results in reporting the QOM path as the device alias and vice
versa, which in turn breaks libvirt, which expects the device alias to be
either a valid alias or empty (which would make libvirt do the lookup by
node-name instead).

Cc: qemu-sta...@nongnu.org
Fixes: 7452162adec2 ("qapi: add qom-path to BLOCK_IO_ERROR event")
Signed-off-by: Peter Krempa 
Message-ID: 
<09728d784888b38d7a8f09ee5e9e9c542c875e1e.1737973614.git.pkre...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
(cherry picked from commit 107c551de0d7bc3aa8e926c557b66b9549616f42)
Signed-off-by: Michael Tokarev 

diff --git a/block/block-backend.c b/block/block-backend.c
index 0c28091ef1..61b935e3c4 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2137,7 +2137,7 @@ static void send_qmp_error_event(BlockBackend *blk,
 g_autofree char *path = blk_get_attached_dev_path(blk);
 
 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-qapi_event_send_block_io_error(blk_name(blk), path,
+qapi_event_send_block_io_error(path, blk_name(blk),
bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error));
-- 
2.39.5

[PATCH 06/15] rust: cell: add wrapper for FFI types

2025-02-21 Thread Paolo Bonzini

Inspired by the same-named type in Linux.  This type provides the compiler
with a correct view of what goes on with FFI types.  In addition, it
separates the glue code from the bindgen-generated code, allowing
traits such as Send, Sync or Zeroable to be specified independently
for C and Rust structs.

Signed-off-by: Paolo Bonzini 
---
 docs/devel/rust.rst   |  34 +--
 rust/qemu-api/src/cell.rs | 191 --
 2 files changed, 210 insertions(+), 15 deletions(-)

diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst
index e3f9e16aacb..9a621648e72 100644
--- a/docs/devel/rust.rst
+++ b/docs/devel/rust.rst
@@ -295,15 +295,33 @@ of ``&mut self``; access to internal fields must use 
*interior mutability*
 to go from a shared reference to a ``&mut``.
 
 Whenever C code provides you with an opaque ``void *``, avoid converting it
-to a Rust mutable reference, and use a shared reference instead.  Rust code
-will then have to use QEMU's ``BqlRefCell`` and ``BqlCell`` type, which
-enforce that locking rules for the "Big QEMU Lock" are respected.  These cell
-types are also known to the ``vmstate`` crate, which is able to "look inside"
-them when building an in-memory representation of a ``struct``'s layout.
-Note that the same is not true of a ``RefCell`` or ``Mutex``.
+to a Rust mutable reference, and use a shared reference instead.  The
+``qemu_api::cell`` module provides wrappers that can be used to tell the
+Rust compiler about interior mutability, and optionally to enforce locking
+rules for the "Big QEMU Lock".  In the future, similar cell types might
+also be provided for ``AioContext``-based locking as well.
 
-In the future, similar cell types might also be provided for 
``AioContext``-based
-locking as well.
+In particular, device code will usually rely on the ``BqlRefCell`` and
+``BqlCell`` type to ensure that data is accessed correctly under the
+"Big QEMU Lock".  These cell types are also known to the ``vmstate``
+crate, which is able to "look inside" them when building an in-memory
+representation of a ``struct``'s layout.  Note that the same is not true
+of a ``RefCell`` or ``Mutex``.
+
+Bindings code instead will usually use the ``Opaque`` type, which hides
+the contents of the underlying struct and can be easily converted to
+a raw pointer, for use in calls to C functions.  It can be used for
+example as follows::
+
+#[repr(transparent)]
+#[derive(Debug)]
+pub struct Object(Opaque);
+
+The bindings will then manually check for the big QEMU lock with
+assertions, which allows the wrapper to be declared thread-safe::
+
+unsafe impl Send for Object {}
+unsafe impl Sync for Object {}
 
 Writing bindings to C code
 ''
diff --git a/rust/qemu-api/src/cell.rs b/rust/qemu-api/src/cell.rs
index eae4e2ce786..84b9eb07467 100644
--- a/rust/qemu-api/src/cell.rs
+++ b/rust/qemu-api/src/cell.rs
@@ -27,7 +27,7 @@
 // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 // DEALINGS IN THE SOFTWARE.
 
-//! BQL-protected mutable containers.
+//! QEMU-specific mutable containers
 //!
 //! Rust memory safety is based on this rule: Given an object `T`, it is only
 //! possible to have one of the following:
@@ -43,8 +43,10 @@
 //! usually have their pointer shared with the "outside world very early in
 //! their lifetime", for example when they create their
 //! [`MemoryRegion`s](crate::bindings::MemoryRegion).  Therefore, individual
-//! parts of a  device must be made mutable in a controlled manner through the
-//! use of cell types.
+//! parts of a  device must be made mutable in a controlled manner; this module
+//! provides the tools to do so.
+//!
+//! ## Cell types
 //!
 //! [`BqlCell`] and [`BqlRefCell`] allow doing this via the Big QEMU 
Lock.
 //! While they are essentially the same single-threaded primitives that are
@@ -71,7 +73,7 @@
 //! QEMU device implementations is usually incorrect and can lead to
 //! thread-safety issues.
 //!
-//! ## `BqlCell`
+//! ### `BqlCell`
 //!
 //! [`BqlCell`] implements interior mutability by moving values in and out 
of
 //! the cell. That is, an `&mut T` to the inner value can never be obtained as
@@ -91,7 +93,7 @@
 //!- [`set`](BqlCell::set): this method replaces the interior value,
 //!  dropping the replaced value.
 //!
-//! ## `BqlRefCell`
+//! ### `BqlRefCell`
 //!
 //! [`BqlRefCell`] uses Rust's lifetimes to implement "dynamic borrowing", a
 //! process whereby one can claim temporary, exclusive, mutable access to the
@@ -111,13 +113,82 @@
 //! Multiple immutable borrows are allowed via [`borrow`](BqlRefCell::borrow),
 //! or a single mutable borrow via [`borrow_mut`](BqlRefCell::borrow_mut).  The
 //! thread will panic if these rules are violated or if the BQL is not held.
+//!
+//! ## Opaque wrappers
+//!
+//! The cell types from the previous section are useful at the boundaries
+//! of code that requires interior mutability.  When writing glue code that
+//! interacts directly wit

[PATCH 12/15] rust: sysbus: wrap SysBusDevice with Opaque<>

2025-02-21 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 rust/hw/timer/hpet/src/hpet.rs |  2 +-
 rust/qemu-api/src/bindings.rs  |  3 ---
 rust/qemu-api/src/sysbus.rs| 25 ++---
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/hpet.rs
index be27eb0eff4..19e63465cff 100644
--- a/rust/hw/timer/hpet/src/hpet.rs
+++ b/rust/hw/timer/hpet/src/hpet.rs
@@ -741,7 +741,7 @@ fn reset_hold(&self, _type: ResetType) {
 HPETFwConfig::update_hpet_cfg(
 self.hpet_id.get(),
 self.capability.get() as u32,
-sbd.mmio[0].addr,
+unsafe { *sbd.as_ptr() }.mmio[0].addr,
 );
 
 // to document that the RTC lowers its output on reset as well
diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs
index 6e70a75a0e6..b791ca6d87f 100644
--- a/rust/qemu-api/src/bindings.rs
+++ b/rust/qemu-api/src/bindings.rs
@@ -40,9 +40,6 @@ unsafe impl Sync for MemoryRegion {}
 unsafe impl Send for ObjectClass {}
 unsafe impl Sync for ObjectClass {}
 
-unsafe impl Send for SysBusDevice {}
-unsafe impl Sync for SysBusDevice {}
-
 // SAFETY: this is a pure data struct
 unsafe impl Send for CoalescedMemoryRange {}
 unsafe impl Sync for CoalescedMemoryRange {}
diff --git a/rust/qemu-api/src/sysbus.rs b/rust/qemu-api/src/sysbus.rs
index 48803a655f9..78909fb9931 100644
--- a/rust/qemu-api/src/sysbus.rs
+++ b/rust/qemu-api/src/sysbus.rs
@@ -6,11 +6,11 @@
 
 use std::{ffi::CStr, ptr::addr_of_mut};
 
-pub use bindings::{SysBusDevice, SysBusDeviceClass};
+pub use bindings::SysBusDeviceClass;
 
 use crate::{
 bindings,
-cell::bql_locked,
+cell::{bql_locked, Opaque},
 irq::{IRQState, InterruptSource},
 memory::MemoryRegion,
 prelude::*,
@@ -18,6 +18,14 @@
 qom::Owned,
 };
 
+/// A safe wrapper around [`bindings::SysBusDevice`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct SysBusDevice(Opaque);
+
+unsafe impl Send for SysBusDevice {}
+unsafe impl Sync for SysBusDevice {}
+
 unsafe impl ObjectType for SysBusDevice {
 type Class = SysBusDeviceClass;
 const TYPE_NAME: &'static CStr =
@@ -49,7 +57,7 @@ pub trait SysBusDeviceMethods: ObjectDeref
 fn init_mmio(&self, iomem: &MemoryRegion) {
 assert!(bql_locked());
 unsafe {
-bindings::sysbus_init_mmio(self.as_mut_ptr(), iomem.as_mut_ptr());
+bindings::sysbus_init_mmio(self.upcast().as_mut_ptr(), 
iomem.as_mut_ptr());
 }
 }
 
@@ -60,7 +68,7 @@ fn init_mmio(&self, iomem: &MemoryRegion) {
 fn init_irq(&self, irq: &InterruptSource) {
 assert!(bql_locked());
 unsafe {
-bindings::sysbus_init_irq(self.as_mut_ptr(), irq.as_ptr());
+bindings::sysbus_init_irq(self.upcast().as_mut_ptr(), 
irq.as_ptr());
 }
 }
 
@@ -69,7 +77,7 @@ fn mmio_map(&self, id: u32, addr: u64) {
 assert!(bql_locked());
 let id: i32 = id.try_into().unwrap();
 unsafe {
-bindings::sysbus_mmio_map(self.as_mut_ptr(), id, addr);
+bindings::sysbus_mmio_map(self.upcast().as_mut_ptr(), id, addr);
 }
 }
 
@@ -81,7 +89,7 @@ fn connect_irq(&self, id: u32, irq: &Owned) {
 let id: i32 = id.try_into().unwrap();
 let irq: &IRQState = irq;
 unsafe {
-bindings::sysbus_connect_irq(self.as_mut_ptr(), id, 
irq.as_mut_ptr());
+bindings::sysbus_connect_irq(self.upcast().as_mut_ptr(), id, 
irq.as_mut_ptr());
 }
 }
 
@@ -89,7 +97,10 @@ fn sysbus_realize(&self) {
 // TODO: return an Error
 assert!(bql_locked());
 unsafe {
-bindings::sysbus_realize(self.as_mut_ptr(), 
addr_of_mut!(bindings::error_fatal));
+bindings::sysbus_realize(
+self.upcast().as_mut_ptr(),
+addr_of_mut!(bindings::error_fatal),
+);
 }
 }
 }
-- 
2.48.1

[PATCH 10/15] rust: qom: wrap Object with Opaque<>

2025-02-21 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 rust/qemu-api/src/bindings.rs |  3 ---
 rust/qemu-api/src/memory.rs   |  2 +-
 rust/qemu-api/src/qdev.rs |  6 +++---
 rust/qemu-api/src/qom.rs  | 35 ++-
 4 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs
index d2868639ff6..be6dd68c09c 100644
--- a/rust/qemu-api/src/bindings.rs
+++ b/rust/qemu-api/src/bindings.rs
@@ -46,9 +46,6 @@ unsafe impl Sync for MemoryRegion {}
 unsafe impl Send for ObjectClass {}
 unsafe impl Sync for ObjectClass {}
 
-unsafe impl Send for Object {}
-unsafe impl Sync for Object {}
-
 unsafe impl Send for SysBusDevice {}
 unsafe impl Sync for SysBusDevice {}
 
diff --git a/rust/qemu-api/src/memory.rs b/rust/qemu-api/src/memory.rs
index 682951ab44e..713c494ca2e 100644
--- a/rust/qemu-api/src/memory.rs
+++ b/rust/qemu-api/src/memory.rs
@@ -157,7 +157,7 @@ unsafe fn do_init_io(
 let cstr = CString::new(name).unwrap();
 memory_region_init_io(
 slot,
-owner.cast::(),
+owner.cast::(),
 ops,
 owner.cast::(),
 cstr.as_ptr(),
diff --git a/rust/qemu-api/src/qdev.rs b/rust/qemu-api/src/qdev.rs
index c136457090c..1a4d1f38762 100644
--- a/rust/qemu-api/src/qdev.rs
+++ b/rust/qemu-api/src/qdev.rs
@@ -52,7 +52,7 @@ pub trait ResettablePhasesImpl {
 /// can be downcasted to type `T`. We also expect the device is
 /// readable/writeable from one thread at any time.
 unsafe extern "C" fn rust_resettable_enter_fn(
-obj: *mut Object,
+obj: *mut bindings::Object,
 typ: ResetType,
 ) {
 let state = NonNull::new(obj).unwrap().cast::();
@@ -65,7 +65,7 @@ pub trait ResettablePhasesImpl {
 /// can be downcasted to type `T`. We also expect the device is
 /// readable/writeable from one thread at any time.
 unsafe extern "C" fn rust_resettable_hold_fn(
-obj: *mut Object,
+obj: *mut bindings::Object,
 typ: ResetType,
 ) {
 let state = NonNull::new(obj).unwrap().cast::();
@@ -78,7 +78,7 @@ pub trait ResettablePhasesImpl {
 /// can be downcasted to type `T`. We also expect the device is
 /// readable/writeable from one thread at any time.
 unsafe extern "C" fn rust_resettable_exit_fn(
-obj: *mut Object,
+obj: *mut bindings::Object,
 typ: ResetType,
 ) {
 let state = NonNull::new(obj).unwrap().cast::();
diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs
index 5488643a2fd..0bca36336ba 100644
--- a/rust/qemu-api/src/qom.rs
+++ b/rust/qemu-api/src/qom.rs
@@ -101,16 +101,24 @@
 ptr::NonNull,
 };
 
-pub use bindings::{Object, ObjectClass};
+pub use bindings::ObjectClass;
 
 use crate::{
 bindings::{
 self, object_class_dynamic_cast, object_dynamic_cast, object_get_class,
 object_get_typename, object_new, object_ref, object_unref, TypeInfo,
 },
-cell::bql_locked,
+cell::{bql_locked, Opaque},
 };
 
+/// A safe wrapper around [`bindings::Object`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct Object(Opaque);
+
+unsafe impl Send for Object {}
+unsafe impl Sync for Object {}
+
 /// Marker trait: `Self` can be statically upcasted to `P` (i.e. `P` is a 
direct
 /// or indirect parent of `Self`).
 ///
@@ -199,7 +207,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), 
fmt::Error> {
 }
 }
 
-unsafe extern "C" fn rust_instance_init(obj: *mut Object) {
+unsafe extern "C" fn rust_instance_init(obj: *mut 
bindings::Object) {
 let mut state = NonNull::new(obj).unwrap().cast::();
 // SAFETY: obj is an instance of T, since rust_instance_init
 // is called from QOM core as the instance_init function
@@ -209,7 +217,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), 
fmt::Error> {
 }
 }
 
-unsafe extern "C" fn rust_instance_post_init(obj: *mut Object) {
+unsafe extern "C" fn rust_instance_post_init(obj: *mut 
bindings::Object) {
 let state = NonNull::new(obj).unwrap().cast::();
 // SAFETY: obj is an instance of T, since rust_instance_post_init
 // is called from QOM core as the instance_post_init function
@@ -230,7 +238,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), 
fmt::Error> {
 ::CLASS_INIT(unsafe { klass.as_mut() })
 }
 
-unsafe extern "C" fn drop_object(obj: *mut Object) {
+unsafe extern "C" fn drop_object(obj: *mut bindings::Object) {
 // SAFETY: obj is an instance of T, since drop_object is called
 // from the QOM core function object_deinit() as the instance_finalize
 // function for class T.  Note that while object_deinit() will drop the
@@ -280,14 +288,14 @@ pub unsafe trait ObjectType: Sized {
 /// Return the receiver as an Object.  This is always safe, even
 /// if this type represents an interface.
 fn as_object(&self) -> &Object {
-unsafe { &*self.as_object_ptr() }
+unsafe { &*self.as_ptr().cast() }
 }
 
 /// Return

[PATCH 14/15] rust: chardev: wrap Chardev with Opaque<>

2025-02-21 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 rust/qemu-api/src/bindings.rs | 3 ---
 rust/qemu-api/src/chardev.rs  | 8 ++--
 rust/qemu-api/src/qdev.rs | 1 +
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs
index 26cc8de0cf2..c3f36108bd5 100644
--- a/rust/qemu-api/src/bindings.rs
+++ b/rust/qemu-api/src/bindings.rs
@@ -31,9 +31,6 @@ unsafe impl Sync for BusState {}
 unsafe impl Send for CharBackend {}
 unsafe impl Sync for CharBackend {}
 
-unsafe impl Send for Chardev {}
-unsafe impl Sync for Chardev {}
-
 unsafe impl Send for ObjectClass {}
 unsafe impl Sync for ObjectClass {}
 
diff --git a/rust/qemu-api/src/chardev.rs b/rust/qemu-api/src/chardev.rs
index 74cfb634e5f..a35b9217e90 100644
--- a/rust/qemu-api/src/chardev.rs
+++ b/rust/qemu-api/src/chardev.rs
@@ -6,9 +6,13 @@
 
 use std::ffi::CStr;
 
-use crate::{bindings, prelude::*};
+use crate::{bindings, cell::Opaque, prelude::*};
+
+/// A safe wrapper around [`bindings::Chardev`].
+#[repr(transparent)]
+#[derive(qemu_api_macros::Wrapper)]
+pub struct Chardev(Opaque<bindings::Chardev>);
 
-pub type Chardev = bindings::Chardev;
 pub type ChardevClass = bindings::ChardevClass;
 
 unsafe impl ObjectType for Chardev {
diff --git a/rust/qemu-api/src/qdev.rs b/rust/qemu-api/src/qdev.rs
index ed5dce08216..1ff6c1ca7c2 100644
--- a/rust/qemu-api/src/qdev.rs
+++ b/rust/qemu-api/src/qdev.rs
@@ -334,6 +334,7 @@ fn init_clock_out(&self, name: &str) -> Owned {
 fn prop_set_chr(&self, propname: &str, chr: &Owned) {
 assert!(bql_locked());
 let c_propname = CString::new(propname).unwrap();
+let chr: &Chardev = chr;
 unsafe {
 bindings::qdev_prop_set_chr(
 self.upcast().as_mut_ptr(),
-- 
2.48.1

[Stable-9.2.2 14/14] net/slirp: libslirp 4.9.0 compatibility

2025-02-21 Thread Michael Tokarev

Update the code in net/slirp.c to be compatible with
libslirp 4.9.0, which deprecated slirp_pollfds_fill()
and started using slirp_os_socket type for sockets
(which is a 64-bit integer on win64) for all callbacks
starting with version 6 of the interface.

Signed-off-by: Michael Tokarev 
Reviewed-by: Samuel Thibault 
Message-ID: <20250130123253.864681-1-...@tls.msk.ru>
[thuth: Added some spaces to make checkpatch.pl happy]
Signed-off-by: Thomas Huth 
(cherry picked from commit f141caa270af536b4d5b7c8540820f1bdd245d71)
Signed-off-by: Michael Tokarev 

diff --git a/net/slirp.c b/net/slirp.c
index eb9a456ed4..102bec7b57 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -247,7 +247,14 @@ static void net_slirp_timer_mod(void *timer, int64_t 
expire_timer,
 timer_mod(&t->timer, expire_timer);
 }
 
-static void net_slirp_register_poll_fd(int fd, void *opaque)
+#if !SLIRP_CHECK_VERSION(4, 9, 0)
+# define slirp_os_socket int
+# define slirp_pollfds_fill_socket slirp_pollfds_fill
+# define register_poll_socket register_poll_fd
+# define unregister_poll_socket unregister_poll_fd
+#endif
+
+static void net_slirp_register_poll_sock(slirp_os_socket fd, void *opaque)
 {
 #ifdef WIN32
 AioContext *ctxt = qemu_get_aio_context();
@@ -260,7 +267,7 @@ static void net_slirp_register_poll_fd(int fd, void *opaque)
 #endif
 }
 
-static void net_slirp_unregister_poll_fd(int fd, void *opaque)
+static void net_slirp_unregister_poll_sock(slirp_os_socket fd, void *opaque)
 {
 #ifdef WIN32
 if (WSAEventSelect(fd, NULL, 0) != 0) {
@@ -286,8 +293,8 @@ static const SlirpCb slirp_cb = {
 #endif
 .timer_free = net_slirp_timer_free,
 .timer_mod = net_slirp_timer_mod,
-.register_poll_fd = net_slirp_register_poll_fd,
-.unregister_poll_fd = net_slirp_unregister_poll_fd,
+.register_poll_socket = net_slirp_register_poll_sock,
+.unregister_poll_socket = net_slirp_unregister_poll_sock,
 .notify = net_slirp_notify,
 };
 
@@ -314,7 +321,7 @@ static int slirp_poll_to_gio(int events)
 return ret;
 }
 
-static int net_slirp_add_poll(int fd, int events, void *opaque)
+static int net_slirp_add_poll(slirp_os_socket fd, int events, void *opaque)
 {
 GArray *pollfds = opaque;
 GPollFD pfd = {
@@ -363,8 +370,8 @@ static void net_slirp_poll_notify(Notifier *notifier, void 
*data)
 
 switch (poll->state) {
 case MAIN_LOOP_POLL_FILL:
-slirp_pollfds_fill(s->slirp, &poll->timeout,
-   net_slirp_add_poll, poll->pollfds);
+slirp_pollfds_fill_socket(s->slirp, &poll->timeout,
+  net_slirp_add_poll, poll->pollfds);
 break;
 case MAIN_LOOP_POLL_OK:
 case MAIN_LOOP_POLL_ERR:
@@ -629,7 +636,9 @@ static int net_slirp_init(NetClientState *peer, const char 
*model,
 
 s = DO_UPCAST(SlirpState, nc, nc);
 
-cfg.version = SLIRP_CHECK_VERSION(4,7,0) ? 4 : 1;
+cfg.version =
+ SLIRP_CHECK_VERSION(4, 9, 0) ? 6 :
+ SLIRP_CHECK_VERSION(4, 7, 0) ? 4 : 1;
 cfg.restricted = restricted;
 cfg.in_enabled = ipv4;
 cfg.vnetwork = net;
-- 
2.39.5

Re: [PATCH v3 3/9] target/arm: Make CNTPS_* UNDEF from Secure EL1 when Secure EL2 is enabled

2025-02-21 Thread Alex Bennée

Peter Maydell  writes:

> When we added Secure EL2 support, we missed that this needs an update
> to the access code for the EL3 physical timer registers.  These are
> supposed to UNDEF from Secure EL1 when Secure EL2 is enabled.
>
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Peter Maydell 
> ---
>  target/arm/helper.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index ac8cb428925..7ec1e6cfaab 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -2387,6 +2387,9 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
>  if (!arm_is_secure(env)) {
>  return CP_ACCESS_UNDEFINED;
>  }

Hmm, this failed to apply because b4d3978c2f (target-arm: Add the AArch64
view of the Secure physical timer) has the above as CP_ACCESS_TRAP. I guess
that's because I didn't apply 20250130182309.717346-1-peter.mayd...@linaro.org.
I guess this needs fixing up for stable.


> +if (arm_is_el2_enabled(env)) {
> +return CP_ACCESS_UNDEFINED;
> +}
>  if (!(env->cp15.scr_el3 & SCR_ST)) {
>  return CP_ACCESS_TRAP_EL3;
>  }


Anyway:

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [RFC PATCH v1 00/19] Factor out HVF's instruction emulator

2025-02-21 Thread Wei Liu

On Fri, Feb 21, 2025 at 05:36:39PM +0100, Paolo Bonzini wrote:
> On 2/21/25 09:36, Wei Liu wrote:
> > This patch series attempts to make the instruction emulator in HVF a common
> > component for the i386 target. It removes HVF specific code by either using 
> > a
> > set of hooks or moving it to better locations. The new incoming MSHV
> > accelerator will implement the hooks, and where necessary, enhance the 
> > emulator
> > and / or add new hooks.
> 
> Good!
> 
> > This patch series is in RFC state. The patches have been lightly tested by
> > running a Linux VM on an Intel-based Mac.  We hope to get some feedback on 
> > the
> > overall approach, and let the community bikeshed a bit about names and
> > location.
> 
> For the bikeshedding my only suggestion is to replace mmio_buf with
> emu_mmio_buf, and replace x86-insn-emul, with just "emulate" or something
> like that.  That is, no need to repeat x86 inside the target/i386 directory,
> especially since the filenames also start with x86.
> 

No problem. We can make the changes in the next version.

> > First two patches fix issues in the existing code. They can be applied
> > regardless of the discussion around the overall approach.
> 
> These four can also be applied:
> 
>  target/i386/hvf: use x86_segment in x86_decode.c
>  target/i386/hvf: move and rename {load, store}_regs
>  target/i386/hvf: move and rename simulate_{rdmsr, wrmsr}
>  target/i386/hvf: drop some dead code
> 
> > The checkpatch script complains about a few things. Some are from the 
> > original
> > code I didn't touch. For the code I changed or moved, it complains that some
> > lines are long (>80). Seeing that the rule was not followed strictly in the 
> > old
> > code base, I held off fixing that class of issues. The other thing it 
> > complains
> > is there is no entry for the new directory in MAINTAINERS. We can fix these
> > issues if they are deemed important.
> 
> Yes, no problem.  The new directory thing is just a warning but I think you
> could add a new entry with both MSHV and HVF people on it.
> 

Okay, that works, too.

> > Please let us know what you think. The alternative is to duplicate the
> > instruction emulator code in the mshv accelerator. That looks to be a worse
> > option.
> Yes, definitely.

Thank you for the feedback.

Wei.

[PATCH v2] vdpa: Allow vDPA to work on big-endian machine

2025-02-21 Thread Konstantin Shkolnyy

Add a .set_vnet_le() function that always returns success, assuming that
vDPA h/w always implements the LE data format. Without it, on a big-endian
machine QEMU disables vDPA and outputs the message:
"backend does not support LE vnet headers; falling back on userspace virtio"

Signed-off-by: Konstantin Shkolnyy 
---
Changes in V2: Add code comment.

 net/vhost-vdpa.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 231b45246c..6e7cec4d45 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -270,6 +270,18 @@ static bool vhost_vdpa_has_ufo(NetClientState *nc)
 
 }
 
+/*
+ * FIXME: vhost_vdpa doesn't have an API to "set h/w endianness". But it's
+ * reasonable to assume that h/w is LE by default, because LE is what
+ * virtio 1.0 and later ask for. So, this function just says "yes, the h/w is
+ * LE". Otherwise, on a BE machine, higher-level code would mistakenly think
+ * the h/w is BE and can't support VDPA for a virtio 1.0 client.
+ */
+static int vhost_vdpa_set_vnet_le(NetClientState *nc, bool enable)
+{
+return 0;
+}
+
 static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
Error **errp)
 {
@@ -437,6 +449,7 @@ static NetClientInfo net_vhost_vdpa_info = {
 .cleanup = vhost_vdpa_cleanup,
 .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
 .has_ufo = vhost_vdpa_has_ufo,
+.set_vnet_le = vhost_vdpa_set_vnet_le,
 .check_peer_type = vhost_vdpa_check_peer_type,
 .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
 };
-- 
2.34.1

Re: [RFC PATCH] MAINTAINERS: remove widely sanctioned entities

2025-02-21 Thread Paolo Bonzini


On 2/21/25 17:14, Alex Bennée wrote:

The following organisations appear on the US sanctions list:

   Yadro: https://sanctionssearch.ofac.treas.gov/Details.aspx?id=41125
   ISPRAS: https://sanctionssearch.ofac.treas.gov/Details.aspx?id=50890

As a result maintainers interacting with such entities would face
legal risk in a number of jurisdictions. To reduce the risk of
inadvertent non-compliance remove entries from these organisations
from the MAINTAINERS file.

Mark the pcf8574 system as orphaned until someone volunteers to step
up as a maintainer. Add myself as a second reviewer to record/replay
so I can help with what odd fixes I can.


pcf8574 could actually be removed because it's unused in the boards we 
have; it could be added on the command line but its usefulness is 
doubtful without GPIO connections.


I'm sure everyone would rather avoid this but, given that neither person 
was particularly active, it doesn't change much to go for the safer option.


Acked-by: Paolo Bonzini 

Paolo


Signed-off-by: Alex Bennée 
---
  MAINTAINERS | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3848d37a38..55b2ef219e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2523,8 +2523,7 @@ F: hw/i2c/i2c_mux_pca954x.c
  F: include/hw/i2c/i2c_mux_pca954x.h
  
  pcf8574

-M: Dmitrii Sharikhin 
-S: Maintained
+S: Orphaned
  F: hw/gpio/pcf8574.c
  F: include/gpio/pcf8574.h
  
@@ -3621,10 +3620,10 @@ F: net/filter-mirror.c

  F: tests/qtest/test-filter*
  
  Record/replay

-M: Pavel Dovgalyuk 
  R: Paolo Bonzini 
+R: Alex Bennée 
  W: https://wiki.qemu.org/Features/record-replay
-S: Supported
+S: Odd Fixes
  F: replay/*
  F: block/blkreplay.c
  F: net/filter-replay.c

[Stable-9.2.2 06/14] gitlab-ci.d/cirrus: Update the FreeBSD job to v14.2

2025-02-21 Thread Michael Tokarev

From: Thomas Huth 

The FreeBSD job started to fail since the 14-1 image disappeared
from the cloud. Update the job to v14.2 to fix it.

Message-ID: <20250211120817.35050-1-th...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Thomas Huth 
(cherry picked from commit 66a1b4991c32e370a4e0ddabf496aa1563aff286)
Signed-off-by: Michael Tokarev 

diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index a9e43e21d0..c43380ea3d 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -52,7 +52,7 @@ x64-freebsd-14-build:
 NAME: freebsd-14
 CIRRUS_VM_INSTANCE_TYPE: freebsd_instance
 CIRRUS_VM_IMAGE_SELECTOR: image_family
-CIRRUS_VM_IMAGE_NAME: freebsd-14-1
+CIRRUS_VM_IMAGE_NAME: freebsd-14-2
 CIRRUS_VM_CPUS: 8
 CIRRUS_VM_RAM: 8G
 UPDATE_COMMAND: pkg update; pkg upgrade -y
-- 
2.39.5

[Stable-9.2.2 10/14] hw/net/smc91c111: Ignore attempt to pop from empty RX fifo

2025-02-21 Thread Michael Tokarev

From: Peter Maydell 

The SMC91C111 includes an MMU Command register which permits
the guest to remove entries from the RX FIFO. The datasheet
does not specify what happens if the guest tries to do this
when the FIFO is already empty; there are no status registers
containing error bits which might be applicable.

Currently we don't guard at all against pop of an empty
RX FIFO, with the result that we allow the guest to drive
the rx_fifo_len index to negative values, which will cause
smc91c111_receive() to write to the rx_fifo[] array out of
bounds when we receive the next packet.

Instead ignore attempts to pop an empty RX FIFO.

Cc: qemu-sta...@nongnu.org
Fixes: 80337b66a8e7 ("NIC emulation for qemu arm-softmmu")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2780
Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Message-ID: <20250207151157.3151776-1-peter.mayd...@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé 
(cherry picked from commit 937df81af6757638a7f1908747560dd342947213)
Signed-off-by: Michael Tokarev 

diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index 180ba5c791..2a652885c9 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -182,6 +182,15 @@ static void smc91c111_pop_rx_fifo(smc91c111_state *s)
 {
 int i;
 
+if (s->rx_fifo_len == 0) {
+/*
+ * The datasheet doesn't document what the behaviour is if the
+ * guest tries to pop an empty RX FIFO, and there's no obvious
+ * error status register to report it. Just ignore the attempt.
+ */
+return;
+}
+
 s->rx_fifo_len--;
 if (s->rx_fifo_len) {
 for (i = 0; i < s->rx_fifo_len; i++)
-- 
2.39.5

[Stable-9.2.2 08/14] ui/sdl2: reenable the SDL2 Windows keyboard hook procedure

2025-02-21 Thread Michael Tokarev

From: Volker Rümelin 

Windows only:

The libSDL2 Windows message loop needs the libSDL2 Windows low
level keyboard hook procedure to grab the left and right Windows
keys correctly. Reenable the SDL2 Windows keyboard hook procedure.

Since SDL2 2.30.4 the SDL2 keyboard hook procedure also filters
out the special left Control key event for every Alt Gr key event
on keyboards with an international layout. This means the QEMU low
level keyboard hook procedure is no longer needed. Remove the QEMU
Windows keyboard hook procedure.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2139
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2323
Signed-off-by: Volker Rümelin 
Link: https://lore.kernel.org/r/20241231115950.6732-1-vr_q...@t-online.de
Signed-off-by: Paolo Bonzini 
(cherry picked from commit 4dafba778aa3e5f5fd3b2c6333afd7650dcf54e2)
Signed-off-by: Michael Tokarev 
(Mjt: context fix in ui/sdl2.c (includes))

diff --git a/ui/meson.build b/ui/meson.build
index 28c7381dd1..35fb04cadf 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -120,10 +120,6 @@ if gtk.found()
 endif
 
 if sdl.found()
-  if host_os == 'windows'
-system_ss.add(files('win32-kbd-hook.c'))
-  endif
-
   sdl_ss = ss.source_set()
   sdl_ss.add(sdl, sdl_image, pixman, glib, files(
 'sdl2-2d.c',
diff --git a/ui/sdl2.c b/ui/sdl2.c
index bd4f5a9da1..3d70eaebfa 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -32,7 +32,6 @@
 #include "sysemu/runstate.h"
 #include "sysemu/runstate-action.h"
 #include "sysemu/sysemu.h"
-#include "ui/win32-kbd-hook.h"
 #include "qemu/log.h"
 
 static int sdl2_num_outputs;
@@ -262,7 +261,6 @@ static void sdl_grab_start(struct sdl2_console *scon)
 }
 SDL_SetWindowGrab(scon->real_window, SDL_TRUE);
 gui_grab = 1;
-win32_kbd_set_grab(true);
 sdl_update_caption(scon);
 }
 
@@ -270,7 +268,6 @@ static void sdl_grab_end(struct sdl2_console *scon)
 {
 SDL_SetWindowGrab(scon->real_window, SDL_FALSE);
 gui_grab = 0;
-win32_kbd_set_grab(false);
 sdl_show_cursor(scon);
 sdl_update_caption(scon);
 }
@@ -371,19 +368,6 @@ static int get_mod_state(void)
 }
 }
 
-static void *sdl2_win32_get_hwnd(struct sdl2_console *scon)
-{
-#ifdef CONFIG_WIN32
-SDL_SysWMinfo info;
-
-SDL_VERSION(&info.version);
-if (SDL_GetWindowWMInfo(scon->real_window, &info)) {
-return info.info.win.window;
-}
-#endif
-return NULL;
-}
-
 static void handle_keydown(SDL_Event *ev)
 {
 int win;
@@ -608,10 +592,6 @@ static void handle_windowevent(SDL_Event *ev)
 sdl2_redraw(scon);
 break;
 case SDL_WINDOWEVENT_FOCUS_GAINED:
-win32_kbd_set_grab(gui_grab);
-if (qemu_console_is_graphic(scon->dcl.con)) {
-win32_kbd_set_window(sdl2_win32_get_hwnd(scon));
-}
 /* fall through */
 case SDL_WINDOWEVENT_ENTER:
 if (!gui_grab && (qemu_input_is_absolute(scon->dcl.con) || 
absolute_enabled)) {
@@ -627,9 +607,6 @@ static void handle_windowevent(SDL_Event *ev)
 scon->ignore_hotkeys = get_mod_state();
 break;
 case SDL_WINDOWEVENT_FOCUS_LOST:
-if (qemu_console_is_graphic(scon->dcl.con)) {
-win32_kbd_set_window(NULL);
-}
 if (gui_grab && !gui_fullscreen) {
 sdl_grab_end(scon);
 }
@@ -869,10 +846,7 @@ static void sdl2_display_init(DisplayState *ds, 
DisplayOptions *o)
 #ifdef SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR /* only available since SDL 
2.0.8 */
 SDL_SetHint(SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR, "0");
 #endif
-#ifndef CONFIG_WIN32
-/* QEMU uses its own low level keyboard hook procedure on Windows */
 SDL_SetHint(SDL_HINT_GRAB_KEYBOARD, "1");
-#endif
 #ifdef SDL_HINT_ALLOW_ALT_TAB_WHILE_GRABBED
 SDL_SetHint(SDL_HINT_ALLOW_ALT_TAB_WHILE_GRABBED, "0");
 #endif
-- 
2.39.5

[PATCH 07/15] rust: qemu_api_macros: add Wrapper derive macro

2025-02-21 Thread Paolo Bonzini

Add a derive macro that makes it easy to peel off all the layers of
specialness (UnsafeCell, MaybeUninit, etc.) and just get a pointer
to the wrapped type; and likewise add them back starting from a
*mut.

Signed-off-by: Paolo Bonzini 
---
 docs/devel/rust.rst |  8 ++--
 rust/qemu-api-macros/src/lib.rs | 82 -
 rust/qemu-api/meson.build   |  7 +--
 rust/qemu-api/src/cell.rs   | 31 +
 4 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst
index 9a621648e72..db2b427ebd2 100644
--- a/docs/devel/rust.rst
+++ b/docs/devel/rust.rst
@@ -314,11 +314,13 @@ a raw pointer, for use in calls to C functions.  It can 
be used for
 example as follows::
 
 #[repr(transparent)]
-#[derive(Debug)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
 pub struct Object(Opaque<bindings::Object>);
 
-The bindings will then manually check for the big QEMU lock with
-assertions, which allows the wrapper to be declared thread-safe::
+where the special ``derive`` macro provides useful methods such as
+``from_raw``, ``as_ptr`` and ``as_mut_ptr``.  The bindings will then
+manually check for the big QEMU lock with assertions, which allows
+the wrapper to be declared thread-safe::
 
 unsafe impl Send for Object {}
 unsafe impl Sync for Object {}
diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs
index 7ec218202f4..781e5271562 100644
--- a/rust/qemu-api-macros/src/lib.rs
+++ b/rust/qemu-api-macros/src/lib.rs
@@ -6,7 +6,7 @@
 use quote::quote;
 use syn::{
 parse_macro_input, parse_quote, punctuated::Punctuated, spanned::Spanned, 
token::Comma, Data,
-DeriveInput, Field, Fields, Ident, Meta, Path, Token, Type, Variant, 
Visibility,
+DeriveInput, Field, Fields, FieldsUnnamed, Ident, Meta, Path, Token, Type, 
Variant, Visibility,
 };
 
 mod utils;
@@ -33,6 +33,35 @@ fn get_fields<'a>(
 }
 }
 
+fn get_unnamed_field<'a>(input: &'a DeriveInput, msg: &str) -> Result<&'a Field, MacroError> {
+if let Data::Struct(s) = &input.data {
+let unnamed = match &s.fields {
+Fields::Unnamed(FieldsUnnamed {
+unnamed: ref fields,
+..
+}) => fields,
+_ => {
+return Err(MacroError::Message(
+format!("Tuple struct required for {}", msg),
+s.fields.span(),
+))
+}
+};
+if unnamed.len() != 1 {
+return Err(MacroError::Message(
+format!("A single field is required for {}", msg),
+s.fields.span(),
+));
+}
+Ok(&unnamed[0])
+} else {
+Err(MacroError::Message(
+format!("Struct required for {}", msg),
+input.ident.span(),
+))
+}
+}
+
 fn is_c_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> {
 let expected = parse_quote! { #[repr(C)] };
 
@@ -46,6 +75,19 @@ fn is_c_repr(input: &DeriveInput, msg: &str) -> Result<(), 
MacroError> {
 }
 }
 
+fn is_transparent_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> {
+let expected = parse_quote! { #[repr(transparent)] };
+
+if input.attrs.iter().any(|attr| attr == &expected) {
+Ok(())
+} else {
+Err(MacroError::Message(
+format!("#[repr(transparent)] required for {}", msg),
+input.ident.span(),
+))
+}
+}
+
 fn derive_object_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> {
 is_c_repr(&input, "#[derive(Object)]")?;
 
@@ -72,6 +114,44 @@ pub fn derive_object(input: TokenStream) -> TokenStream {
 TokenStream::from(expanded)
 }
 
+fn derive_opaque_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> {
+is_transparent_repr(&input, "#[derive(Wrapper)]")?;
+
+let name = &input.ident;
+let field = &get_unnamed_field(&input, "#[derive(Wrapper)]")?;
+let typ = &field.ty;
+
+// TODO: how to add "::qemu_api"?  For now, this is only used in the
+// qemu_api crate so it's not a problem.
+Ok(quote! {
+unsafe impl crate::cell::Wrapper for #name {
+type Wrapped = <#typ as crate::cell::Wrapper>::Wrapped;
+}
+impl #name {
+pub unsafe fn from_raw<'a>(ptr: *mut <Self as crate::cell::Wrapper>::Wrapped) -> &'a Self {
+let ptr = ::std::ptr::NonNull::new(ptr).unwrap().cast::<Self>();
+unsafe { ptr.as_ref() }
+}
+
+pub const fn as_mut_ptr(&self) -> *mut <Self as crate::cell::Wrapper>::Wrapped {
+self.0.as_mut_ptr()
+}
+
+pub const fn as_ptr(&self) -> *const <Self as crate::cell::Wrapper>::Wrapped {
+self.0.as_ptr()
+}
+}
+})
+}
+
+#[proc_macro_derive(Wrapper)]
+pub fn derive_opaque(input: TokenStream) -> TokenStream {
+let input = parse_macro_input!(input as DeriveInput);
+let expanded = derive_opaque_or_error(input).unwrap_or_else(Into::into);
+
+TokenStream::from(expanded)
+}
+
 #[rustfmt::skip:

[PATCH 0/4] target/arm: Move vfp_helper.c TCG code into tcg/ subdir

2025-02-21 Thread Peter Maydell

target/arm/vfp_helper.c is almost entirely TCG-specific code,
guarded by #ifdef CONFIG_TCG. The only parts that aren't
TCG-specific are the handling of the FPSCR/FPSR/FPCR registers.
(I noticed this while I was working on the FEAT_AFP code, and
it felt a bit awkward but I left it as-is at the time.)

This series moves all the TCG code into tcg/vfp_helper.c.
Once only the FPSCR etc code is left, we rename the old
file to vfp_fpscr.c.

Series structure:
 * move the easy stuff as copy-n-paste to create new file
 * move the FPSCR get/set helpers
 * move the softfloat-specific parts of FPSR/FPCR handling
 * rename

This was just a quick last-thing-Friday tidyup, so I'm not
strongly attached to it if people don't think it's worth the
churn. I do think at least the first patch or some variant
on it is worth doing, though.

thanks
-- PMM

Peter Maydell (4):
  target/arm: Move TCG-only VFP code into tcg/ subdir
  target/arm: Move FPSCR get/set helpers to tcg/vfp_helper.c
  target/arm: Move softfloat specific FPCR/FPSR handling to tcg/
  target/arm: Rename vfp_helper.c to vfp_fpscr.c

 target/arm/internals.h|   9 ++
 target/arm/tcg-stubs.c|  22 
 target/arm/{ => tcg}/vfp_helper.c | 189 +++---
 target/arm/vfp_fpscr.c| 155 
 target/arm/meson.build|   2 +-
 target/arm/tcg/meson.build|   1 +
 6 files changed, 205 insertions(+), 173 deletions(-)
 rename target/arm/{ => tcg}/vfp_helper.c (90%)
 create mode 100644 target/arm/vfp_fpscr.c

-- 
2.43.0

[PATCH 1/4] target/arm: Move TCG-only VFP code into tcg/ subdir

2025-02-21 Thread Peter Maydell

Most of the target/arm/vfp_helper.c file is purely TCG helper code,
guarded by #ifdef CONFIG_TCG.  Move this into a new file in
target/arm/tcg/.

This leaves only the code relating to getting and setting the
FPCR/FPSR/FPSCR in the original file. (Some of this also is
TCG-only, but that needs more careful disentangling.)

Having two vfp_helper.c files might seem a bit confusing,
but once we've finished moving all the helper code out
of the old file we are going to rename it to vfp_fpscr.c.

Signed-off-by: Peter Maydell 
---
The diff might look a little confusing; git has opted
to show it as "copy vfp_helper.c to tcg/vfp_helper.c;
remove moved code from old file; remove not-moved
code from new file".
---
 target/arm/{ => tcg}/vfp_helper.c |  399 +--
 target/arm/vfp_helper.c   | 1109 -
 target/arm/tcg/meson.build|1 +
 3 files changed, 4 insertions(+), 1505 deletions(-)
 copy target/arm/{ => tcg}/vfp_helper.c (71%)

diff --git a/target/arm/vfp_helper.c b/target/arm/tcg/vfp_helper.c
similarity index 71%
copy from target/arm/vfp_helper.c
copy to target/arm/tcg/vfp_helper.c
index 5d424477a2d..aa580ff64c7 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -23,404 +23,13 @@
 #include "internals.h"
 #include "cpu-features.h"
 #include "fpu/softfloat.h"
-#ifdef CONFIG_TCG
 #include "qemu/log.h"
-#endif
-
-/* VFP support.  We follow the convention used for VFP instructions:
-   Single precision routines have a "s" suffix, double precision a
-   "d" suffix.  */
 
 /*
- * Set the float_status behaviour to match the Arm defaults:
- *  * tininess-before-rounding
- *  * 2-input NaN propagation prefers SNaN over QNaN, and then
- *operand A over operand B (see FPProcessNaNs() pseudocode)
- *  * 3-input NaN propagation prefers SNaN over QNaN, and then
- *operand C over A over B (see FPProcessNaNs3() pseudocode,
- *but note that for QEMU muladd is a * b + c, whereas for
- *the pseudocode function the arguments are in the order c, a, b.
- *  * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
- *and the input NaN if it is signalling
- *  * Default NaN has sign bit clear, msb frac bit set
+ * VFP support.  We follow the convention used for VFP instructions:
+ * Single precision routines have a "s" suffix, double precision a
+ * "d" suffix.
  */
-void arm_set_default_fp_behaviours(float_status *s)
-{
-set_float_detect_tininess(float_tininess_before_rounding, s);
-set_float_ftz_detection(float_ftz_before_rounding, s);
-set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
-set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
-set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
-set_float_default_nan_pattern(0b0100, s);
-}
-
-/*
- * Set the float_status behaviour to match the FEAT_AFP
- * FPCR.AH=1 requirements:
- *  * tininess-after-rounding
- *  * 2-input NaN propagation prefers the first NaN
- *  * 3-input NaN propagation prefers a over b over c
- *  * 0 * Inf + NaN always returns the input NaN and doesn't
- *set Invalid for a QNaN
- *  * default NaN has sign bit set, msb frac bit set
- */
-void arm_set_ah_fp_behaviours(float_status *s)
-{
-set_float_detect_tininess(float_tininess_after_rounding, s);
-set_float_ftz_detection(float_ftz_after_rounding, s);
-set_float_2nan_prop_rule(float_2nan_prop_ab, s);
-set_float_3nan_prop_rule(float_3nan_prop_abc, s);
-set_float_infzeronan_rule(float_infzeronan_dnan_never |
-  float_infzeronan_suppress_invalid, s);
-set_float_default_nan_pattern(0b1100, s);
-}
-
-#ifdef CONFIG_TCG
-
-/* Convert host exception flags to vfp form.  */
-static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
-{
-uint32_t target_bits = 0;
-
-if (host_bits & float_flag_invalid) {
-target_bits |= FPSR_IOC;
-}
-if (host_bits & float_flag_divbyzero) {
-target_bits |= FPSR_DZC;
-}
-if (host_bits & float_flag_overflow) {
-target_bits |= FPSR_OFC;
-}
-if (host_bits & (float_flag_underflow | 
float_flag_output_denormal_flushed)) {
-target_bits |= FPSR_UFC;
-}
-if (host_bits & float_flag_inexact) {
-target_bits |= FPSR_IXC;
-}
-if (host_bits & float_flag_input_denormal_flushed) {
-target_bits |= FPSR_IDC;
-}
-/*
- * With FPCR.AH, IDC is set when an input denormal is used,
- * and flushing an output denormal to zero sets both IXC and UFC.
- */
-if (ah && (host_bits & float_flag_input_denormal_used)) {
-target_bits |= FPSR_IDC;
-}
-if (ah && (host_bits & float_flag_output_denormal_flushed)) {
-target_bits |= FPSR_IXC;
-}
-return target_bits;
-}
-
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
-{
-uint32_t a32_flags = 0, a64_flags = 0;
-
-a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
-a32_flags |= get_float_exce

[PATCH 2/4] target/arm: Move FPSCR get/set helpers to tcg/vfp_helper.c

2025-02-21 Thread Peter Maydell

Currently the helper_vfp_get_fpscr() and helper_vfp_set_fpscr()
functions do the actual work of updating the FPSCR, and we have
wrappers vfp_get_fpscr() and vfp_set_fpscr() which we use for calls
from other QEMU C code.

Flip these around so that it is vfp_get_fpscr() and vfp_set_fpscr()
which do the actual work, and helper_vfp_get_fpscr() and
helper_vfp_set_fpscr() which are the wrappers; this allows us to move
them to tcg/vfp_helper.c.

Since this is the last HELPER() we had in arm/vfp_helper.c, we can
drop the include of helper-proto.h.

Signed-off-by: Peter Maydell 
---
 target/arm/tcg/vfp_helper.c | 10 ++
 target/arm/vfp_helper.c | 15 ++-
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c
index aa580ff64c7..cd6e0d0edab 100644
--- a/target/arm/tcg/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -1128,3 +1128,13 @@ void HELPER(check_hcr_el2_trap)(CPUARMState *env, 
uint32_t rt, uint32_t reg)
 
 raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
 }
+
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+return vfp_get_fpscr(env);
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+vfp_set_fpscr(env, val);
+}
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 0e849d8d4dc..0919acb7b89 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/helper-proto.h"
 #include "internals.h"
 #include "cpu-features.h"
 #include "fpu/softfloat.h"
@@ -298,17 +297,12 @@ uint32_t vfp_get_fpsr(CPUARMState *env)
 return fpsr;
 }
 
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+uint32_t vfp_get_fpscr(CPUARMState *env)
 {
 return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
 (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
 }
 
-uint32_t vfp_get_fpscr(CPUARMState *env)
-{
-return HELPER(vfp_get_fpscr)(env);
-}
-
 void vfp_set_fpsr(CPUARMState *env, uint32_t val)
 {
 ARMCPU *cpu = env_archcpu(env);
@@ -402,13 +396,8 @@ void vfp_set_fpcr(CPUARMState *env, uint32_t val)
 vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
 }
 
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
 {
 vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
 vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
 }
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
-HELPER(vfp_set_fpscr)(env, val);
-}
-- 
2.43.0

[PATCH 4/4] target/arm: Rename vfp_helper.c to vfp_fpscr.c

2025-02-21 Thread Peter Maydell

The vfp_helper.c in the target/arm directory now only has
code for handling FPSCR/FPCR/FPSR in it, and no helper
functions. Rename it to vfp_fpscr.c; this helps keep it
distinct from tcg/vfp_helper.c.

Signed-off-by: Peter Maydell 
---
 target/arm/{vfp_helper.c => vfp_fpscr.c} | 2 +-
 target/arm/meson.build   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename target/arm/{vfp_helper.c => vfp_fpscr.c} (98%)

diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_fpscr.c
similarity index 98%
rename from target/arm/vfp_helper.c
rename to target/arm/vfp_fpscr.c
index cc0f055ef0d..92ea60ebbf2 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_fpscr.c
@@ -1,5 +1,5 @@
 /*
- * ARM VFP floating-point operations
+ * ARM VFP floating-point: handling of FPSCR/FPCR/FPSR
  *
  *  Copyright (c) 2003 Fabrice Bellard
  *
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 2e10464dbb6..3065081d241 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -4,7 +4,7 @@ arm_ss.add(files(
   'debug_helper.c',
   'gdbstub.c',
   'helper.c',
-  'vfp_helper.c',
+  'vfp_fpscr.c',
 ))
 arm_ss.add(zlib)
 
-- 
2.43.0

[PATCH v4 10/14] tests/acpi: virt: allow acpi table changes for a new table: HEST

2025-02-21 Thread Mauro Carvalho Chehab

The DSDT table will also be affected by such a change.

Signed-off-by: Mauro Carvalho Chehab 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8bf4..1a4c2277bd5a 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/aarch64/virt/DSDT",
-- 
2.48.1

[PATCH v3 04/19] intel_iommu: Fill the PASID field when creating an IOMMUTLBEntry

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

PASID value must be used by devices as a key (or part of a key)
when populating their ATC with the IOTLB entries returned by the IOMMU.

Signed-off-by: Clement Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1b4aaffedc..a360119fbe 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2511,6 +2511,7 @@ static void 
vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
 .translated_addr = 0,
 .addr_mask = size - 1,
 .perm = IOMMU_NONE,
+.pasid = vtd_as->pasid,
 },
 };
 memory_region_notify_iommu(&vtd_as->iommu, 0, event);
@@ -3098,6 +3099,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace 
*vtd_dev_as,
 event.entry.iova = addr;
 event.entry.perm = IOMMU_NONE;
 event.entry.translated_addr = 0;
+event.entry.pasid = vtd_dev_as->pasid;
 memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
 }
 
@@ -3680,6 +3682,7 @@ static IOMMUTLBEntry 
vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
 IOMMUTLBEntry iotlb = {
 /* We'll fill in the rest later. */
 .target_as = &address_space_memory,
+.pasid = vtd_as->pasid,
 };
 bool success;
 
-- 
2.48.1

[PATCH v3 03/19] memory: Allow to store the PASID in IOMMUTLBEntry

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

This will be useful for devices that support ATS
and need to store entries in an ATC (device IOTLB).

Signed-off-by: Clement Mathieu--Drif 
---
 include/exec/memory.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 29f5d31eef..eee625a9c6 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -145,6 +145,7 @@ struct IOMMUTLBEntry {
 hwaddr   translated_addr;
 hwaddr   addr_mask;  /* 0xfff = 4k translation */
 IOMMUAccessFlags perm;
+uint32_t pasid;
 };
 
 /*
-- 
2.48.1

[PATCH v3 13/19] atc: Generic ATC that can be used by PCIe devices that support SVM

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

As the SVM-capable devices will need to cache translations, we provide
a first implementation.

This cache uses a two-level design based on hash tables.
The first level is indexed by a PASID and the second by a virtual address.

Signed-off-by: Clement Mathieu--Drif 
---
 util/atc.c   | 211 +++
 util/atc.h   | 117 ++
 util/meson.build |   1 +
 3 files changed, 329 insertions(+)
 create mode 100644 util/atc.c
 create mode 100644 util/atc.h

diff --git a/util/atc.c b/util/atc.c
new file mode 100644
index 00..584ce045db
--- /dev/null
+++ b/util/atc.c
@@ -0,0 +1,211 @@
+/*
+ * QEMU emulation of an ATC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "util/atc.h"
+
+
+#define PAGE_TABLE_ENTRY_SIZE 8
+
+/* a pasid is hashed using the identity function */
+static guint atc_pasid_key_hash(gconstpointer v)
+{
+return (guint)(uintptr_t)v; /* pasid */
+}
+
+/* pasid equality */
+static gboolean atc_pasid_key_equal(gconstpointer v1, gconstpointer v2)
+{
+return v1 == v2;
+}
+
+/* Hash function for IOTLB entries */
+static guint atc_addr_key_hash(gconstpointer v)
+{
+hwaddr addr = (hwaddr)v;
+return (guint)((addr >> 32) ^ (addr & 0xU));
+}
+
+/* Equality test for IOTLB entries */
+static gboolean atc_addr_key_equal(gconstpointer v1, gconstpointer v2)
+{
+return (hwaddr)v1 == (hwaddr)v2;
+}
+
+static void atc_address_space_free(void *as)
+{
+g_hash_table_unref(as);
+}
+
+/* return log2(val), or UINT8_MAX if val is not a power of 2 */
+static uint8_t ilog2(uint64_t val)
+{
+uint8_t result = 0;
+while (val != 1) {
+if (val & 1) {
+return UINT8_MAX;
+}
+
+val >>= 1;
+result += 1;
+}
+return result;
+}
+
+ATC *atc_new(uint64_t page_size, uint8_t address_width)
+{
+ATC *atc;
+uint8_t log_page_size = ilog2(page_size);
+/* number of bits each used to store all the intermediate indexes */
+uint64_t addr_lookup_indexes_size;
+
+if (log_page_size == UINT8_MAX) {
+return NULL;
+}
+/*
+ * We only support page table entries of 8 (PAGE_TABLE_ENTRY_SIZE) bytes
+ * log2(page_size / 8) = log2(page_size) - 3
+ * is the level offset
+ */
+if (log_page_size <= 3) {
+return NULL;
+}
+
+atc = g_new0(ATC, 1);
+atc->address_spaces = g_hash_table_new_full(atc_pasid_key_hash,
+atc_pasid_key_equal,
+NULL, atc_address_space_free);
+atc->level_offset = log_page_size - 3;
+/* at this point, we know that page_size is a power of 2 */
+atc->min_addr_mask = page_size - 1;
+addr_lookup_indexes_size = address_width - log_page_size;
+if ((addr_lookup_indexes_size % atc->level_offset) != 0) {
+goto error;
+}
+atc->levels = addr_lookup_indexes_size / atc->level_offset;
+atc->page_size = page_size;
+return atc;
+
+error:
+g_free(atc);
+return NULL;
+}
+
+static inline GHashTable *atc_get_address_space_cache(ATC *atc, uint32_t pasid)
+{
+return g_hash_table_lookup(atc->address_spaces,
+   (gconstpointer)(uintptr_t)pasid);
+}
+
+void atc_create_address_space_cache(ATC *atc, uint32_t pasid)
+{
+GHashTable *as_cache;
+
+as_cache = atc_get_address_space_cache(atc, pasid);
+if (!as_cache) {
+as_cache = g_hash_table_new_full(atc_addr_key_hash,
+ atc_addr_key_equal,
+ NULL, g_free);
+g_hash_table_replace(atc->address_spaces,
+ (gpointer)(uintptr_t)pasid, as_cache);
+}
+}
+
+void atc_delete_address_space_cache(ATC *atc, uint32_t pasid)
+{
+g_hash_table_remove(atc->address_spaces, (gpointer)(uintptr_t)pasid);
+}
+
+int atc_update(ATC *atc, IOMMUTLBEntry *entry)
+{
+IOMMUTLBEntry *value;
+GHashTable *as_cache = atc_get_address_space_cache(atc, entry->pasid);
+if (!as_cache) {
+return -ENODEV;
+}
+value = g_memdup2(entry, sizeof(*value));
+g_hash_table_replace(as_cache, (gpointer)(entry->iova), value);
+return 0;
+}
+
+IOMMUTLBEntry *atc_lookup(ATC *atc, uint32_t pasid, hwaddr addr)
+{
+IOMMUTLBEntry *entry;
+hwaddr mas

[PATCH v3 12/19] pci: Add a pci-level initialization function for iommu notifiers

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

We add a convenient way to initialize a device-iotlb notifier.
This is meant to be used by ATS-capable devices.

pci_device_iommu_memory_region_pasid is introduced in this commit and
will be used in several other SVM-related functions exposed in
the PCI API.

Signed-off-by: Clement Mathieu--Drif 
---
 hw/pci/pci.c | 40 
 include/hw/pci/pci.h | 15 +++
 2 files changed, 55 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 164bb22e05..be29c0375f 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2825,6 +2825,46 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 return &address_space_memory;
 }
 
+static IOMMUMemoryRegion *pci_device_iommu_memory_region_pasid(PCIDevice *dev,
+   uint32_t pasid)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+/*
+ * This function is for internal use in the module,
+ * we can call it with PCI_NO_PASID
+ */
+if (!dev->is_master ||
+((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+return NULL;
+}
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+if (iommu_bus && iommu_bus->iommu_ops->get_memory_region_pasid) {
+return iommu_bus->iommu_ops->get_memory_region_pasid(bus,
+ iommu_bus->iommu_opaque, devfn, pasid);
+}
+return NULL;
+}
+
+bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+   IOMMUNotifier *n, IOMMUNotify fn,
+   void *opaque)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return false;
+}
+iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0, HWADDR_MAX,
+memory_region_iommu_attrs_to_index(iommu_mr,
+   
MEMTXATTRS_UNSPECIFIED));
+n->opaque = opaque;
+return true;
+}
+
 bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
  Error **errp)
 {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 644551550b..a11366e08d 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -446,6 +446,21 @@ bool pci_device_set_iommu_device(PCIDevice *dev, 
HostIOMMUDevice *hiod,
  Error **errp);
 void pci_device_unset_iommu_device(PCIDevice *dev);
 
+/**
+ * pci_iommu_init_iotlb_notifier: initialize an IOMMU notifier
+ *
+ * This function is used by devices before registering an IOTLB notifier
+ *
+ * @dev: the device
+ * @pasid: the pasid of the address space to watch
+ * @n: the notifier to initialize
+ * @fn: the callback to be installed
+ * @opaque: user pointer that can be used to store a state
+ */
+bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+   IOMMUNotifier *n, IOMMUNotify fn,
+   void *opaque);
+
 /**
  * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
  *
-- 
2.48.1

[PATCH v3 14/19] atc: Add unit tests

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

Signed-off-by: Clement Mathieu--Drif 
---
 tests/unit/meson.build |   1 +
 tests/unit/test-atc.c  | 527 +
 2 files changed, 528 insertions(+)
 create mode 100644 tests/unit/test-atc.c

diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index d5248ae51d..810197d5e1 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -48,6 +48,7 @@ tests = {
   'test-qapi-util': [],
   'test-interval-tree': [],
   'test-fifo': [],
+  'test-atc': [],
 }
 
 if have_system or have_tools
diff --git a/tests/unit/test-atc.c b/tests/unit/test-atc.c
new file mode 100644
index 00..0d1c1b7ca7
--- /dev/null
+++ b/tests/unit/test-atc.c
@@ -0,0 +1,527 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "util/atc.h"
+
+static inline bool tlb_entry_equal(IOMMUTLBEntry *e1, IOMMUTLBEntry *e2)
+{
+if (!e1 || !e2) {
+return !e1 && !e2;
+}
+return e1->iova == e2->iova &&
+e1->addr_mask == e2->addr_mask &&
+e1->pasid == e2->pasid &&
+e1->perm == e2->perm &&
+e1->target_as == e2->target_as &&
+e1->translated_addr == e2->translated_addr;
+}
+
+static void assert_lookup_equals(ATC *atc, IOMMUTLBEntry *target,
+ uint32_t pasid, hwaddr iova)
+{
+IOMMUTLBEntry *result;
+result = atc_lookup(atc, pasid, iova);
+g_assert(tlb_entry_equal(result, target));
+}
+
+static void check_creation(uint64_t page_size, uint8_t address_width,
+   uint8_t levels, uint8_t level_offset,
+   bool should_work) {
+ATC *atc = atc_new(page_size, address_width);
+if (atc) {
+g_assert(atc->levels == levels);
+g_assert(atc->level_offset == level_offset);
+
+atc_destroy(atc);
+g_assert(should_work);
+} else {
+g_assert(!should_work);
+}
+}
+
+static void test_creation_parameters(void)
+{
+check_creation(8, 39, 3, 9, false);
+check_creation(4095, 39, 3, 9, false);
+check_creation(4097, 39, 3, 9, false);
+check_creation(8192, 48, 0, 0, false);
+
+check_creation(4096, 38, 0, 0, false);
+check_creation(4096, 39, 3, 9, true);
+check_creation(4096, 40, 0, 0, false);
+check_creation(4096, 47, 0, 0, false);
+check_creation(4096, 48, 4, 9, true);
+check_creation(4096, 49, 0, 0, false);
+check_creation(4096, 56, 0, 0, false);
+check_creation(4096, 57, 5, 9, true);
+check_creation(4096, 58, 0, 0, false);
+
+check_creation(16384, 35, 0, 0, false);
+check_creation(16384, 36, 2, 11, true);
+check_creation(16384, 37, 0, 0, false);
+check_creation(16384, 46, 0, 0, false);
+check_creation(16384, 47, 3, 11, true);
+check_creation(16384, 48, 0, 0, false);
+check_creation(16384, 57, 0, 0, false);
+check_creation(16384, 58, 4, 11, true);
+check_creation(16384, 59, 0, 0, false);
+}
+
+static void test_single_entry(void)
+{
+IOMMUTLBEntry entry = {
+.iova = 0x123456789000ULL,
+.addr_mask = 0xfffULL,
+.pasid = 5,
+.perm = IOMMU_RW,
+.translated_addr = 0xdeadbeefULL,
+};
+
+ATC *atc = atc_new(4096, 48);
+g_assert(atc);
+
+assert_lookup_equals(atc, NULL, entry.pasid,
+ entry.iova + (entry.addr_mask / 2));
+
+atc_create_address_space_cache(atc, entry.pasid);
+g_assert(atc_update(atc, &entry) == 0);
+
+assert_lookup_equals(atc, NULL, entry.pasid + 1,
+ entry.iova + (entry.addr_mask / 2));
+assert_lookup_equals(atc, &entry, entry.pasid,
+ entry.iova + (entry.addr_mask / 2));
+
+atc_destroy(atc);
+}
+
+static void test_single_entry_2(void)
+{
+static uint64_t page_size = 4096;
+IOMMUTLBEntry e1 = {
+.iova = 0xabcdef20ULL,
+.addr_mask = 0xfffULL,
+.pasid = 1,
+.perm = IOMMU_RW,
+.translated_addr = 0x5eedULL,
+};
+
+ATC *atc = atc_new(page_size , 48);
+atc_create_address_space_cache(atc, e1.pasid);
+atc_update(atc, &e1);
+
+assert_lookup_equals(atc, NULL, e1.pasid, 0xabcdef201000ULL);
+
+atc_destroy(atc);
+}
+
+static void test_page_boundaries(void)
+{
+static const uint32_t pasid = 5;
+static const hwaddr page_size = 4096;
+
+/* 2 consecutive entrie

Re: [RFC 1/2] system/memory: Allow creating IOMMU mappings from RAM discard populate notifiers

2025-02-21 Thread David Hildenbrand


On 21.02.25 03:25, Chenyi Qiang wrote:



On 2/21/2025 3:39 AM, David Hildenbrand wrote:

On 20.02.25 17:13, Jean-Philippe Brucker wrote:

For Arm CCA we'd like the guest_memfd discard notifier to call the IOMMU
notifiers and create e.g. VFIO mappings. The default VFIO discard
notifier isn't sufficient for CCA because the DMA addresses need a
translation (even without vIOMMU).

At the moment:
* guest_memfd_state_change() calls the populate() notifier
* the populate() notifier calls IOMMU notifiers
* the IOMMU notifier handler calls memory_get_xlat_addr() to get a VA
* it calls ram_discard_manager_is_populated() which fails.

guest_memfd_state_change() only changes the section's state after
calling the populate() notifier. We can't easily invert the order of
operation because it uses the old state bitmap to know which pages need
the populate() notifier.


I assume we talk about this code: [1]

[1] https://lkml.kernel.org/r/20250217081833.21568-1-chenyi.qi...@intel.com


+static int memory_attribute_state_change(MemoryAttributeManager *mgr,
uint64_t offset,
+ uint64_t size, bool
shared_to_private)
+{
+    int block_size = memory_attribute_manager_get_block_size(mgr);
+    int ret = 0;
+
+    if (!memory_attribute_is_valid_range(mgr, offset, size)) {
+    error_report("%s, invalid range: offset 0x%lx, size 0x%lx",
+ __func__, offset, size);
+    return -1;
+    }
+
+    if ((shared_to_private && memory_attribute_is_range_discarded(mgr,
offset, size)) ||
+    (!shared_to_private && memory_attribute_is_range_populated(mgr,
offset, size))) {
+    return 0;
+    }
+
+    if (shared_to_private) {
+    memory_attribute_notify_discard(mgr, offset, size);
+    } else {
+    ret = memory_attribute_notify_populate(mgr, offset, size);
+    }
+
+    if (!ret) {
+    unsigned long first_bit = offset / block_size;
+    unsigned long nbits = size / block_size;
+
+    g_assert((first_bit + nbits) <= mgr->bitmap_size);
+
+    if (shared_to_private) {
+    bitmap_clear(mgr->shared_bitmap, first_bit, nbits);
+    } else {
+    bitmap_set(mgr->shared_bitmap, first_bit, nbits);
+    }
+
+    return 0;
+    }
+
+    return ret;
+}

Then, in memory_attribute_notify_populate(), we walk the bitmap again.

Why?

We just checked that it's all in the expected state, no?


virtio-mem doesn't handle it that way, so I'm curious why we would have
to do it here?


I was concerned about the case where the guest issues a request for which
only part of the range is in the desired state.
I think the main problem is the policy for the guest conversion request.
My current handling is:

1. When a conversion request is made for a range already in the desired
   state, the helper simply returns success.


Yes.


2. For requests involving a range partially in the desired state, only
   the necessary segments are converted, ensuring the entire range
   complies with the request efficiently.



Ah, now I get:

+if ((shared_to_private && memory_attribute_is_range_discarded(mgr,
offset, size)) ||
+(!shared_to_private && memory_attribute_is_range_populated(mgr,
offset, size))) {
+return 0;
+}
+

We're not failing if it might already partially be in the other state.


3. In scenarios where a conversion request is declined by other systems,
   such as a failure from VFIO during notify_populate(), the helper will
   roll back the request, maintaining consistency.

And the policy of virtio-mem is to refuse the state change if not all
blocks are in the opposite state.


Yes.



Actually, this part is still uncertain to me.



IIUC, the problem does not exist if we only convert a single page at a time.

Is there a known use case where such partial conversions could happen?


BTW, per the status/bitmap track, the virtio-mem also changes the bitmap
after the plug/unplug notifier. This is the same, correct?
Right. But because we reject these partial requests, we don't have to 
traverse the bitmap and could just adjust the bitmap operations.


--
Cheers,

David / dhildenb

[PATCH v3 00/19] intel_iommu: Add ATS support

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

This patch set belongs to a list of series that add SVM support for VT-d.

Here we focus on implementing ATS support in the IOMMU and adding a
PCI-level API to be used by virtual devices.

This work is based on the VT-d specification version 4.1 (March 2023).

Here is a link to our GitHub repository where you can find the following 
elements:
- Qemu with all the patches for SVM
- ATS
- PRI
- Device IOTLB invalidations
- Requests with already pre-translated addresses
- A demo device
- A simple driver for the demo device
- A userspace program (for testing and demonstration purposes)

https://github.com/BullSequana/Qemu-in-guest-SVM-demo

===

Context and design notes


The main purpose of this work is to enable vVT-d users to make
translation requests to the vIOMMU as described in the PCIe Gen 5.0
specification (section 10). Moreover, we aim to implement a
PCI/Memory-level framework that could be used by other vIOMMUs
to implement the same features.

What is ATS?


ATS (Address Translation Service) is a PCIe-level protocol that
enables PCIe devices to query an IOMMU for virtual to physical
address translations in a specific address space (such as a userland
process address space). When a device receives translation responses
from an IOMMU, it may decide to store them in an internal cache,
often known as "ATC" (Address Translation Cache) or "Device IOTLB".
To keep page tables and caches consistent, the IOMMU is allowed to 
send asynchronous invalidation requests to its client devices.

To avoid introducing an unnecessarily complex API, this series simply
exposes 3 functions. The first 2 are a pair of setup functions that
are called to install and remove the ATS invalidation callback during
the initialization phase of a process. The third one will be
used to request translations. The callback setup API introduced in
this series calls the IOMMUNotifier API under the hood.

API design
''

- int pci_register_iommu_tlb_event_notifier(PCIDevice *dev,
uint32_t pasid,
IOMMUNotifier *n);

- int pci_unregister_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
  IOMMUNotifier *n);

- ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
bool priv_req, bool exec_req,
hwaddr addr, size_t length,
bool no_write,
IOMMUTLBEntry *result,
size_t result_length,
uint32_t *err_count);

Although device developers may want to implement a custom ATC for
testing or performance measurement purposes, we provide a generic
implementation as a utility module.

Overview


Here are the interactions between an ATS-capable PCIe device and the vVT-d:


  

  
  ┌───┐ ┌┐  
  
  │Device │ │PCI / Memory│  
  
  │   │ pci_ats_request_│abstraction │ iommu_ats_   
  
  │   │ translation_││ request_ 
  
  │┌─┐│ pasid   │ AS lookup  │ translation  
  
  ││Logic││>│╶╶╶>│──┐   
  
  │└─┘│<│<╶╶╶│<──┐  │   
  
  │┌─┐│ ││   │  │   
  
  ││inv func ││<───┐││   │  │   
  
  │└─┘││││   │  │   
  
  ││  ││││   │  │   
  
  │∨  ││││   │  │   
  
  │┌─┐││││   │  │   
  
  ││ATC  │││││   │  │   
  
  │└─┘││││   │  │   
  
  └───┘│└┘   │  │   
  
   │ │  │   
  
   │ │  │   
  
   │

[PATCH v3 19/19] intel_iommu: Add support for ATS

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

Signed-off-by: Clement Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 74 --
 hw/i386/intel_iommu_internal.h |  1 +
 2 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 9daf8025cc..2b1c733d86 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4159,12 +4159,10 @@ static void 
vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
 bool is_fpd_set = false;
 VTDContextEntry ce;
 
-assert(vtd_as->pasid != PCI_NO_PASID);
-
 /* Try out best to fetch FPD, we can't do anything more */
 if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
-if (!is_fpd_set && s->root_scalable) {
+if (!is_fpd_set && s->root_scalable && vtd_as->pasid != PCI_NO_PASID) {
 vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
 }
 }
@@ -4738,6 +4736,74 @@ static IOMMUMemoryRegion 
*vtd_get_memory_region_pasid(PCIBus *bus,
 return &vtd_as->iommu;
 }
 
+static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu,
+hwaddr addr,
+IOMMUAccessFlags flags,
+int iommu_idx)
+{
+IOMMUTLBEntry entry;
+VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+
+if (vtd_is_interrupt_addr(addr)) {
+vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO);
+entry.target_as = &address_space_memory;
+entry.iova = 0;
+entry.translated_addr = 0;
+entry.addr_mask = ~VTD_PAGE_MASK_4K;
+entry.perm = IOMMU_NONE;
+entry.pasid = PCI_NO_PASID;
+} else {
+entry = vtd_iommu_translate(iommu, addr, flags, iommu_idx);
+}
+
+return entry;
+}
+
+static ssize_t vtd_iommu_ats_request_translation(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req,
+priv_req, false, false);
+ssize_t res_index = 0;
+hwaddr target_address = addr + length;
+IOMMUTLBEntry entry;
+
+*err_count = 0;
+
+while ((addr < target_address) && (res_index < result_length)) {
+entry = vtd_iommu_ats_do_translate(iommu, addr, flags, 0);
+if (!IOMMU_TLB_ENTRY_TRANSLATION_ERROR(&entry)) { /* Translation done 
*/
+/*
+ * 4.1.2 : Global Mapping (G) : Remapping hardware provides a value
+ * of 0 in this field
+ */
+entry.perm &= ~IOMMU_GLOBAL;
+} else {
+*err_count += 1;
+}
+result[res_index] = entry;
+res_index += 1;
+addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1);
+}
+
+/* Buffer too small */
+if (addr < target_address) {
+return -ENOMEM;
+}
+
+return res_index;
+}
+
+static uint64_t vtd_get_min_page_size(IOMMUMemoryRegion *iommu)
+{
+return VTD_PAGE_SIZE;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
 .get_memory_region_pasid = vtd_get_memory_region_pasid,
@@ -4913,6 +4979,8 @@ static void 
vtd_iommu_memory_region_class_init(ObjectClass *klass,
 imrc->translate = vtd_iommu_translate;
 imrc->notify_flag_changed = vtd_iommu_notify_flag_changed;
 imrc->replay = vtd_iommu_replay;
+imrc->iommu_ats_request_translation = vtd_iommu_ats_request_translation;
+imrc->get_min_page_size = vtd_get_min_page_size;
 }
 
 static const TypeInfo vtd_iommu_memory_region_info = {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 238f1f443f..7e2071cd4d 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -192,6 +192,7 @@
 #define VTD_ECAP_SC (1ULL << 7)
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_SRS(1ULL << 31)
+#define VTD_ECAP_NWFS   (1ULL << 33)
 #define VTD_ECAP_PSS(19ULL << 35)
 #define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
-- 
2.48.1

[PATCH v3 01/19] memory: Add permissions in IOMMUAccessFlags

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

This will be necessary for devices implementing ATS.
We also define a new macro IOMMU_ACCESS_FLAG_FULL in addition to
IOMMU_ACCESS_FLAG to support more access flags.
IOMMU_ACCESS_FLAG is kept for convenience and backward compatibility.

Here are the flags added (defined by the PCIe 5 specification) :
- Execute Requested
- Privileged Mode Requested
- Global
- Untranslated Only

IOMMU_ACCESS_FLAG sets the additional flags to 0

Signed-off-by: Clement Mathieu--Drif 
---
 include/exec/memory.h | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 78c4e0aec8..29f5d31eef 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -110,15 +110,34 @@ struct MemoryRegionSection {
 
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
 
-/* See address_space_translate: bit 0 is read, bit 1 is write.  */
+/*
+ * See address_space_translate:
+ *  - bit 0 : read
+ *  - bit 1 : write
+ *  - bit 2 : exec
+ *  - bit 3 : priv
+ *  - bit 4 : global
+ *  - bit 5 : untranslated only
+ */
 typedef enum {
 IOMMU_NONE = 0,
 IOMMU_RO   = 1,
 IOMMU_WO   = 2,
 IOMMU_RW   = 3,
+IOMMU_EXEC = 4,
+IOMMU_PRIV = 8,
+IOMMU_GLOBAL = 16,
+IOMMU_UNTRANSLATED_ONLY = 32,
 } IOMMUAccessFlags;
 
-#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
+#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | \
+((w) ? IOMMU_WO : 0))
+#define IOMMU_ACCESS_FLAG_FULL(r, w, x, p, g, uo) \
+(IOMMU_ACCESS_FLAG(r, w) | \
+((x) ? IOMMU_EXEC : 0) | \
+((p) ? IOMMU_PRIV : 0) | \
+((g) ? IOMMU_GLOBAL : 0) | \
+((uo) ? IOMMU_UNTRANSLATED_ONLY : 0))
 
 struct IOMMUTLBEntry {
 AddressSpace*target_as;
-- 
2.48.1

[PATCH v3 10/19] intel_iommu: Implement the get_memory_region_pasid iommu operation

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

Signed-off-by: Clement Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 17 -
 include/hw/i386/intel_iommu.h |  2 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a360119fbe..d3772d8902 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4202,7 +4202,7 @@ static const MemoryRegionOps vtd_mem_ir_fault_ops = {
 };
 
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
- int devfn, unsigned int pasid)
+ int devfn, uint32_t pasid)
 {
 /*
  * We can't simply use sid here since the bus number might not be
@@ -4719,8 +4719,23 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
 return &vtd_as->as;
 }
 
+static IOMMUMemoryRegion *vtd_get_memory_region_pasid(PCIBus *bus,
+  void *opaque,
+  int devfn,
+  uint32_t pasid)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace *vtd_as;
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+return &vtd_as->iommu;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
+.get_memory_region_pasid = vtd_get_memory_region_pasid,
 .set_iommu_device = vtd_dev_set_iommu_device,
 .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index e95477e855..08f71c262e 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -324,6 +324,6 @@ struct IntelIOMMUState {
  * create a new one if none exists
  */
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
- int devfn, unsigned int pasid);
+ int devfn, uint32_t pasid);
 
 #endif
-- 
2.48.1

[PATCH v3 08/19] pci: Cache the bus mastering status in the device

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

The cached is_master value is necessary to know if a device is
allowed to issue ATS requests or not.
This behavior is implemented in an upcoming patch.

Signed-off-by: Clement Mathieu--Drif 
---
 hw/pci/pci.c| 25 +++--
 include/hw/pci/pci_device.h |  1 +
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2afa423925..164bb22e05 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -134,6 +134,12 @@ static GSequence *pci_acpi_index_list(void)
 return used_acpi_index_list;
 }
 
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+memory_region_set_enabled(&d->bus_master_enable_region, enable);
+d->is_master = enable; /* cache the status */
+}
+
 static void pci_init_bus_master(PCIDevice *pci_dev)
 {
 AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -141,7 +147,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
 memory_region_init_alias(&pci_dev->bus_master_enable_region,
  OBJECT(pci_dev), "bus master",
  dma_as->root, 0, 
memory_region_size(dma_as->root));
-memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+pci_set_master(pci_dev, false);
 memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
 &pci_dev->bus_master_enable_region);
 }
@@ -727,9 +733,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, 
size_t size,
 pci_bridge_update_mappings(PCI_BRIDGE(s));
 }
 
-memory_region_set_enabled(&s->bus_master_enable_region,
-  pci_get_word(s->config + PCI_COMMAND)
-  & PCI_COMMAND_MASTER);
+pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+  & PCI_COMMAND_MASTER);
 
 g_free(config);
 return 0;
@@ -1684,9 +1689,10 @@ void pci_default_write_config(PCIDevice *d, uint32_t 
addr, uint32_t val_in, int
 
 if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
 pci_update_irq_disabled(d, was_irq_disabled);
-memory_region_set_enabled(&d->bus_master_enable_region,
-  (pci_get_word(d->config + PCI_COMMAND)
-   & PCI_COMMAND_MASTER) && d->enabled);
+pci_set_master(d,
+   (pci_get_word(d->config + PCI_COMMAND) &
+PCI_COMMAND_MASTER) &&
+   d->enabled);
 }
 
 msi_write_config(d, addr, val_in, l);
@@ -2974,9 +2980,8 @@ void pci_set_enabled(PCIDevice *d, bool state)
 
 d->enabled = state;
 pci_update_mappings(d);
-memory_region_set_enabled(&d->bus_master_enable_region,
-  (pci_get_word(d->config + PCI_COMMAND)
-   & PCI_COMMAND_MASTER) && d->enabled);
+pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+  & PCI_COMMAND_MASTER) && d->enabled);
 if (!d->enabled) {
 pci_device_reset(d);
 }
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index add208edfa..40606baa5d 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -88,6 +88,7 @@ struct PCIDevice {
 char name[64];
 PCIIORegion io_regions[PCI_NUM_REGIONS];
 AddressSpace bus_master_as;
+bool is_master;
 MemoryRegion bus_master_container_region;
 MemoryRegion bus_master_enable_region;
 
-- 
2.48.1

[PATCH v3 18/19] intel_iommu: Return page walk level even when the translation fails

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

We use this information in vtd_do_iommu_translate to populate the
IOMMUTLBEntry and indicate the correct page mask. This prevents ATS
devices from sending many useless translation requests when a megapage
or gigapage iova is not mapped to a physical address.

Signed-off-by: Clement Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b9b5d492f5..9daf8025cc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1995,9 +1995,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
  uint32_t pasid)
 {
 dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
-uint32_t level = vtd_get_iova_level(s, ce, pasid);
 uint32_t offset;
 uint64_t flpte, flag_ad = VTD_FL_A;
+*flpte_level = vtd_get_iova_level(s, ce, pasid);
 
 if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
 error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 
","
@@ -2006,11 +2006,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 }
 
 while (true) {
-offset = vtd_iova_level_offset(iova, level);
+offset = vtd_iova_level_offset(iova, *flpte_level);
 flpte = vtd_get_pte(addr, offset);
 
 if (flpte == (uint64_t)-1) {
-if (level == vtd_get_iova_level(s, ce, pasid)) {
+if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) {
 /* Invalid programming of pasid-entry */
 return -VTD_FR_PASID_ENTRY_FSPTPTR_INV;
 } else {
@@ -2036,15 +2036,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 if (is_write && !(flpte & VTD_FL_RW)) {
 return -VTD_FR_SM_WRITE;
 }
-if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) {
 error_report_once("%s: detected flpte reserved non-zero "
   "iova=0x%" PRIx64 ", level=0x%" PRIx32
   "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
-  __func__, iova, level, flpte, pasid);
+  __func__, iova, *flpte_level, flpte, pasid);
 return -VTD_FR_FS_PAGING_ENTRY_RSVD;
 }
 
-if (vtd_is_last_pte(flpte, level) && is_write) {
+if (vtd_is_last_pte(flpte, *flpte_level) && is_write) {
 flag_ad |= VTD_FL_D;
 }
 
@@ -2052,14 +2052,13 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 return -VTD_FR_FS_BIT_UPDATE_FAILED;
 }
 
-if (vtd_is_last_pte(flpte, level)) {
+if (vtd_is_last_pte(flpte, *flpte_level)) {
 *flptep = flpte;
-*flpte_level = level;
 return 0;
 }
 
 addr = vtd_get_pte_addr(flpte, aw_bits);
-level--;
+(*flpte_level)--;
 }
 }
 
-- 
2.48.1

[PATCH v3 05/19] pcie: Add helper to declare PASID capability for a pcie device

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

Signed-off-by: Clement Mathieu--Drif 
---
 hw/pci/pcie.c  | 24 
 include/hw/pci/pcie.h  |  6 +-
 include/hw/pci/pcie_regs.h |  5 +
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 1b12db6fa2..f42a256f15 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1214,3 +1214,27 @@ void pcie_acs_reset(PCIDevice *dev)
 pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
 }
 }
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+static const uint16_t control_reg_rw_mask = 0x07;
+uint16_t capability_reg = pasid_width;
+
+pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+PCI_EXT_CAP_PASID_SIZEOF);
+
+capability_reg <<= PCI_PASID_CAP_WIDTH_SHIFT;
+capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+capability_reg |= priv_mod  ? PCI_PASID_CAP_PRIV : 0;
+pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+/* Everything is disabled by default */
+pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+dev->exp.pasid_cap = offset;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b8d59732bc..aa040c3e97 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -72,8 +72,9 @@ struct PCIExpressDevice {
 uint16_t aer_cap;
 PCIEAERLog aer_log;
 
-/* Offset of ATS capability in config space */
+/* Offset of ATS and PASID capabilities in config space */
 uint16_t ats_cap;
+uint16_t pasid_cap;
 
 /* ACS */
 uint16_t acs_cap;
@@ -152,4 +153,7 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
  Error **errp);
 void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
  DeviceState *dev, Error **errp);
+
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod);
 #endif /* QEMU_PCIE_H */
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index 9d3b6868dc..4d9cf4a29c 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -86,6 +86,11 @@ typedef enum PCIExpLinkWidth {
 #define PCI_ARI_VER 1
 #define PCI_ARI_SIZEOF  8
 
+/* PASID */
+#define PCI_PASID_VER   1
+#define PCI_EXT_CAP_PASID_MAX_WIDTH 20
+#define PCI_PASID_CAP_WIDTH_SHIFT   8
+
 /* AER */
 #define PCI_ERR_VER 2
 #define PCI_ERR_SIZEOF  0x48
-- 
2.48.1

[PATCH v3 17/19] intel_iommu: Set address mask when a translation fails and adjust W permission

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

Implements the behavior defined in section 10.2.3.5 of PCIe spec rev 5.
This is needed by devices that support ATS.

Signed-off-by: Clement Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d3772d8902..b9b5d492f5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2100,7 +2100,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, PCIBus *bus,
 uint8_t bus_num = pci_bus_num(bus);
 VTDContextCacheEntry *cc_entry;
 uint64_t pte, page_mask;
-uint32_t level, pasid = vtd_as->pasid;
+uint32_t level = UINT32_MAX;
+uint32_t pasid = vtd_as->pasid;
 uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
 int ret_fr;
 bool is_fpd_set = false;
@@ -2259,14 +2260,19 @@ out:
 entry->iova = addr & page_mask;
 entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
 entry->addr_mask = ~page_mask;
-entry->perm = access_flags;
+entry->perm = (is_write ? access_flags : (access_flags & (~IOMMU_WO)));
 return true;
 
 error:
 vtd_iommu_unlock(s);
 entry->iova = 0;
 entry->translated_addr = 0;
-entry->addr_mask = 0;
+/*
+ * Set the mask for ATS (the range must be present even when the
+ * translation fails : PCIe rev 5 10.2.3.5)
+ */
+entry->addr_mask = (level != UINT32_MAX) ?
+   (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K);
 entry->perm = IOMMU_NONE;
 return false;
 }
-- 
2.48.1

[PATCH v3 15/19] memory: Add an API for ATS support

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

An IOMMU has to implement iommu_ats_request_translation to support ATS.

Devices can use IOMMU_TLB_ENTRY_TRANSLATION_ERROR to check the tlb
entries returned by a translation request.

We decided not to use the existing translation operation for 2 reasons.
First, ATS is designed to translate ranges and not isolated addresses.
Second, we need ATS-specific parameters.

Signed-off-by: Clement Mathieu--Drif 
---
 include/exec/memory.h | 26 ++
 system/memory.c   | 21 +
 2 files changed, 47 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 4d240cad1c..9a8e765909 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -148,6 +148,10 @@ struct IOMMUTLBEntry {
 uint32_t pasid;
 };
 
+/* Check if an IOMMU TLB entry indicates a translation error */
+#define IOMMU_TLB_ENTRY_TRANSLATION_ERROR(entry) ((((entry)->perm) & IOMMU_RW) 
\
+== IOMMU_NONE)
+
 /*
  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
  * register with one or multiple IOMMU Notifier capability bit(s).
@@ -535,6 +539,20 @@ struct IOMMUMemoryRegionClass {
  * @iommu: the IOMMUMemoryRegion
  */
 int (*num_indexes)(IOMMUMemoryRegion *iommu);
+
+/**
+ * @iommu_ats_request_translation:
+ * This method must be implemented if the IOMMU has ATS enabled
+ *
+ * @see pci_ats_request_translation_pasid
+ */
+ssize_t (*iommu_ats_request_translation)(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -1892,6 +1910,14 @@ void memory_region_iommu_replay(IOMMUMemoryRegion 
*iommu_mr, IOMMUNotifier *n);
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
  IOMMUNotifier *n);
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req, bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count);
+
 /**
  * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
  * defined on the IOMMU.
diff --git a/system/memory.c b/system/memory.c
index 4c829793a0..f95c602a46 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2011,6 +2011,27 @@ void 
memory_region_unregister_iommu_notifier(MemoryRegion *mr,
 memory_region_update_iommu_notify_flags(iommu_mr, NULL);
 }
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req,
+bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count)
+{
+IOMMUMemoryRegionClass *imrc =
+memory_region_get_iommu_class_nocheck(iommu_mr);
+
+if (!imrc->iommu_ats_request_translation) {
+return -ENODEV;
+}
+
+return imrc->iommu_ats_request_translation(iommu_mr, priv_req, exec_req,
+   addr, length, no_write, result,
+   result_length, err_count);
+}
+
 void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
 const IOMMUTLBEvent *event)
 {
-- 
2.48.1

[PATCH v3 06/19] pcie: Helper functions to check if PASID is enabled

2025-02-21 Thread CLEMENT MATHIEU--DRIF

From: Clement Mathieu--Drif 

pcie_pasid_enabled() checks whether the PASID capability is
present. If it is, we read the configuration space to get
the status of the feature (enabled or not).

Signed-off-by: Clement Mathieu--Drif 
---
 hw/pci/pcie.c | 9 +
 include/hw/pci/pcie.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index f42a256f15..8186d64234 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1238,3 +1238,12 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
 
 dev->exp.pasid_cap = offset;
 }
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+PCI_PASID_CTRL_ENABLE) != 0;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index aa040c3e97..63604ccc6e 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -156,4 +156,6 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 
 void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
  bool exec_perm, bool priv_mod);
+
+bool pcie_pasid_enabled(const PCIDevice *dev);
 #endif /* QEMU_PCIE_H */
-- 
2.48.1

Re: [PATCH v2 3/3] target/riscv/kvm: add missing KVM CSRs

2025-02-21 Thread Andrew Jones

On Fri, Feb 21, 2025 at 09:26:23AM -0300, Daniel Henrique Barboza wrote:
> We're missing scounteren and senvcfg CSRs, both already present in the
> KVM UAPI.
> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/kvm/kvm-cpu.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
> index f14fcc58bb..017ca82226 100644
> --- a/target/riscv/kvm/kvm-cpu.c
> +++ b/target/riscv/kvm/kvm-cpu.c
> @@ -616,6 +616,8 @@ static void kvm_riscv_reset_regs_csr(CPURISCVState *env)
>  env->stval = 0;
>  env->mip = 0;
>  env->satp = 0;
> +env->scounteren = 0;
> +env->senvcfg = 0;
>  }
>  
>  static int kvm_riscv_get_regs_csr(CPUState *cs)
> @@ -631,6 +633,8 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
>  KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
>  KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
>  KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
> +KVM_RISCV_GET_CSR(cs, env, scounteren, env->scounteren);
> +KVM_RISCV_GET_CSR(cs, env, senvcfg, env->senvcfg);
>  
>  return 0;
>  }
> @@ -648,6 +652,8 @@ static int kvm_riscv_put_regs_csr(CPUState *cs)
>  KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
>  KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
>  KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
> +KVM_RISCV_SET_CSR(cs, env, scounteren, env->scounteren);
> +KVM_RISCV_SET_CSR(cs, env, senvcfg, env->senvcfg);
>  
>  return 0;
>  }
> -- 
> 2.48.1
>

Reviewed-by: Andrew Jones

Re: [PATCH v2 2/3] target/riscv/kvm: add kvm_riscv_reset_regs_csr()

2025-02-21 Thread Andrew Jones

On Fri, Feb 21, 2025 at 09:26:22AM -0300, Daniel Henrique Barboza wrote:
> We're setting reset vals for KVM csrs during kvm_riscv_reset_vcpu(), but
> in no particular order and missing some of them (like env->mstatus).
> 
> Create a helper to do that, unclogging reset_vcpu(), and initialize
> env->mstatus as well. Keep the regs in the same order they appear in
> struct kvm_riscv_csr from the KVM UAPI, similar to what
> kvm_riscv_(get|put)_regs_csr are doing. This will make it a bit easier to
> add new KVM CSRs and to verify which values we're writing back to KVM
> during vcpu reset.
> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/kvm/kvm-cpu.c | 23 +++
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
> index 484b6afe7c..f14fcc58bb 100644
> --- a/target/riscv/kvm/kvm-cpu.c
> +++ b/target/riscv/kvm/kvm-cpu.c
> @@ -605,6 +605,19 @@ static int kvm_riscv_put_regs_core(CPUState *cs)
>  return ret;
>  }
>  
> +static void kvm_riscv_reset_regs_csr(CPURISCVState *env)
> +{
> +env->mstatus = 0;
> +env->mie = 0;
> +env->stvec = 0;
> +env->sscratch = 0;
> +env->sepc = 0;
> +env->scause = 0;
> +env->stval = 0;
> +env->mip = 0;
> +env->satp = 0;
> +}
> +
>  static int kvm_riscv_get_regs_csr(CPUState *cs)
>  {
>  CPURISCVState *env = &RISCV_CPU(cs)->env;
> @@ -1609,14 +1622,8 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
>  env->pc = cpu->env.kernel_addr;
>  env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
>  env->gpr[11] = cpu->env.fdt_addr;  /* a1 */
> -env->satp = 0;
> -env->mie = 0;
> -env->stvec = 0;
> -env->sscratch = 0;
> -env->sepc = 0;
> -env->scause = 0;
> -env->stval = 0;
> -env->mip = 0;
> +
> +kvm_riscv_reset_regs_csr(env);
>  }
>  
>  void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
> -- 
> 2.48.1
>

Reviewed-by: Andrew Jones

1 2 3 >

1 - 100 of 234 matches

Mail list logo