date:20250226

On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:
> With all support in place, enable fadump by exporting the
> "ibm,configure-kernel-dump" RTAS call in the device tree.
>
> Presence of "ibm,configure-kernel-dump" tells the kernel that the
> platform (QEMU) supports fadump.
>
> Pass "fadump=on" to enable Linux to use firmware assisted dump.
>
> Logs of a linux boot with firmware assisted dump:
>
> ./build/qemu-system-ppc64 -M pseries,x-vof=on --cpu power10 --smp 4 -m 4G 
> -kernel some-vmlinux -initrd some-initrd -append "debug fadump=on 
> crashkernel=1G" -nographic
> [0.00] random: crng init done
> [0.00] fadump: Reserved 1024MB of memory at 0x004000 
> (System RAM: 4096MB)
> ...
> [1.084686] rtas fadump: Registration is successful!
> ...
> # cat /sys/kernel/debug/powerpc/fadump_region
> CPU :[0x004000-0x00400013d3] 0x13d4 bytes, Dumped: 0x0
> HPTE:[0x00400013d4-0x00400013d3] 0x0 bytes, Dumped: 0x0
> DUMP: Src: 0x00, Dest: 0x004001, Size: 0x4000, 
> Dumped: 0x0 bytes
>
> [0x00f800-0x00]: cmdline append: ''
> # echo c > /proc/sysrq-trigger
>
> The fadump boot after crash:
>
> [0.00] rtas fadump: Firmware-assisted dump is active.
> [0.00] fadump: Updated cmdline: debug fadump=on crashkernel=1G
> [0.00] fadump: Firmware-assisted dump is active.
> [0.00] fadump: Reserving 3072MB of memory at 0x004000 for 
> preserving crash data
> 
> # file /proc/vmcore
> /proc/vmcore: ELF 64-bit LSB core file, 64-bit PowerPC or cisco 7500, 
> OpenPOWER ELF V2 ABI, version 1 (SYSV), SVR4-style
>
> Analysing the vmcore with crash-utility:
>
>   KERNEL: vmlinux-6.14-rc2
> DUMPFILE: vmcore-a64dcfb451e2-nocma
> CPUS: 4
> DATE: Thu Jan  1 05:30:00 IST 1970
>   UPTIME: 00:00:30
> LOAD AVERAGE: 0.74, 0.21, 0.07
>TASKS: 94
> NODENAME: buildroot
>  RELEASE: 6.14.0-rc2+
>  VERSION: #1 SMP Wed Feb 12 06:49:59 CST 2025
>  MACHINE: ppc64le  (1000 Mhz)
>   MEMORY: 4 GB
>PANIC: "Kernel panic - not syncing: sysrq triggered crash"
>  PID: 270
>  COMMAND: "sh"
> TASK: c9e7cc00  [THREAD_INFO: c9e7cc00]
>  CPU: 3
>STATE: TASK_RUNNING (PANIC)
>
> Signed-off-by: Aditya Gupta 

This is very cool, nice work. Does it work with KVM? I think... probably
it could?

Are you able to add a functional test case for it? This is something
that people (including me) will forget to test...

Thanks,
Nick

> ---
>  hw/ppc/spapr_rtas.c| 6 +-
>  include/hw/ppc/spapr.h | 3 ++-
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 0aca4270aee8..bd2ed16a46e3 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -692,7 +692,7 @@ static void trigger_fadump_boot(target_ulong 
> spapr_retcode)
>  }
>  
>  /* Papr Section 7.4.9 ibm,configure-kernel-dump RTAS call */
> -static __attribute((unused)) void rtas_configure_kernel_dump(PowerPCCPU *cpu,
> +static void rtas_configure_kernel_dump(PowerPCCPU *cpu,
> SpaprMachineState *spapr,
> uint32_t token, uint32_t nargs,
> target_ulong args,
> @@ -1109,6 +1109,10 @@ static void core_rtas_register_types(void)
>  spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>  rtas_ibm_nmi_interlock);
>  
> +/* Register Fadump rtas call */
> +spapr_rtas_register(RTAS_CONFIGURE_KERNEL_DUMP, 
> "ibm,configure-kernel-dump",
> +rtas_configure_kernel_dump);
> +
>  qtest_set_command_cb(spapr_qtest_callback);
>  }
>  
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index fa63008e57ec..bde3bdc4b80c 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -768,8 +768,9 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
>  #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
>  #define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
>  #define RTAS_IBM_NMI_INTERLOCK  (RTAS_TOKEN_BASE + 0x2C)
> +#define RTAS_CONFIGURE_KERNEL_DUMP  (RTAS_TOKEN_BASE + 0x2D)
>  
> -#define RTAS_TOKEN_MAX  (RTAS_TOKEN_BASE + 0x2D)
> +#define RTAS_TOKEN_MAX  (RTAS_TOKEN_BASE + 0x2E)
>  
>  /* Fadump commands */
>  #define FADUMP_CMD_REGISTER1

Re: [PATCH 4/6] hw/ppc: Implement saving CPU state in Fadump


On 27/02/25 08:57, Nicholas Piggin wrote:


On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:

<...snip...>
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 9b29cadab2c9..0aca4270aee8 100644
<...snip...>
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -413,9 +416,174 @@ static bool fadump_preserve_mem(void)
  }
  
  switch (data_type) {

-case FADUMP_CPU_STATE_DATA:
-/* TODO: Add CPU state data */
+case FADUMP_CPU_STATE_DATA: {

I would split these out into their own functions if they grow more than
a few lines.

Makes sense. Will add this into a new helper function.



+struct rtas_fadump_reg_save_area_header reg_save_hdr;
+struct rtas_fadump_reg_entry **reg_entries;
+struct rtas_fadump_reg_entry *curr_reg_entry;
+
+uint32_t fadump_reg_entries_size;
+__be32 num_cpus = 0;
+uint32_t num_regs_per_cpu = 0;
+CPUState *cpu;
+CPUPPCState *env;
+PowerPCCPU *ppc_cpu;
+
+CPU_FOREACH(cpu) {
+++num_cpus;
+}
+
+reg_save_hdr.version = cpu_to_be32(1);
+reg_save_hdr.magic_number =
+cpu_to_be64(fadump_str_to_u64("REGSAVE"));
+
+/* Reg save area header is immediately followed by num cpus */
+reg_save_hdr.num_cpu_offset =
+cpu_to_be32(sizeof(struct rtas_fadump_reg_save_area_header));
+
+fadump_reg_entries_size = num_cpus *
+  FADUMP_NUM_PER_CPU_REGS *
+  sizeof(struct rtas_fadump_reg_entry);
+
+reg_entries = malloc(fadump_reg_entries_size);
+curr_reg_entry = (struct rtas_fadump_reg_entry *)reg_entries;
+
+/* This must loop num_cpus time */
+CPU_FOREACH(cpu) {
+ppc_cpu = POWERPC_CPU(cpu);
+env = cpu_env(cpu);
+num_regs_per_cpu = 0;
+
+curr_reg_entry->reg_id =
+cpu_to_be64(fadump_str_to_u64("CPUSTRT"));
+curr_reg_entry->reg_value = ppc_cpu->vcpu_id;
+++curr_reg_entry;
+
+#define REG_ENTRY(id, val) \
+do {   \
+curr_reg_entry->reg_id =   \
+cpu_to_be64(fadump_str_to_u64(#id));   \
+curr_reg_entry->reg_value = val;   \
+++curr_reg_entry;  \
+++num_regs_per_cpu;\
+} while (0)
+
+REG_ENTRY(ACOP, env->spr[SPR_ACOP]);
+REG_ENTRY(AMR, env->spr[SPR_AMR]);
+REG_ENTRY(BESCR, env->spr[SPR_BESCR]);
+REG_ENTRY(CFAR, env->spr[SPR_CFAR]);
+REG_ENTRY(CIABR, env->spr[SPR_CIABR]);
+
+/* Save the condition register */
+uint64_t cr = 0;
+cr |= (env->crf[0] & 0xf);
+cr |= (env->crf[1] & 0xf) << 1;
+cr |= (env->crf[2] & 0xf) << 2;
+cr |= (env->crf[3] & 0xf) << 3;
+cr |= (env->crf[4] & 0xf) << 4;
+cr |= (env->crf[5] & 0xf) << 5;
+cr |= (env->crf[6] & 0xf) << 6;
+cr |= (env->crf[7] & 0xf) << 7;

Shift values wrong here I think... Use ppc_get_cr()

Okay, I had some issues getting this CR. I will use 'ppc_get_cr', thanks!



+REG_ENTRY(CR, cr);
+
+REG_ENTRY(CTR, env->spr[SPR_CTR]);
+REG_ENTRY(CTRL, env->spr[SPR_CTRL]);
+REG_ENTRY(DABR, env->spr[SPR_DABR]);
+REG_ENTRY(DABRX, env->spr[SPR_DABRX]);
+REG_ENTRY(DAR, env->spr[SPR_DAR]);
+REG_ENTRY(DAWR0, env->spr[SPR_DAWR0]);
+REG_ENTRY(DAWR1, env->spr[SPR_DAWR1]);
+REG_ENTRY(DAWRX0, env->spr[SPR_DAWRX0]);
+REG_ENTRY(DAWRX1, env->spr[SPR_DAWRX1]);
+REG_ENTRY(DPDES, env->spr[SPR_DPDES]);
+REG_ENTRY(DSCR, env->spr[SPR_DSCR]);
+REG_ENTRY(DSISR, env->spr[SPR_DSISR]);
+REG_ENTRY(EBBHR, env->spr[SPR_EBBHR]);
+REG_ENTRY(EBBRR, env->spr[SPR_EBBRR]);
+
+REG_ENTRY(FPSCR, env->fpscr);
+REG_ENTRY(FSCR, env->spr[SPR_FSCR]);
+
+/* Save the GPRs */
+for (int gpr_id = 0; gpr_id < 32; ++gpr_id) {
+curr_reg_entry->reg_id =
+cpu_to_be64(fadump_gpr_id_to_u64(gpr_id));
+curr_reg_entry->reg_value = env->gpr[i];
+++curr_reg_entry;
+++num_regs_per_cpu;
+}
+
+REG_ENTRY(IAMR, env->spr[SPR_IAMR]);
+REG_ENTRY(IC, env->spr[SPR_IC]);
+

Re: [PATCH 03/10] qapi: delete un-needed python static analysis configs

2025-02-26 Thread Markus Armbruster

John Snow  writes:

> On Wed, Feb 26, 2025 at 2:28 AM Markus Armbruster  wrote:
>
>> John Snow  writes:
>>
>> > The pylint config is being left in place because the settings differ
>> > enough from the python/ directory settings that we need a chit-chat on
>> > how to merge them O:-)
>> >
>> > Everything else can go.
>> >
>> > Signed-off-by: John Snow 

[...]

>> > diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
>> > deleted file mode 100644
>> > index 8109470a031..000
>> > --- a/scripts/qapi/mypy.ini
>> > +++ /dev/null
>> > @@ -1,4 +0,0 @@
>> > -[mypy]
>> > -strict = True
>> > -disallow_untyped_calls = False
>> > -python_version = 3.8
>>
>> python/setup.cfg has:
>>
>>[mypy]
>>strict = True
>>python_version = 3.8
>>warn_unused_configs = True
>>namespace_packages = True
>>warn_unused_ignores = False
>>
>> Can you briefly explain the differences?
>>
>
> warn_unused_configs: Catches config values that aren't actually recognized
> or used. Was helpful once upon a time when re-arranging the Python
> directory to behave like a package to ensure that the conf files were
> working correctly.

Could this be culled now?

Hmm, according to mypy(1), strict implies warn-unused-configs.

The question does not block this patch.

> namespace_packages: Needed for the python/ directory structure (nested
> packages under a namespace, "qemu"). Doesn't impact scripts/qapi at all.
> Read up on PEP420 if you are curious. Details in commit message, see below
> if you're still curious.

mypy(1) makes me suspect this is the default.  If that's true across the
versions we care for, this could be culled.

Also does not block this patch.

> warn_unused_ignores: Needed once upon a time for cross-version mypy support
> where some versions would warn in some cases and others would not. Adding
> an ignore would effectively just invert which versions complained. Probably
> still needed, but it's hard to measure.

Harmless enough.

> python_version: Changes mypy behavior regardless of the invoking python
> interpreter to check the file as if it were to be executed by Python 3.8. I
> actually want to remove this value from setup.cfg but haven't yet. I
> removed it from the python-qemu-qmp repo and never added it for qapi.
> Removing it is actually probably correct as it will catch errors specific
> to various python versions we support, but there are some nits to iron out
> in my neck of the woods. This is a case where scripts/qapi/ is stricter
> than python/ :)
> (Not reasonable to solve for this series.)

Also present in the deleted file, so no change.

> lack of disallow_untyped_calls = False: I think this might be a remnant
> from when we gradually typed qapi; it's evidently no longer needed since
> qapi still checks fine without this affordance. The default under strict is
> True.

Fair enough.

> e941c844e444 (John Snow   2021-05-27 17:17:05 -0400  79)
> [mypy]
> e941c844e444 (John Snow   2021-05-27 17:17:05 -0400  80)
> strict = True
> ca056f4499c2 (Paolo Bonzini   2023-05-03 12:48:02 +0200  81)
> python_version = 3.8
> e941c844e444 (John Snow   2021-05-27 17:17:05 -0400  82)
> warn_unused_configs = True
> 0542a4c95767 (John Snow   2021-05-27 17:17:06 -0400  83)
> namespace_packages = True
> e7874a50ff3f (John Snow   2022-05-25 20:09:13 -0400  84)
> warn_unused_ignores = False
>
>
>>
>> python/setup.cfg additionally has a bunch of ignore_missing_imports that
>> don't apply here, as far as I can tell.
>>
>
> Right, that's all stuff for fuse and the interactive qmp shell that use
> untyped dependencies.

Good.

Let's mention the differences in the commit message.  Here's my try:

Since the previous commit, python/setup.cfg applies to scripts/qapi/
as well.  Configuration files in scripts/qapi/ override
python/setup.cfg.

scripts/qapi/.flake8 and scripts/qapi/.isort.cfg actually match
python/setup.cfg exactly, and can go.

The differences between scripts/qapi/mypy.ini and python/setup.cfg
are harmless: [list the differences, explain why they're harmless as
long as you can keep it brief, and if not, fall back to "trust me"].
So scripts/qapi/mypy.ini can go, too.

The pylint config is being left in place because the settings differ
enough from the python/ directory settings that we need a chit-chat on
how to merge them O:-)

With something like that
Reviewed-by: Markus Armbruster

Re: [PATCH 5/6] hw/ppc: Pass device tree properties for Fadump




On 27/02/25 08:58, Nicholas Piggin wrote:

On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:

<...snip...>
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f3a4b4235d43..3602e5b5d18d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -897,9 +897,27 @@ static int spapr_dt_rng(void *fdt)
  static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
  {

You might be able to add a spapr_dt_rtas_fadump() function
and do it there to help keep functions small?


Sure.


Thanks,

- Aditya G


Thanks,
Nick

Re: [PATCH] trace/simple: Fix hang when using simpletrace with fork()

2025-02-26 Thread Stefan Hajnoczi

On Wed, Feb 26, 2025 at 09:53:53AM +0000, Daniel P. Berrangé wrote:
> On Wed, Feb 26, 2025 at 10:38:56AM +0100, Thomas Huth wrote:
> > On 26/02/2025 10.15, Daniel P. Berrangé wrote:
> > > On Wed, Feb 26, 2025 at 09:50:15AM +0100, Thomas Huth wrote:
> > > > When compiling QEMU with --enable-trace-backends=simple , the
> > > > iotest 233 is currently hanging. This happens because qemu-nbd
> > > > calls trace_init_backends() first - which causes simpletrace to
> > > > install its writer thread and the atexit() handler - before
> > > > calling fork(). But the simpletrace writer thread is then only
> > > > available in the parent process, not in the child process anymore.
> > > > Thus when the child process exits, its atexit handler waits forever
> > > > on the trace_empty_cond condition to be set by the non-existing
> > > > writer thread, so the process never finishes.
> > > > 
> > > > Fix it by installing a pthread_atfork() handler, too, which
> > > > makes sure that the trace_writeout_enabled variable gets set
> > > > to false again in the child process, so we can use it in the
> > > > atexit() handler to check whether we still need to wait on the
> > > > writer thread or not.
> > > > 
> > > > Signed-off-by: Thomas Huth 
> > > > ---
> > > >   trace/simple.c | 17 -
> > > >   1 file changed, 16 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/trace/simple.c b/trace/simple.c
> > > > index c0aba00cb7f..269bbda69f1 100644
> > > > --- a/trace/simple.c
> > > > +++ b/trace/simple.c
> > > > @@ -380,8 +380,22 @@ void st_print_trace_file_status(void)
> > > >   void st_flush_trace_buffer(void)
> > > >   {
> > > > -flush_trace_file(true);
> > > > +flush_trace_file(trace_writeout_enabled);
> > > > +}
> > > > +
> > > > +#ifndef _WIN32
> > > > +static void trace_thread_atfork(void)
> > > > +{
> > > > +/*
> > > > + * If we fork, the writer thread does not exist in the child, so
> > > > + * make sure to allow st_flush_trace_buffer() to clean up 
> > > > correctly.
> > > > + */
> > > > +g_mutex_lock(&trace_lock);
> > > > +trace_writeout_enabled = false;
> > > > +g_cond_signal(&trace_empty_cond);
> > > > +g_mutex_unlock(&trace_lock);
> > > >   }
> > > > +#endif
> > > 
> > > This doesn't seem right to me. This is being run in the child and while
> > > it may avoid the hang when the child exits, surely it still leaves tracing
> > > non-functional in the child as we're lacking the thread to write out the
> > > trace data.
> > 
> > Well, you cannot write to the same file from the parent and child at the
> > same time, so one of both needs to be shut up AFAIU. And the simpletrace
> > code cannot know which one of the two processes should be allowed to continue
> > with the logging, so we either have to disable tracing in one of the two
> > processes, or think of something completely different, e.g. using
> > pthread_atfork(abort, NULL, NULL) to make people aware that they are not
> > allowed to start tracing before calling fork()...? But in that case we still
> > need a qemu-nbd expert to fix qemu-nbd, so that it does not initialize the
> > trace backend before calling fork().
> 
> As precedent, in system/vl.c we delay trace_init() until after daemonizing
> which is the simple way to avoid the worst of the danger.

That sounds good to me.

Adding Daniel Henrique Barboza because he fixed a similar issue in
commit 10b6ee1616f9 ("vl.c: do not execute trace_init_backends() before
daemonizing").

Stefan


signature.asc
Description: PGP signature

Re: [PATCH v2 2/3] tests/functional: Introduce the dso_suffix() helper


On 2025/02/20 17:02, Philippe Mathieu-Daudé wrote:

Introduce a helper to get the default shared library
suffix used on the host.

Suggested-by: Pierrick Bouvier 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Pierrick Bouvier 
---
  tests/functional/qemu_test/__init__.py | 2 +-
  tests/functional/qemu_test/cmd.py  | 1 -
  tests/functional/qemu_test/config.py   | 6 ++
  3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/functional/qemu_test/__init__.py 
b/tests/functional/qemu_test/__init__.py
index 5c972843a6d..45f7befa374 100644
--- a/tests/functional/qemu_test/__init__.py
+++ b/tests/functional/qemu_test/__init__.py
@@ -7,7 +7,7 @@
  
  
  from .asset import Asset

-from .config import BUILD_DIR
+from .config import BUILD_DIR, dso_suffix
  from .cmd import is_readable_executable_file, \
  interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
  exec_command, exec_command_and_wait_for_pattern, get_qemu_img, which
diff --git a/tests/functional/qemu_test/cmd.py 
b/tests/functional/qemu_test/cmd.py
index dc5f422b77d..254e23ef748 100644
--- a/tests/functional/qemu_test/cmd.py
+++ b/tests/functional/qemu_test/cmd.py
@@ -15,7 +15,6 @@
  import os
  import os.path
  
-

  def which(tool):
  """ looks up the full path for @tool, returns None if not found
  or if @tool does not have executable permissions.
diff --git a/tests/functional/qemu_test/config.py 
b/tests/functional/qemu_test/config.py
index edd75b7fd06..0eab1baa541 100644
--- a/tests/functional/qemu_test/config.py
+++ b/tests/functional/qemu_test/config.py
@@ -13,6 +13,7 @@
  
  import os

  from pathlib import Path
+import platform
  
  
  def _source_dir():

@@ -34,3 +35,8 @@ def _build_dir():
  raise Exception("Cannot identify build dir, set QEMU_BUILD_ROOT")
  
  BUILD_DIR = _build_dir()

+
+def dso_suffix():
+'''Return the dynamic libraries suffix for the current platform'''
+DSO_SUFFIXES = { 'Linux': 'so', 'Darwin': 'dylib', 'Windows': 'dll' }
+return DSO_SUFFIXES[platform.system()]


It may be too late to comment, but:
I'm a bit worried that this can break tests on platforms (BSDs?) not 
listed here though I don't know if plugin tests work on BSDs in the 
first place.

Re: [PATCH 6/6] hw/ppc: Enable Fadump for PSeries


On 27/02/25 09:03, Nicholas Piggin wrote:


On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:

With all support in place, enable fadump by exporting the
"ibm,configure-kernel-dump" RTAS call in the device tree.

Presence of "ibm,configure-kernel-dump" tells the kernel that the
platform (QEMU) supports fadump.

Pass "fadump=on" to enable Linux to use firmware assisted dump.

Logs of a linux boot with firmware assisted dump:

 ./build/qemu-system-ppc64 -M pseries,x-vof=on --cpu power10 --smp 4 -m 4G -kernel 
some-vmlinux -initrd some-initrd -append "debug fadump=on crashkernel=1G" 
-nographic
 [0.00] random: crng init done
 [0.00] fadump: Reserved 1024MB of memory at 0x004000 
(System RAM: 4096MB)
 ...
 [1.084686] rtas fadump: Registration is successful!
 ...
 # cat /sys/kernel/debug/powerpc/fadump_region
 CPU :[0x004000-0x00400013d3] 0x13d4 bytes, Dumped: 0x0
 HPTE:[0x00400013d4-0x00400013d3] 0x0 bytes, Dumped: 0x0
 DUMP: Src: 0x00, Dest: 0x004001, Size: 0x4000, 
Dumped: 0x0 bytes

 [0x00f800-0x00]: cmdline append: ''
 # echo c > /proc/sysrq-trigger

The fadump boot after crash:

 [0.00] rtas fadump: Firmware-assisted dump is active.
 [0.00] fadump: Updated cmdline: debug fadump=on crashkernel=1G
 [0.00] fadump: Firmware-assisted dump is active.
 [0.00] fadump: Reserving 3072MB of memory at 0x004000 for 
preserving crash data
 
 # file /proc/vmcore
 /proc/vmcore: ELF 64-bit LSB core file, 64-bit PowerPC or cisco 7500, 
OpenPOWER ELF V2 ABI, version 1 (SYSV), SVR4-style

Analysing the vmcore with crash-utility:

   KERNEL: vmlinux-6.14-rc2
 DUMPFILE: vmcore-a64dcfb451e2-nocma
 CPUS: 4
 DATE: Thu Jan  1 05:30:00 IST 1970
   UPTIME: 00:00:30
 LOAD AVERAGE: 0.74, 0.21, 0.07
TASKS: 94
 NODENAME: buildroot
  RELEASE: 6.14.0-rc2+
  VERSION: #1 SMP Wed Feb 12 06:49:59 CST 2025
  MACHINE: ppc64le  (1000 Mhz)
   MEMORY: 4 GB
PANIC: "Kernel panic - not syncing: sysrq triggered crash"
  PID: 270
  COMMAND: "sh"
 TASK: c9e7cc00  [THREAD_INFO: c9e7cc00]
  CPU: 3
STATE: TASK_RUNNING (PANIC)

Signed-off-by: Aditya Gupta 

This is very cool, nice work. Does it work with KVM? I think... probably
it could?


Yes, it does, at least for the crashing CPU :)

But there are problems with reading the CPU regs: they don't seem
correct for non-crashing CPUs.


Crash is able to work perfectly for the crashing CPU as of now (as the 
registers are stored by the kernel in that case).




Are you able to add a functional test case for it? This is something
that people (including me) will forget to test...


Sure, I will add a test case.


Thanks for your reviews Nick.

It might take a few weeks for me to post another version; I will look into
the tests in QEMU and arrange the code a bit more nicely.



- Aditya G



Thanks,
Nick


---
  hw/ppc/spapr_rtas.c| 6 +-
  include/hw/ppc/spapr.h | 3 ++-
  2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 0aca4270aee8..bd2ed16a46e3 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -692,7 +692,7 @@ static void trigger_fadump_boot(target_ulong spapr_retcode)
  }
  
  /* Papr Section 7.4.9 ibm,configure-kernel-dump RTAS call */

-static __attribute((unused)) void rtas_configure_kernel_dump(PowerPCCPU *cpu,
+static void rtas_configure_kernel_dump(PowerPCCPU *cpu,
 SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
@@ -1109,6 +1109,10 @@ static void core_rtas_register_types(void)
  spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
  rtas_ibm_nmi_interlock);
  
+/* Register Fadump rtas call */

+spapr_rtas_register(RTAS_CONFIGURE_KERNEL_DUMP, 
"ibm,configure-kernel-dump",
+rtas_configure_kernel_dump);
+
  qtest_set_command_cb(spapr_qtest_callback);
  }
  
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h

index fa63008e57ec..bde3bdc4b80c 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -768,8 +768,9 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
  #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
  #define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
  #define RTAS_IBM_NMI_INTERLOCK  (RTAS_TOKEN_BASE + 0x2C)
+#define RTAS_CONFIGURE_KERNEL_DUMP  (RTAS_TOKEN_BASE + 0x2D)
  
-#define RTAS_TOKEN_MAX  (RTAS_TOKEN_BASE + 0x2D)

+#define RTAS_TOKEN_MAX  (RTAS_TOKEN_BASE + 0x2E

Re: [PATCH v5 4/4] virtio: Convert feature properties to OnOffAuto


On 2025/02/21 0:46, Michael S. Tsirkin wrote:

On Sat, Feb 08, 2025 at 04:51:10PM +0900, Akihiko Odaki wrote:

Some features are not always available with vhost. Legacy features are
not available with vp_vdpa in particular. virtio devices used to disable
them when not available even if the corresponding properties were
explicitly set to "on".

QEMU already has OnOffAuto type, which includes the "auto" value to let
it automatically decide the effective value. Convert feature properties
to OnOffAuto and set them "auto" by default to utilize it. This allows
QEMU to report an error if they are set "on" and the corresponding
features are not available.

Signed-off-by: Akihiko Odaki 



---
  include/hw/virtio/virtio.h | 38 +-
  hw/core/machine.c  |  1 +
  hw/virtio/virtio-bus.c | 14 --
  hw/virtio/virtio.c |  4 +++-
  4 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 
638691028050d2599592d8c7e95c75ac3913fbdd..b854c2cb1d04da0a35165289c28f87e8cb869df6
 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -113,7 +113,8 @@ struct VirtIODevice
  uint16_t queue_sel;
  /**
   * These fields represent a set of VirtIO features at various
- * levels of the stack. @host_features indicates the complete
+ * levels of the stack. @requested_features indicates the feature
+ * set the user requested. @host_features indicates the complete
   * feature set the VirtIO device can offer to the driver.
   * @guest_features indicates which features the VirtIO driver has
   * selected by writing to the feature register. Finally
@@ -121,6 +122,7 @@ struct VirtIODevice
   * backend (e.g. vhost) and could potentially be a subset of the
   * total feature set offered by QEMU.
   */
+OnOffAutoBit64 requested_features;
  uint64_t host_features;
  uint64_t guest_features;
  uint64_t backend_features;
@@ -149,6 +151,7 @@ struct VirtIODevice
  bool started;
  bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
  bool disable_legacy_check;
+bool force_features_auto;
  bool vhost_started;
  VMChangeStateEntry *vmstate;
  char *bus_name;
@@ -376,22 +379,23 @@ typedef struct VirtIOSCSIConf VirtIOSCSIConf;
  typedef struct VirtIORNGConf VirtIORNGConf;
  
  #define DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) \

-DEFINE_PROP_BIT64("indirect_desc", _state, _field,\
-  VIRTIO_RING_F_INDIRECT_DESC, true), \
-DEFINE_PROP_BIT64("event_idx", _state, _field,\
-  VIRTIO_RING_F_EVENT_IDX, true), \
-DEFINE_PROP_BIT64("notify_on_empty", _state, _field,  \
-  VIRTIO_F_NOTIFY_ON_EMPTY, true), \
-DEFINE_PROP_BIT64("any_layout", _state, _field, \
-  VIRTIO_F_ANY_LAYOUT, true), \
-DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false), \
-DEFINE_PROP_BIT64("packed", _state, _field, \
-  VIRTIO_F_RING_PACKED, false), \
-DEFINE_PROP_BIT64("queue_reset", _state, _field, \
-  VIRTIO_F_RING_RESET, true), \
-DEFINE_PROP_BIT64("in_order", _state, _field, \
-  VIRTIO_F_IN_ORDER, false)
+DEFINE_PROP_ON_OFF_AUTO_BIT64("indirect_desc", _state, _field, \
+  VIRTIO_RING_F_INDIRECT_DESC, \
+  ON_OFF_AUTO_AUTO), \
+DEFINE_PROP_ON_OFF_AUTO_BIT64("event_idx", _state, _field, \
+  VIRTIO_RING_F_EVENT_IDX, ON_OFF_AUTO_AUTO), \
+DEFINE_PROP_ON_OFF_AUTO_BIT64("notify_on_empty", _state, _field, \
+  VIRTIO_F_NOTIFY_ON_EMPTY, ON_OFF_AUTO_AUTO), 
\
+DEFINE_PROP_ON_OFF_AUTO_BIT64("any_layout", _state, _field, \
+  VIRTIO_F_ANY_LAYOUT, ON_OFF_AUTO_AUTO), \
+DEFINE_PROP_ON_OFF_AUTO_BIT64("iommu_platform", _state, _field, \
+  VIRTIO_F_IOMMU_PLATFORM, ON_OFF_AUTO_OFF), \
+DEFINE_PROP_ON_OFF_AUTO_BIT64("packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, ON_OFF_AUTO_OFF), \
+DEFINE_PROP_ON_OFF_AUTO_BIT64("queue_reset", _state, _field, \
+  VIRTIO_F_RING_RESET, ON_OFF_AUTO_AUTO), \
+DEFINE_PROP_ON_OFF_AUTO_BIT64("in_order", _state, _field, \
+  VIRTIO_F_IN_ORDER, ON_OFF_AUTO_OFF)
  
  hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);

  bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 
c23b39949649054ac59d2a9b497f34e1b7bd8d6c..0de04baa61735ff02f797f778c626ef690625ce3
 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -38,6 +38,7 @@
  
  GlobalProperty hw_compat_9_2[] = {

Re: [PATCH v5 34/36] vfio/migration: Max in-flight VFIO device state buffer count limit


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Allow capping the maximum count of in-flight VFIO device state buffers
queued at the destination, otherwise a malicious QEMU source could
theoretically cause the target QEMU to allocate unlimited amounts of memory
for buffers-in-flight.

Since this is not expected to be a realistic threat in most of VFIO live
migration use cases and the right value depends on the particular setup
disable the limit by default by setting it to UINT64_MAX.

Signed-off-by: Maciej S. Szmigiero 
---
  hw/vfio/migration-multifd.c   | 14 ++
  hw/vfio/pci.c |  2 ++
  include/hw/vfio/vfio-common.h |  1 +
  3 files changed, 17 insertions(+)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 18a5ff964a37..04aa3f4a6596 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -53,6 +53,7 @@ typedef struct VFIOMultifd {
  QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
  uint32_t load_buf_idx;
  uint32_t load_buf_idx_last;
+uint32_t load_buf_queued_pending_buffers;
  } VFIOMultifd;
  
  static void vfio_state_buffer_clear(gpointer data)

@@ -121,6 +122,15 @@ static bool vfio_load_state_buffer_insert(VFIODevice 
*vbasedev,
  
  assert(packet->idx >= multifd->load_buf_idx);
  
+multifd->load_buf_queued_pending_buffers++;

+if (multifd->load_buf_queued_pending_buffers >
+vbasedev->migration_max_queued_buffers) {
+error_setg(errp,
+   "queuing state buffer %" PRIu32 " would exceed the max of 
%" PRIu64,
+   packet->idx, vbasedev->migration_max_queued_buffers);
+return false;
+}
+
  lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
  lb->len = packet_total_size - sizeof(*packet);
  lb->is_present = true;
@@ -374,6 +384,9 @@ static bool vfio_load_bufs_thread(void *opaque, bool 
*should_quit, Error **errp)
  goto ret_signal;
  }
  
+assert(multifd->load_buf_queued_pending_buffers > 0);

+multifd->load_buf_queued_pending_buffers--;
+
  if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
  trace_vfio_load_state_device_buffer_end(vbasedev->name);
  }
@@ -408,6 +421,7 @@ VFIOMultifd *vfio_multifd_new(void)
  
  multifd->load_buf_idx = 0;

  multifd->load_buf_idx_last = UINT32_MAX;
+multifd->load_buf_queued_pending_buffers = 0;
  qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
  
  multifd->load_bufs_thread_running = false;

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9111805ae06c..247418f0fce2 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3383,6 +3383,8 @@ static const Property vfio_pci_dev_properties[] = {
  vbasedev.migration_multifd_transfer,
  qdev_prop_on_off_auto_mutable, OnOffAuto,
  .set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
+DEFINE_PROP_UINT64("x-migration-max-queued-buffers", VFIOPCIDevice,
+   vbasedev.migration_max_queued_buffers, UINT64_MAX),


UINT64_MAX doesn't make sense to me. What would be a reasonable value ?

Have you monitored the max ? Should we collect some statistics on this
value and raise a warning if a high water mark is reached ? I think
this would be more useful.


  DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
   vbasedev.migration_events, false),
  DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),



Please add property documentation in vfio_pci_dev_class_init()


Thanks,

C.



diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3006931accf6..30a5bb9af61b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -155,6 +155,7 @@ typedef struct VFIODevice {
  bool ram_block_discard_allowed;
  OnOffAuto enable_migration;
  OnOffAuto migration_multifd_transfer;
+uint64_t migration_max_queued_buffers;
  bool migration_events;
  VFIODeviceOps *ops;
  unsigned int num_irqs;

Re: [PATCH 1/6] hw/ppc: Implement skeleton code for fadump in PSeries


Hi Nick,

On 27/02/25 08:37, Nicholas Piggin wrote:

On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:

Implement the handler for "ibm,configure-kernel-dump" rtas call in QEMU.

Currently the handler just does basic checks and handles
register/unregister/invalidate requests from kernel.

Fadump will be enabled in a later patch.

Signed-off-by: Aditya Gupta 
---
  hw/ppc/spapr_rtas.c| 99 ++
  include/hw/ppc/spapr.h | 59 +
  2 files changed, 158 insertions(+)

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index df2e837632aa..eebdf13b1552 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -341,6 +341,105 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
  rtas_st(rets, 0, ret);
  }
  
+struct fadump_metadata fadump_metadata;

Can this (and other globals added in other patches) come under
SpaprMachineState?

And could most of the fadump code and structures go under new
spapr_fadump.[ch] files?

Yes, i can move it inside SpaprMachineState. Will put the code in new files.

+
+/* Papr Section 7.4.9 ibm,configure-kernel-dump RTAS call */
+static __attribute((unused)) void rtas_configure_kernel_dump(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)

I don't know about adding a new unused function like this, is there
a way to juggle patches around to add it when it's wired up?


Ah, agreed, that is problematic. I tried to move things around, but
arrived at this.


I will spend some time thinking how to arrange this.

Will need some guidance. How should I approach arranging the code in 
such situations ?


My idea was to
* First one is the skeleton: mentions the steps, but doesn't implement 
the steps

* Middle patches implement the steps one by one
* Last patch enables it all. So in future if someone checks out the 
"Enable fadump" commit they would have all the support ready.


The major problem is "everything" remains unused till this last patch. 
But this 1st patch gave me the chance to logically build upon this, eg. 
first implement preserving memory regions, then add the fadump_trigger 
in os-term rtas call, etc.


Any advice to approach this ?


+{
+struct rtas_fadump_section_header header;
+target_ulong cmd = rtas_ld(args, 0);
+target_ulong fdm_addr = rtas_ld(args, 1);
+target_ulong fdm_size = rtas_ld(args, 2);
+
+/* Number outputs has to be 1 */
+if (nret != 1) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"FADUMP: ibm,configure-kernel-dump RTAS called with nret != 
1.\n");
+return;
+}
+
+/* Number inputs has to be 3 */
+if (nargs != 3) {
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+switch (cmd) {
+case FADUMP_CMD_REGISTER:
+if (fadump_metadata.fadump_registered) {
+/* Fadump already registered */
+rtas_st(rets, 0, RTAS_OUT_DUMP_ALREADY_REGISTERED);
+return;
+}
+
+if (fadump_metadata.fadump_dump_active == 1) {
+rtas_st(rets, 0, RTAS_OUT_DUMP_ACTIVE);
+return;
+}
+
+if (fdm_size < sizeof(struct rtas_fadump_section_header)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"FADUMP: Header size is invalid: %lu\n", fdm_size);
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/* XXX: Can we ensure fdm_addr points to a valid RMR-memory buffer ? */

RMR memory? There is spapr_rma_size() if that's what you need?



Thanks, will use `spapr_rma_size`. The PAPR says fdm_addr should point 
to a valid RMR buffer, I guess that means it should be in the RMA, ie. 
`< spapr_rma_size()` ?



- Aditya G



Thanks,
Nick

Re: [PATCH 2/6] hw/ppc: Trigger Fadump boot if fadump is registered


On 27/02/25 08:44, Nicholas Piggin wrote:

On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:

According to PAPR:

 R1–7.3.30–3. When the platform receives an ibm,os-term RTAS call, or
 on a system reset without an ibm,nmi-interlock RTAS call, if the
 platform has a dump structure registered through the
 ibm,configure-kernel-dump call, the platform must process each
 registered kernel dump section as required and, when available,
 present the dump structure information to the operating system
 through the “ibm,kernel-dump” property, updated with status for each
 dump section, until the dump has been invalidated through the
 ibm,configure-kernel-dump RTAS call.

If Fadump has been registered, trigger an Fadump boot (memory preserving
boot), if QEMU receives an 'ibm,os-term' rtas call.

Implementing the fadump boot as:
 * pause all vcpus (will save registers later)
 * preserve memory regions specified by fadump
 * do a memory preserving reboot (GUEST_RESET in QEMU doesn't clear
   the memory)

Memory regions registered by fadump will be handled in a later patch.

Signed-off-by: Aditya Gupta 
---
  hw/ppc/spapr_rtas.c | 42 ++
  1 file changed, 42 insertions(+)

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index eebdf13b1552..01c82375f03d 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -342,6 +342,43 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
  }
  
  struct fadump_metadata fadump_metadata;

+bool is_next_boot_fadump;

Here's another one for spapr state.

Sure, will add.

+
+static void trigger_fadump_boot(target_ulong spapr_retcode)
+{
+/*
+ * In PowerNV, SBE stops all clocks for cores, do similar to it
+ * QEMU's nearest equivalent is 'pause_all_vcpus'
+ * See 'stopClocksS0' in SBE source code for more info on SBE part
+ */

Can probably remove this comment here.

Sure.

+pause_all_vcpus();
+
+if (true /* TODO: Preserve memory registered for fadump */) {

If you're adding half the code to preserve memory but never actually
calling it anyway, you don't need the pause_all_vcpus() call either.

Again I would rather not adding unused code to the patches if possible.
If you're really not able to find a nice way to split and add
incrementally then okay, but try to take another look if possible.


Yes all this is unused. Will take another look to see how I can arrange it.

+/* Failed to preserve the registered memory regions */
+rtas_st(spapr_retcode, 0, RTAS_OUT_HW_ERROR);
+
+/* Cause a reboot */
+qemu_system_guest_panicked(NULL);
+return;
+}
+
+/* Mark next boot as fadump boot */
+is_next_boot_fadump = true;
+
+/* Reset fadump_registered for next boot */
+fadump_metadata.fadump_registered = false;
+fadump_metadata.fadump_dump_active = true;
+
+/* Then do a guest reset */
+/*
+ * Requirement:
+ * This guest reset should not clear the memory (which is
+ * the case when this is merged)
+ */
+qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);

Seems reasonable. What is the actual mechanism that clears the machine
RAM anyway? I'm not able to find it...


I didn't find any either. There is a device reset which happens at this
guest_reset, during which some devices do clear their memory registers,
e.g. 'pnv_psi_reset': since it clears its regs, it is as if it has cleared
its memory region.


There were a few like that which cleared the data they pass in their
memory regions, but nothing clearing the whole RAM/whole memory regions
as such.



- Aditya G



Thanks,
Nick


+
+rtas_st(spapr_retcode, 0, RTAS_OUT_SUCCESS);
+}
  
  /* Papr Section 7.4.9 ibm,configure-kernel-dump RTAS call */

  static __attribute((unused)) void rtas_configure_kernel_dump(PowerPCCPU *cpu,
@@ -449,6 +486,11 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
  target_ulong msgaddr = rtas_ld(args, 0);
  char msg[512];
  
+if (fadump_metadata.fadump_registered) {

+/* If fadump boot works, control won't come back here */
+return trigger_fadump_boot(rets);
+}
+
  cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1);
  msg[sizeof(msg) - 1] = 0;

Re: [PATCH v5 36/36] vfio/migration: Update VFIO migration documentation


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Update the VFIO documentation at docs/devel/migration describing the
changes brought by the multifd device state transfer.

Signed-off-by: Maciej S. Szmigiero 
---
  docs/devel/migration/vfio.rst | 80 +++
  1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
index c49482eab66d..d9b169d29921 100644
--- a/docs/devel/migration/vfio.rst
+++ b/docs/devel/migration/vfio.rst
@@ -16,6 +16,37 @@ helps to reduce the total downtime of the VM. VFIO devices 
opt-in to pre-copy
  support by reporting the VFIO_MIGRATION_PRE_COPY flag in the
  VFIO_DEVICE_FEATURE_MIGRATION ioctl.


Please add a new "multifd" documentation subsection at the end of the file
with this part :


+Starting from QEMU version 10.0 there's a possibility to transfer VFIO device
+_STOP_COPY state via multifd channels. This helps reduce downtime - especially
+with multiple VFIO devices or with devices having a large migration state.
+As an additional benefit, setting the VFIO device to _STOP_COPY state and
+saving its config space is also parallelized (run in a separate thread) in
+such migration mode.
+
+The multifd VFIO device state transfer is controlled by
+"x-migration-multifd-transfer" VFIO device property. This property defaults to
+AUTO, which means that VFIO device state transfer via multifd channels is
+attempted in configurations that otherwise support it.
+


I was expecting a much more detailed explanation on the design too  :

 * in the cover letter
 * in the hw/vfio/migration-multifd.c
 * in some new file under docs/devel/migration/



This section :


+Since the target QEMU needs to load device state buffers in-order it needs to
+queue incoming buffers until they can be loaded into the device.
+This means that a malicious QEMU source could theoretically cause the target
+QEMU to allocate unlimited amounts of memory for such buffers-in-flight.
+
+The "x-migration-max-queued-buffers" property allows capping the maximum count
+of these VFIO device state buffers queued at the destination.
+
+Because a malicious QEMU source causing OOM on the target is not expected to be
+a realistic threat in most of VFIO live migration use cases and the right value
+depends on the particular setup by default this queued buffers limit is
+disabled by setting it to UINT64_MAX.


should be in patch 34. It is not obvious it will be merged.


This section :


+Some host platforms (like ARM64) require that VFIO device config is loaded only
+after all iterables were loaded.
+Such interlocking is controlled by "x-migration-load-config-after-iter" VFIO
+device property, which in its default setting (AUTO) does so only on platforms
+that actually require it.


Should be in 35. Same reason.



  When pre-copy is supported, it's possible to further reduce downtime by
  enabling "switchover-ack" migration capability.
  VFIO migration uAPI defines "initial bytes" as part of its pre-copy data 
stream
@@ -67,14 +98,39 @@ VFIO implements the device hooks for the iterative approach 
as follows:
  * A ``switchover_ack_needed`` function that checks if the VFIO device uses
"switchover-ack" migration capability when this capability is enabled.
  
-* A ``save_state`` function to save the device config space if it is present.

-
-* A ``save_live_complete_precopy`` function that sets the VFIO device in
-  _STOP_COPY state and iteratively copies the data for the VFIO device until
-  the vendor driver indicates that no data remains.
-
-* A ``load_state`` function that loads the config section and the data
-  sections that are generated by the save functions above.
+* A ``switchover_start`` function that in the multifd mode starts a thread that
+  reassembles the multifd received data and loads it in-order into the device.
+  In the non-multifd mode this function is a NOP.
+
+* A ``save_state`` function to save the device config space if it is present
+  in the non-multifd mode.
+  In the multifd mode it just emits either a dummy EOS marker or
+  "all iterables were loaded" flag for configurations that need to defer
+  loading device config space after them.
+
+* A ``save_live_complete_precopy`` function that in the non-multifd mode sets
+  the VFIO device in _STOP_COPY state and iteratively copies the data for the
+  VFIO device until the vendor driver indicates that no data remains.
+  In the multifd mode it just emits a dummy EOS marker.
+
+* A ``save_live_complete_precopy_thread`` function that in the multifd mode
+  provides thread handler performing multifd device state transfer.
+  It sets the VFIO device to _STOP_COPY state, iteratively reads the data
+  from the VFIO device and queues it for multifd transmission until the vendor
+  driver indicates that no data remains.
+  After that, it saves the device config space and queues it for multifd
+  transfer too.
+  In the non-multifd mode this

Re: [PATCH v4 0/9] vhost-user: Add SHMEM_MAP/UNMAP requests

2025-02-26 Thread Stefan Hajnoczi

On Wed, Feb 26, 2025 at 10:53:01AM +0100, David Hildenbrand wrote:
> > > As commented offline, maybe one would want the option to enable the
> > > alternative mode, where such updates (in the SHM region) are not sent to
> > > vhost-user devices. In such a configuration, the MEM_READ / MEM_WRITE
> > > would be unavoidable.
> > 
> > At first, I remember we discussed two options, having update messages
> > sent to all devices (which was deemed as potentially racy), or using
> > MEM_READ / MEM_WRITE messages. With this version of the patch there
> > is no option to avoid the mem_table update messages, which brings me
> > to my point in the previous message: it may make sense to continue
> > with this patch without MEM_READ/WRITE support, and leave that and the
> > option to make mem_table updates optional for a followup patch?
> 
> IMHO that would work for me.

I'm happy with dropping MEM_READ/WRITE. If the memslots limit becomes a
problem then it will be necessary to think about handling things
differently, but there are many possible uses of VIRTIO Shared Memory
Regions that will not hit the limit and I don't see a need to hold them
back.

Stefan

> 
> > 
> > > 
> > > What comes to mind are vhost-user devices with limited number of
> > > supported memslots.
> > > 
> > > No idea how relevant that really is, and how many SHM regions we will
> > > see in practice.
> > 
> > In general, from what I see they usually require 1 or 2 regions,
> > except for virtio-scmi which requires >256.
> 
> 1/2 regions are not a problem. Once we're in the hundreds for a single
> device, it will likely start being a problem, especially when you have more
> such devices.
> 
> BUT, it would likely be a problem even with the alternative approach where
> we don't communicate these regions to vhost-user: IIRC, vhost-net in
> the kernel is usually limited to a maximum of 509 memslots as well by
> default. Similarly, older KVM only supports a total of 509 memslots.
> 
> See https://virtio-mem.gitlab.io/user-guide/user-guide-qemu.html
> "Compatibility with vhost-net and vhost-user".
> 
> In libvhost-user, and rust-vmm, we have a similar limit of ~509.
> 
> 
> Note that for memory devices (DIMMs, virtio-mem), we'll use up to 256
> memslots in case all devices support 509 memslots.
> See MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT:
> 
> /*
>  * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
>  * 253 memslots were "reserved" for boot memory and other devices (such
>  * as PCI BARs, which can get mapped dynamically) and 256 memslots were
>  * dedicated for DIMMs. These magic numbers worked reliably in the past.
>  *
>  * Further, using many memslots can negatively affect performance, so setting
>  * the soft-limit of memslots used by memory devices to the traditional
>  * DIMM limit of 256 sounds reasonable.
>  *
>  * If we have less than 509 memslots, we will instruct memory devices that
>  * support automatically deciding how many memslots to use to only use a 
> single
>  * one.
>  *
>  * Hotplugging vhost devices with at least 509 memslots is not expected to
>  * cause problems, not even when devices automatically decided how many 
> memslots
>  * to use.
>  */
> #define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
> #define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
> 
> 
> That changes once some vhost-user devices, combined with boot memory,
> consume more than 253 memslots.
> 
> -- 
> Cheers,
> 
> David / dhildenb
> 


signature.asc
Description: PGP signature

Re: [PATCH 2/4] i386/cpu: Fix cpu number overflow in CPUID.01H.EBX[23:16]

2025-02-26 Thread Xiaoyao Li


On 2/27/2025 2:25 PM, Zhao Liu wrote:

From: Qian Wen 

The legacy topology enumerated by CPUID.1.EBX[23:16] is defined in SDM
Vol2:

Bits 23-16: Maximum number of addressable IDs for logical processors in
this physical package.

When threads_per_socket > 255, it will 1) overwrite bits[31:24] which is
apic_id, 2) bits [23:16] get truncated.

Specifically, if launching the VM with -smp 256, the value written to
EBX[23:16] is 0 because of data overflow. If the guest only supports
legacy topology, without V2 Extended Topology enumerated by CPUID.0x1f
or Extended Topology enumerated by CPUID.0x0b to support over 255 CPUs,
the return of the kernel invoking cpu_smt_allowed() is false and APs
(application processors) will fail to bring up. Then only CPU 0 is online,
and others are offline.

For example, launch VM via:
qemu-system-x86_64 -M q35,accel=kvm,kernel-irqchip=split \
 -cpu qemu64,cpuid-0xb=off -smp 256 -m 32G \
 -drive file=guest.img,if=none,id=virtio-disk0,format=raw \
 -device virtio-blk-pci,drive=virtio-disk0,bootindex=1 --nographic

The guest shows:
 CPU(s):   256
 On-line CPU(s) list:  0
 Off-line CPU(s) list: 1-255

To avoid this issue caused by overflow, limit the max value written to
EBX[23:16] to 255 as the HW does.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Qian Wen 
Signed-off-by: Zhao Liu 


Reviewed-by: Xiaoyao Li 


---
Changes since original v4 [*]:
  * Rebase on addressable ID fixup.
  * Drop R/b tags since the code base changes.

[*] original v4: 
https://lore.kernel.org/qemu-devel/20230829042405.932523-2-qian@intel.com/
---
  target/i386/cpu.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b8a78276cd50..ae6c8bfd8b5e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6691,16 +6691,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  }
  *edx = env->features[FEAT_1_EDX];
  if (threads_per_pkg > 1) {
+uint32_t num;
+
  /*
   * For CPUID.01H.EBX[Bits 23-16], AMD requires logical processor
   * count, but Intel needs maximum number of addressable IDs for
   * logical processors per package.
   */
  if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
-*ebx |= threads_per_pkg << 16;
+num = threads_per_pkg;
  } else {
-*ebx |= 1 << apicid_pkg_offset(topo_info) << 16;
+num = 1 << apicid_pkg_offset(topo_info);
  }
+
+/* Fixup overflow: max value for bits 23-16 is 255. */
+*ebx |= MIN(num, 255) << 16;
  }
  if (!cpu->enable_pmu) {
  *ecx &= ~CPUID_EXT_PDCM;

Re: [PATCH 3/4] i386/cpu: Fix overflow of cache topology fields in CPUID.04H

2025-02-26 Thread Xiaoyao Li


On 2/27/2025 2:25 PM, Zhao Liu wrote:

From: Qian Wen 

According to SDM, CPUID.0x4:EAX[31:26] indicates the Maximum number of
addressable IDs for processor cores in the physical package. If we
launch a VM with more than 64 cores, the 6-bit field will overflow, and
the wrong core_id number will be reported.

Since the HW reports 0x3f when the Intel processor has over 64 cores,
limit the max value written to EAX[31:26] to 63, so max num_cores should
be 64.

For EAX[25:14], though at present Q35 supports up to 4096 CPUs, to
prevent potential overflow issues from further increasing the number of
CPUs in the future, check and honor the maximum value for EAX[25:14] as
well.

In addition, for host-cache-info case, also apply the same checks and
fixes.

Signed-off-by: Qian Wen 
Signed-off-by: Zhao Liu 


Reviewed-by: Xiaoyao Li 


---
Changes since original v4 [*]:
  * Rebase on addressable ID fixup.
  * Drop R/b tags since the code base changes.
  * Tweak bits 25-14 as well and add the comment.
  * Fix overflow for host-cache-info case.

[*]: original v4: 
https://lore.kernel.org/qemu-devel/20230829042405.932523-3-qian@intel.com/
---
  target/i386/cpu.c | 16 +++-
  1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ae6c8bfd8b5e..d75175b0850a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -280,11 +280,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache,
  assert(cache->size == cache->line_size * cache->associativity *
cache->partitions * cache->sets);
  
+/*

+ * The following fields have bit-width limitations, so consider the
+ * maximum values to avoid overflow:
+ * Bits 25-14: maximum 4095.
+ * Bits 31-26: maximum 63.
+ */
  *eax = CACHE_TYPE(cache->type) |
 CACHE_LEVEL(cache->level) |
 (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
-   (max_core_ids_in_package(topo_info) << 26) |
-   (max_thread_ids_for_cache(topo_info, cache->share_level) << 14);
+   (MIN(max_core_ids_in_package(topo_info), 63) << 26) |
+   (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) 
<< 14);
  
  assert(cache->line_size > 0);

  assert(cache->partitions > 0);
@@ -6743,13 +6749,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
  
  *eax &= ~0xFC00;

-*eax |= max_core_ids_in_package(topo_info) << 26;
+*eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26;
  if (host_vcpus_per_cache > threads_per_pkg) {
  *eax &= ~0x3FFC000;
  
  /* Share the cache at package level. */

-*eax |= max_thread_ids_for_cache(topo_info,
-CPU_TOPOLOGY_LEVEL_SOCKET) << 14;
+*eax |= MIN(max_thread_ids_for_cache(topo_info,
+CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14;
  }
  }
  } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {

Re: [PATCH v4 08/14] acpi/generic_event_device: add logic to detect if HEST addr is available

2025-02-26 Thread Mauro Carvalho Chehab

Em Wed, 26 Feb 2025 16:52:26 +0100
Igor Mammedov  escreveu:

> On Fri, 21 Feb 2025 15:35:17 +0100
> Mauro Carvalho Chehab  wrote:
> 

> > diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> > index 5346cae573b7..14d8513a5440 100644
> > --- a/hw/acpi/generic_event_device.c
> > +++ b/hw/acpi/generic_event_device.c
> > @@ -318,6 +318,7 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
> > AcpiEventStatusBits ev)
> >  
> >  static const Property acpi_ged_properties[] = {
> >  DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
> > +DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
> > ghes_state.use_hest_addr, false),  
> 
> you below set it for 9.2 to false, so
> shouldn't it be set to true by default here?

Yes, but it is too early to do that here, as the DSDT table was not
updated to contain the GED device.

We're switching it to true later on, at patch 11::

d8c44ee13fbe ("arm/virt: Wire up a GED error device for ACPI / GHES")

Thanks,
Mauro

Re: [PATCH v4 08/14] acpi/generic_event_device: add logic to detect if HEST addr is available

2025-02-26 Thread Mauro Carvalho Chehab

Em Thu, 27 Feb 2025 08:19:27 +0100
Mauro Carvalho Chehab  escreveu:

> Em Wed, 26 Feb 2025 16:52:26 +0100
> Igor Mammedov  escreveu:
> 
> > On Fri, 21 Feb 2025 15:35:17 +0100
> > Mauro Carvalho Chehab  wrote:
> >   
> 
> > > diff --git a/hw/acpi/generic_event_device.c 
> > > b/hw/acpi/generic_event_device.c
> > > index 5346cae573b7..14d8513a5440 100644
> > > --- a/hw/acpi/generic_event_device.c
> > > +++ b/hw/acpi/generic_event_device.c
> > > @@ -318,6 +318,7 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
> > > AcpiEventStatusBits ev)
> > >  
> > >  static const Property acpi_ged_properties[] = {
> > >  DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
> > > +DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
> > > ghes_state.use_hest_addr, false),
> > 
> > you below set it for 9.2 to false, so
> > shouldn't it be set to true by default here?  
> 
> Yes, but it is too early to do that here, as the DSDT table was not
> updated to contain the GED device.
> 
> We're switching it to true later on, at patch 11::
> 
>   d8c44ee13fbe ("arm/virt: Wire up a GED error device for ACPI / GHES")

Hmm... after so many rebases, things are becoming hazy in my head ;-)

Originally, this was setting it to true, but you requested to move it
to another patch during one of the patch reorder requests.

Anyway, after all those rebases, I guess it is now safe to set it
to true here without breaking bisectability. I'll move the hunk back
to this patch.

Thanks,
Mauro

Re: [PATCH v2 2/5] pci: Use PCI PM capability initializer

On 2025/02/26 6:52, Alex Williamson wrote:

Switch callers directly initializing the PCI PM capability with
pci_add_capability() to use pci_pm_init().

Cc: Dmitry Fleytman 
Cc: Akihiko Odaki 
Cc: Jason Wang 
Cc: Stefan Weil 
Cc: Sriram Yagnaraman 
Cc: Keith Busch 
Cc: Klaus Jensen 
Cc: Jesper Devantier 
Cc: Michael S. Tsirkin 
Cc: Marcel Apfelbaum 
Cc: Cédric Le Goater 
Signed-off-by: Alex Williamson 

Reviewed-by: Akihiko Odaki

[PATCH] hw/misc/edu: Rename macros indicating the direction of DMA operations

2025-02-26 Thread Jason Chien

This commit renames the macros to accurately reflect the direction of
DMA operations.

EDU_DMA_TO_PCI now represents reading memory content into the EDU buffer,
while EDU_DMA_FROM_PCI represents writing EDU buffer content to memory.

The previous naming was misleading, as the definitions were reversed.

Signed-off-by: Jason Chien 
---
 hw/misc/edu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index 504178b4a2..1353c67dc2 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -63,8 +63,8 @@ struct EduState {
 
 #define EDU_DMA_RUN 0x1
 #define EDU_DMA_DIR(cmd)(((cmd) & 0x2) >> 1)
-# define EDU_DMA_FROM_PCI   0
-# define EDU_DMA_TO_PCI 1
+# define EDU_DMA_TO_PCI 0
+# define EDU_DMA_FROM_PCI   1
 #define EDU_DMA_IRQ 0x4
 struct dma_state {
 dma_addr_t src;
@@ -146,7 +146,7 @@ static void edu_dma_timer(void *opaque)
 return;
 }
 
-if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
+if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_TO_PCI) {
 uint64_t dst = edu->dma.dst;
 edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
 dst -= DMA_START;
-- 
2.43.2

[PATCH] hw/riscv/riscv-iommu: Fix process directory table walk

2025-02-26 Thread Jason Chien

The PPN field in a non-leaf PDT entry is positioned differently from that
in a leaf PDT entry. The original implementation incorrectly used the leaf
entry's PPN mask to extract the PPN from a non-leaf entry, leading to an
erroneous page table walk.

This commit introduces new macros to properly define the fields for
non-leaf PDT entries and corrects the page table walk.

Signed-off-by: Jason Chien 
---
 hw/riscv/riscv-iommu-bits.h | 6 +-
 hw/riscv/riscv-iommu.c  | 4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index de599b80d6..8d621c5b70 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -368,12 +368,16 @@ enum riscv_iommu_fq_causes {
 #define RISCV_IOMMU_DC_MSIPTP_MODE_OFF  0
 #define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1
 
+/* 2.2 Process Directory Table */
+#define RISCV_IOMMU_PDTE_VALID  BIT_ULL(0)
+#define RISCV_IOMMU_PDTE_PPNRISCV_IOMMU_PPN_FIELD
+
 /* Translation attributes fields */
 #define RISCV_IOMMU_PC_TA_V BIT_ULL(0)
 #define RISCV_IOMMU_PC_TA_RESERVED  GENMASK_ULL(63, 32)
 
 /* First stage context fields */
-#define RISCV_IOMMU_PC_FSC_PPN  GENMASK_ULL(43, 0)
+#define RISCV_IOMMU_PC_FSC_PPN  RISCV_IOMMU_ATP_PPN_FIELD
 #define RISCV_IOMMU_PC_FSC_RESERVED GENMASK_ULL(59, 44)
 
 enum riscv_iommu_fq_ttypes {
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index e7568ca227..1abe981244 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -1043,10 +1043,10 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, 
RISCVIOMMUContext *ctx)
 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
 }
 le64_to_cpus(&de);
-if (!(de & RISCV_IOMMU_PC_TA_V)) {
+if (!(de & RISCV_IOMMU_PDTE_VALID)) {
 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
 }
-addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
+addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN));
 }
 
 /* Leaf entry in PDT */
-- 
2.43.2

Re: [PATCH v2 2/3] tests/functional: Introduce the dso_suffix() helper

Akihiko Odaki  writes:

> On 2025/02/20 17:02, Philippe Mathieu-Daudé wrote:
>> Introduce a helper to get the default shared library
>> suffix used on the host.
>> Suggested-by: Pierrick Bouvier 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> Reviewed-by: Pierrick Bouvier 
>> ---
>>   tests/functional/qemu_test/__init__.py | 2 +-
>>   tests/functional/qemu_test/cmd.py  | 1 -
>>   tests/functional/qemu_test/config.py   | 6 ++
>>   3 files changed, 7 insertions(+), 2 deletions(-)
>> diff --git a/tests/functional/qemu_test/__init__.py
>> b/tests/functional/qemu_test/__init__.py
>> index 5c972843a6d..45f7befa374 100644
>> --- a/tests/functional/qemu_test/__init__.py
>> +++ b/tests/functional/qemu_test/__init__.py
>> @@ -7,7 +7,7 @@
>>   from .asset import Asset
>> -from .config import BUILD_DIR
>> +from .config import BUILD_DIR, dso_suffix
>>   from .cmd import is_readable_executable_file, \
>>   interrupt_interactive_console_until_pattern, wait_for_console_pattern, 
>> \
>>   exec_command, exec_command_and_wait_for_pattern, get_qemu_img, which
>> diff --git a/tests/functional/qemu_test/cmd.py 
>> b/tests/functional/qemu_test/cmd.py
>> index dc5f422b77d..254e23ef748 100644
>> --- a/tests/functional/qemu_test/cmd.py
>> +++ b/tests/functional/qemu_test/cmd.py
>> @@ -15,7 +15,6 @@
>>   import os
>>   import os.path
>>   -
>>   def which(tool):
>>   """ looks up the full path for @tool, returns None if not found
>>   or if @tool does not have executable permissions.
>> diff --git a/tests/functional/qemu_test/config.py 
>> b/tests/functional/qemu_test/config.py
>> index edd75b7fd06..0eab1baa541 100644
>> --- a/tests/functional/qemu_test/config.py
>> +++ b/tests/functional/qemu_test/config.py
>> @@ -13,6 +13,7 @@
>> import os
>>   from pathlib import Path
>> +import platform
>>   def _source_dir():
>> @@ -34,3 +35,8 @@ def _build_dir():
>>   raise Exception("Cannot identify build dir, set QEMU_BUILD_ROOT")
>> BUILD_DIR = _build_dir()
>> +
>> +def dso_suffix():
>> +'''Return the dynamic libraries suffix for the current platform'''
>> +DSO_SUFFIXES = { 'Linux': 'so', 'Darwin': 'dylib', 'Windows': 'dll' }
>> +return DSO_SUFFIXES[platform.system()]
>
> It may be too late to comment, but:
> I'm a bit worried that this can break tests on platforms (BSDs?) not
> listed here though I don't know if plugin tests work on BSDs in the
> first place.

Currently we don't run any of the check-tcg tests on BSD. Mostly because
I couldn't find cross compilers packaged for BSD (and there is no
docker-like thing to bring them in) and for the "native" cross compiler
we currently block clang - although there are some fixes for that in my
current maintainer series.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [RFC 1/2] system/memory: Allow creating IOMMU mappings from RAM discard populate notifiers

2025-02-26 Thread Chenyi Qiang




On 2/26/2025 8:43 PM, Chenyi Qiang wrote:
> 
> 
> On 2/25/2025 5:41 PM, David Hildenbrand wrote:
>> On 25.02.25 03:00, Chenyi Qiang wrote:
>>>
>>>
>>> On 2/21/2025 6:04 PM, Chenyi Qiang wrote:


 On 2/21/2025 4:09 PM, David Hildenbrand wrote:
> On 21.02.25 03:25, Chenyi Qiang wrote:
>>
>>
>> On 2/21/2025 3:39 AM, David Hildenbrand wrote:
>>> On 20.02.25 17:13, Jean-Philippe Brucker wrote:
 For Arm CCA we'd like the guest_memfd discard notifier to call the
 IOMMU
 notifiers and create e.g. VFIO mappings. The default VFIO discard
 notifier isn't sufficient for CCA because the DMA addresses need a
 translation (even without vIOMMU).

 At the moment:
 * guest_memfd_state_change() calls the populate() notifier
 * the populate notifier() calls IOMMU notifiers
 * the IOMMU notifier handler calls memory_get_xlat_addr() to get
 a VA
 * it calls ram_discard_manager_is_populated() which fails.

 guest_memfd_state_change() only changes the section's state after
 calling the populate() notifier. We can't easily invert the order of
 operation because it uses the old state bitmap to know which
 pages need
 the populate() notifier.
>>>
>>> I assume we talk about this code: [1]
>>>
>>> [1] https://lkml.kernel.org/r/20250217081833.21568-1-
>>> chenyi.qi...@intel.com
>>>
>>>
>>> +static int memory_attribute_state_change(MemoryAttributeManager
>>> *mgr,
>>> uint64_t offset,
>>> + uint64_t size, bool
>>> shared_to_private)
>>> +{
>>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>>> +    int ret = 0;
>>> +
>>> +    if (!memory_attribute_is_valid_range(mgr, offset, size)) {
>>> +    error_report("%s, invalid range: offset 0x%lx, size 0x%lx",
>>> + __func__, offset, size);
>>> +    return -1;
>>> +    }
>>> +
>>> +    if ((shared_to_private &&
>>> memory_attribute_is_range_discarded(mgr,
>>> offset, size)) ||
>>> +    (!shared_to_private &&
>>> memory_attribute_is_range_populated(mgr,
>>> offset, size))) {
>>> +    return 0;
>>> +    }
>>> +
>>> +    if (shared_to_private) {
>>> +    memory_attribute_notify_discard(mgr, offset, size);
>>> +    } else {
>>> +    ret = memory_attribute_notify_populate(mgr, offset, size);
>>> +    }
>>> +
>>> +    if (!ret) {
>>> +    unsigned long first_bit = offset / block_size;
>>> +    unsigned long nbits = size / block_size;
>>> +
>>> +    g_assert((first_bit + nbits) <= mgr->bitmap_size);
>>> +
>>> +    if (shared_to_private) {
>>> +    bitmap_clear(mgr->shared_bitmap, first_bit, nbits);
>>> +    } else {
>>> +    bitmap_set(mgr->shared_bitmap, first_bit, nbits);
>>> +    }
>>> +
>>> +    return 0;
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>>
>>> Then, in memory_attribute_notify_populate(), we walk the bitmap
>>> again.
>>>
>>> Why?
>>>
>>> We just checked that it's all in the expected state, no?
>>>
>>>
>>> virtio-mem doesn't handle it that way, so I'm curious why we would
>>> have
>>> to do it here?
>>
>> I was concerned about the case where the guest issues a request for which
>> only part of the range is in the desired state.
>> I think the main problem is the policy for the guest conversion
>> request.
>> My current handling is:
>>
>> 1. When a conversion request is made for a range already in the
>> desired
>>     state, the helper simply returns success.
>
> Yes.
>
>> 2. For requests involving a range partially in the desired state, only
>>     the necessary segments are converted, ensuring the entire range
>>     complies with the request efficiently.
>
>
> Ah, now I get:
>
> +    if ((shared_to_private && memory_attribute_is_range_discarded(mgr,
> offset, size)) ||
> +    (!shared_to_private &&
> memory_attribute_is_range_populated(mgr,
> offset, size))) {
> +    return 0;
> +    }
> +
>
> We're not failing if it might already partially be in the other state.
>
>> 3. In scenarios where a conversion request is declined by other
>> systems,
>>     such as a failure from VFIO during notify_populate(), the
>> helper will
>>     roll back the request, maintaining consistency.
>>
>> And the policy of virtio-mem is to refuse the state change if not all
>> blocks are in the opposite state.
>
> Yes.
>
>>
>> Actually, this part is still uncertain to me.
>>
>
> IIUC, the problem does not exist if we only convert a si

Re: [PATCH 5/6] hw/ppc: Pass device tree properties for Fadump

On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:
> Platform (ie. QEMU) is expected to pass few device tree properties for
> details for fadump:
>
>   * "ibm,configure-kernel-dump": RTAS call for fadump
>   * "ibm,configure-kernel-dump-sizes": Space required to store dump data
> for firmware provided dump sections (ie. CPU & HPTE regions)
>   * "ibm,configure-kernel-dump-version": Versions of fadump supported
>   * "ibm,kernel-dump": Contains the Fadump Memory Structure on a fadump
> boot
>
> Implement passing configure-kernel-dump-sizes, and
> configure-kernel-dump-version device tree properties, irrespective of
> whether it's a fadump boot or not, so that kernel can reserve memory to
> store the firmware provided dump sections in case of a crash
>
> Also, in case of a fadump boot, pass the fadump memory structure to the
> kernel in "ibm,kernel-dump" device tree property.
>
> Signed-off-by: Aditya Gupta 
> ---
>  hw/ppc/spapr.c | 62 ++
>  include/hw/ppc/spapr.h |  2 ++
>  2 files changed, 64 insertions(+)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index f3a4b4235d43..3602e5b5d18d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -897,9 +897,27 @@ static int spapr_dt_rng(void *fdt)
>  static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
>  {

You might be able to add a spapr_dt_rtas_fadump() function
and do it there to help keep functions small?

Thanks,
Nick

>  MachineState *ms = MACHINE(spapr);
> +MachineClass *mc = MACHINE_GET_CLASS(ms);
>  int rtas;
>  GString *hypertas = g_string_sized_new(256);
>  GString *qemu_hypertas = g_string_sized_new(256);
> +uint32_t max_possible_cpus = mc->possible_cpu_arch_ids(ms)->len;
> +uint64_t fadump_cpu_state_size = 0;
> +uint16_t fadump_versions[2] = {
> +FADUMP_VERSION /* min supported version */,
> +FADUMP_VERSION /* max supported version */
> +};
> +uint32_t fadump_rgn_sizes[2][3] = {
> +{
> +cpu_to_be32(FADUMP_CPU_STATE_DATA),
> +0, 0 /* Calculated later */
> +},
> +{
> +cpu_to_be32(FADUMP_HPTE_REGION),
> +0, 0 /* HPTE region not implemented */
> +}
> +};
> +
>  uint32_t lrdr_capacity[] = {
>  0,
>  0,
> @@ -1006,6 +1024,50 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, 
> void *fdt)
>  _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity",
>   lrdr_capacity, sizeof(lrdr_capacity)));
>  
> +/*
> + * CPU State Data contains multiple fields such as header, num_cpus and
> + * register entries
> + *
> + * Calculate the maximum CPU State Data size, according to maximum
> + * possible CPUs the QEMU VM can have
> + */
> +/* Reg save header */
> +fadump_cpu_state_size += sizeof(struct rtas_fadump_reg_save_area_header);
> +
> +/* Num_cpus */
> +fadump_cpu_state_size += sizeof(__be32);
> +
> +/* Register Entries */
> +fadump_cpu_state_size += max_possible_cpus   *
> + FADUMP_NUM_PER_CPU_REGS *
> + sizeof(struct rtas_fadump_reg_entry);
> +
> +/* Set maximum size for CPU state data region */
> +assert(fadump_rgn_sizes[0][0] == cpu_to_be32(FADUMP_CPU_STATE_DATA));
> +
> +/* Upper 32 bits of size, usually 0 */
> +fadump_rgn_sizes[0][1] = cpu_to_be32(fadump_cpu_state_size >> 32);
> +
> +/* Lower 32 bits of size */
> +fadump_rgn_sizes[0][2] = cpu_to_be32(fadump_cpu_state_size & 0xffffffff);
> +
> +/* Add device tree properties required from platform for fadump */
> +_FDT((fdt_setprop(fdt, rtas, "ibm,configure-kernel-dump-version",
> +fadump_versions, sizeof(fadump_versions))));
> +_FDT((fdt_setprop(fdt, rtas, "ibm,configure-kernel-dump-sizes",
> +fadump_rgn_sizes, sizeof(fadump_rgn_sizes))));
> +
> +if (is_next_boot_fadump) {
> +struct rtas_fadump_mem_struct *fdm =
> +&fadump_metadata.registered_fdm;
> +
> +uint64_t fdm_size =
> +sizeof(struct rtas_fadump_section_header) +
> +(be16_to_cpu(fdm->header.dump_num_sections) *
> +sizeof(struct rtas_fadump_section));
> +
> +_FDT((fdt_setprop(fdt, rtas, "ibm,kernel-dump", fdm, fdm_size)));
> +}
>  spapr_dt_rtas_tokens(fdt, rtas);
>  }
>  
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 0e8002bad9e0..fa63008e57ec 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -928,6 +928,8 @@ static inline uint64_t fadump_gpr_id_to_u64(uint32_t 
> gpr_id)
>  return val;
>  }
>  
> +extern bool is_next_boot_fadump;
> +
>  struct fadump_metadata {
>  bool fadump_registered;
>  bool fadump_dump_active;

Re: [PATCH 4/6] hw/ppc: Implement saving CPU state in Fadump

On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:
> Kernel expects CPU states/register states in the format mentioned in
> "Register Save Area" in PAPR.
>
> The platform (in our case, QEMU) saves each CPU register in the form of
> an array of "register entries", the start and end of this array is
> signified by "CPUSTRT" and "CPUEND" register entries respectively.
>
> The CPUSTRT and CPUEND register entry also has 4-byte logical CPU ID,
> thus storing the CPU ID corresponding to the array of register entries.
>
> Each register, and CPUSTRT, CPUEND has a predefined identifier.
> Implement calculating identifier for a given register in
> 'fadump_str_to_u64', which has been taken from the linux kernel
>
> Similarly GPRs also have predefined identifiers, and a corresponding
> 64-bit register value (split into two 32-bit cells). Implement
> calculation of GPR identifiers with 'fadump_gpr_id_to_u64'
>
> PAPR has restrictions on particular order of few registers, and is
> free to be in any order for other registers.
> Some registers mentioned in PAPR have not been exported as they are not
> implemented in QEMU / don't make sense in QEMU.
>
> Implement saving of CPU state according to the PAPR document
>
> Signed-off-by: Aditya Gupta 
> ---
>  hw/ppc/spapr_rtas.c| 200 -
>  include/hw/ppc/spapr.h |  83 +
>  2 files changed, 281 insertions(+), 2 deletions(-)
>
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 9b29cadab2c9..0aca4270aee8 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -348,9 +348,12 @@ bool is_next_boot_fadump;
>  static bool fadump_preserve_mem(void)
>  {
>  struct rtas_fadump_mem_struct *fdm = &fadump_metadata.registered_fdm;
> +struct rtas_fadump_section *cpu_state_region;
>  uint64_t next_section_addr;
>  int dump_num_sections, data_type;
>  uint64_t src_addr, src_len, dest_addr;
> +uint64_t cpu_state_addr, cpu_state_len = 0;
> +void *cpu_state_buffer;
>  void *copy_buffer;
>  
>  assert(fadump_metadata.fadump_registered);
> @@ -413,9 +416,174 @@ static bool fadump_preserve_mem(void)
>  }
>  
>  switch (data_type) {
> -case FADUMP_CPU_STATE_DATA:
> -/* TODO: Add CPU state data */
> +case FADUMP_CPU_STATE_DATA: {

I would split these out into their own functions if they grow more than
a few lines.

> +struct rtas_fadump_reg_save_area_header reg_save_hdr;
> +struct rtas_fadump_reg_entry **reg_entries;
> +struct rtas_fadump_reg_entry *curr_reg_entry;
> +
> +uint32_t fadump_reg_entries_size;
> +__be32 num_cpus = 0;
> +uint32_t num_regs_per_cpu = 0;
> +CPUState *cpu;
> +CPUPPCState *env;
> +PowerPCCPU *ppc_cpu;
> +
> +CPU_FOREACH(cpu) {
> +++num_cpus;
> +}
> +
> +reg_save_hdr.version = cpu_to_be32(1);
> +reg_save_hdr.magic_number =
> +cpu_to_be64(fadump_str_to_u64("REGSAVE"));
> +
> +/* Reg save area header is immediately followed by num cpus */
> +reg_save_hdr.num_cpu_offset =
> +cpu_to_be32(sizeof(struct rtas_fadump_reg_save_area_header));
> +
> +fadump_reg_entries_size = num_cpus *
> +  FADUMP_NUM_PER_CPU_REGS *
> +  sizeof(struct rtas_fadump_reg_entry);
> +
> +reg_entries = malloc(fadump_reg_entries_size);
> +curr_reg_entry = (struct rtas_fadump_reg_entry *)reg_entries;
> +
> +/* This must loop num_cpus time */
> +CPU_FOREACH(cpu) {
> +ppc_cpu = POWERPC_CPU(cpu);
> +env = cpu_env(cpu);
> +num_regs_per_cpu = 0;
> +
> +curr_reg_entry->reg_id =
> +cpu_to_be64(fadump_str_to_u64("CPUSTRT"));
> +curr_reg_entry->reg_value = ppc_cpu->vcpu_id;
> +++curr_reg_entry;
> +
> +#define REG_ENTRY(id, val) \
> +do {   \
> +curr_reg_entry->reg_id =   \
> +cpu_to_be64(fadump_str_to_u64(#id));   \
> +curr_reg_entry->reg_value = val;   \
> +++curr_reg_entry;  \
> +++num_regs_per_cpu;\
> +} while (0)
> +
> +REG_ENTRY(ACOP, env->spr[SPR_ACOP]);
> +REG_ENTRY(AMR, env->spr[SPR_AMR]);
> +REG_ENTRY(BESCR, env->spr[SPR_BESCR]);
> +REG_ENTRY(CFAR, env->spr[SPR_CFAR]);
> +REG_ENTRY(CIABR, env->spr[SPR_CIABR]);
> +
> +/* Save the condition register */
> +uint64_t cr = 0;
> +cr |= (

Re: [PATCH 0/7] Implement MPIPL for PowerNV

On Mon Feb 17, 2025 at 5:19 PM AEST, Aditya Gupta wrote:
> Overview
> =
>
> Implemented MPIPL (Memory Preserving IPL, aka fadump) on PowerNV machine
> in QEMU.

Wow, that's a lot of effort.

> Note: It's okay if this isn't merged as there might be few users. Sending
> for archival purposes, as the patches can be referred to for how fadump/mpipl
> can be implemented in baremetal/PowerNV/any other arch QEMU.

I would like to add it. It helps test a bunch of code that is in Linux
and skiboot, so it would be quite useful. A functional test would be
important to have.

I've had a glance through it, but better review might have to wait for
until the next development cycle.

Thanks,
Nick

Re: [PATCH v2] vdpa: Fix endian bugs in shadow virtqueue

2025-02-26 Thread Michael Tokarev


25.02.2025 15:39, Konstantin Shkolnyy wrote:

On 2/25/2025 03:30, Michael Tokarev wrote:



This looks like a qemu-stable material.
Please let me know if it is not.


It won't help without my other "[PATCH v2] vdpa: Allow vDPA to work on big-endian 
machine". With both patches, VDPA works on a big-endian machine.


Aha. And it is not in master yet.  Thank you for letting me know!

What do you think, is it worth the effort to pick these up for
older stable releases (7.2, 8.2) too?

Thanks,

/mjt

Re: [PATCH v5 8/8] docs/system: Expand the virtio-gpu documentation


On 2025/02/18 15:27, Dmitry Osipenko wrote:

On 2/13/25 07:32, Akihiko Odaki wrote:

On 2025/02/10 6:03, Dmitry Osipenko wrote:

On 2/6/25 08:41, Akihiko Odaki wrote:

On 2025/02/06 2:40, Dmitry Osipenko wrote:

On 2/3/25 08:31, Akihiko Odaki wrote:
...

Requirements don't vary much. For example virglrenderer minigbm
support
is mandatory for crosvm, while for QEMU it's not.


Is that true? It seems that virglrenderer uses builds without minigbm
support to run tests on GitLab CI.


CI is running in a headless mode using software renderer. For a
full-featured crosvm support running on a baremetal, minigbm should be
needed, along with other downstream features.


That makes sense.

Based on your input, for QEMU, I don't think we need a separate
documentation to describe libvirglrenderer's build flags though crosvm
should have some documentation saying it requires minigbm.




Anyway, if there is any variance in the build procedure, that may
justify having a separate build instruction in QEMU tree to avoid
confusion. Otherwise, it's better to have a documentation shared with
other VMMs.




I'm not entirely sure the documentation will stay as is for that
long.
The requirements of Intel native context refer to merge requests
that
can be merged sooner or later. Asahi may need more updates if you
document it too because its DRM ABI is still unstable.


The unstable parts of course will need to be updated sooner, but the
stable should be solid for years. I expect that about a year later
requirements will need to be revisited.



It will be some burden in the future. Now you are adding this
documentation just for QEMU, but crosvm and libkrun may gain similar
documentation. The DRM native context support for Intel and Asahi
is in
development, and I guess nvk will support it someday.

So, a very rough estimation of future documentation updates will be:
(number of VMMs) * (number of DRM native contexts in development)
= 3 * 3
= 9

That's manageable but suboptimal.


I don't mind deferring the doc addition if that's preferred. Either way
is fine with me. Yet it's better to have doc than not.


My suggestion is not to defer the addition, but to add it to Mesa, which
does not require deferring.



In my view crosvm and libkrun exist separately from QEMU, they serve a
different purpose. Majority of QEMU users likely never heard about
those
other VMMs. A unified doc won't be a worthwhile effort, IMO.



When evaluating the utility of a unified documentation, whether the
majority of Mesa/Virgl users care about VMMs other than QEMU matters more. And
I think it is true; libkrun and crosvm are excellent options for
graphics-accelerated VMs.

If we have a unified documentation, any VM can point to it for the build
instruction of Mesa and virglrenderer. Once that's done, QEMU users who
want graphics acceleration can take the following steps:
1. See docs/system/devices/virtio-gpu.rst
2. Figure out that they need Mesa and virglrenderer
3. Click the link to the unified documentation
4. Build Mesa and virglrenderer accordingly

No other VMMs will bother them in this procedure.


Will see. For the starter, adding example build flags to QEMU doesn't
hurt, it's a very minimal information. Later on, if and when all
relevant Mesa/virglrenderer doc pages will appear, it won't be a problem
to replace QEMU flags with the links. Please let's do it step-by-step, one
step at a time :)



To be honest, I'm concerned that you may be using QEMU as a staging tree
for Mesa/virglrenderer. Submitting a documentation to QEMU as a
preparation to submit one to Mesa is not OK.

You shouldn't submit a documentation to QEMU if upstream
Mesa developers reject it because it contains too little information.
It may not hurt QEMU, but still lacks a valid reasoning.


Don't understand what you're talking about here. I may remind that this
is not my QEMU doc patch to begin with, hence it has nothing to do with
Mesa nor with virglrenderer. Alex wants to help QEMU users by adding
more QEMU documentation.


I understand Alex and you intend to help QEMU users, but sometimes a 
change to help QEMU users is better to be done in the upstream of its 
dependency.


For example, when I submitted a patch to suppress a sanitizer warning in 
the past, I was asked if it may be better to propose a change of the 
sanitizer behavior instead of changing QEMU code. We eventually 
concluded we should change our code at that time, but it could have 
turned out otherwise. It is no different from the normal review process; 
a submitter proposes a solution, a reviewer suggests alternatives, and 
both figure out the optimal one in the discussion.




Maybe you're also not very familiar with the Mesa development process.
This is okay, no problems.


Mesa should have more people who care about virtio-gpu as there are people
using other VMMs and perhaps it may be difficult to convince them to add
a documentation like this. It is still not a good idea to workaround
that by adding one to QEMU. The documen

Re: [PATCH 0/7] Implement MPIPL for PowerNV


Hi Nick,

On 27/02/25 09:07, Nicholas Piggin wrote:

On Mon Feb 17, 2025 at 5:19 PM AEST, Aditya Gupta wrote:

Overview
=

Implemented MPIPL (Memory Preserving IPL, aka fadump) on PowerNV machine
in QEMU.

Wow, that's a lot of effort.


Thanks Nick.



Note: It's okay if this isn't merged as there might be few users. Sending
for archival purposes, as the patches can be referred to for how fadump/mpipl
can be implemented in baremetal/PowerNV/any other arch QEMU.

I would like to add it. It helps test a bunch of code that is in Linux
and skiboot, so it would be quite useful. A functional test would be
important to have.


Sure, it's not complete yet (didn't implement the CPU saving part) as it
was just an experiment I did; I will improve those things by v2
then. It might take some time though.


Will look into the functional test thing also.


I've had a glance through it, but better review might have to wait for
until the next development cycle.


Sure, that's totally okay. Thank you for looking at it.


Thanks,

- Aditya G


Thanks,
Nick

Re: [PATCH v5 31/36] vfio/migration: Add x-migration-multifd-transfer VFIO property


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

This property allows configuring at runtime whether to transfer the
particular device state via multifd channels when live migrating that
device.

It defaults to AUTO, which means that VFIO device state transfer via
multifd channels is attempted in configurations that otherwise support it.

Signed-off-by: Maciej S. Szmigiero 
---
  hw/vfio/migration-multifd.c   | 17 -
  hw/vfio/pci.c |  3 +++
  include/hw/vfio/vfio-common.h |  2 ++
  3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 0cfa9d31732a..18a5ff964a37 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -460,11 +460,26 @@ bool vfio_multifd_transfer_supported(void)
  
  bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)

  {
-return false;
+VFIOMigration *migration = vbasedev->migration;
+
+return migration->multifd_transfer;
  }
  
  bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)

  {
+VFIOMigration *migration = vbasedev->migration;
+
+/*
+ * Make a copy of this setting at the start in case it is changed
+ * mid-migration.
+ */
+if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
+migration->multifd_transfer = vfio_multifd_transfer_supported();
+} else {
+migration->multifd_transfer =
+vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
+}
+
  if (vfio_multifd_transfer_enabled(vbasedev) &&
  !vfio_multifd_transfer_supported()) {
  error_setg(errp,
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 89d900e9cf0c..184ff882f9d1 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3377,6 +3377,9 @@ static const Property vfio_pci_dev_properties[] = {
  VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
  DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
  vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice,
+vbasedev.migration_multifd_transfer,
+ON_OFF_AUTO_AUTO),
  DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
   vbasedev.migration_events, false),
  DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),


Please add property documentation in vfio_pci_dev_class_init()


Thanks,

C.




diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ba851917f9fc..3006931accf6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -91,6 +91,7 @@ typedef struct VFIOMigration {
  uint64_t mig_flags;
  uint64_t precopy_init_size;
  uint64_t precopy_dirty_size;
+bool multifd_transfer;
  VFIOMultifd *multifd;
  bool initial_data_sent;
  
@@ -153,6 +154,7 @@ typedef struct VFIODevice {

  bool no_mmap;
  bool ram_block_discard_allowed;
  OnOffAuto enable_migration;
+OnOffAuto migration_multifd_transfer;
  bool migration_events;
  VFIODeviceOps *ops;
  unsigned int num_irqs;

Re: [PATCH 4/4] tcg:tlb: use tcg_debug_assert() in assert_cpu_is_self()

On Tue, 25 Feb 2025 12:02:02 -0800
Richard Henderson  wrote:

> On 2/25/25 10:46, Alex Bennée wrote:
> > From: Igor Mammedov 
> > 
> > that will enable assert_cpu_is_self when QEMU is configured with
> > --enable-debug
> > without need for manual patching DEBUG_TLB_GATE define.
> > 
> > The need to manually patch the DEBUG_TLB_GATE define to enable the assert
> > let the regression caused by [1] creep in unnoticed.
> > 
> > 1) 30933c4fb4f3d ("tcg/cputlb: remove other-cpu capability from TLB 
> > flushing")
> > 
> > Signed-off-by: Igor Mammedov 
> > Suggested-by: Alex Bennée 
> > Message-Id: <20250207162048.1890669-5-imamm...@redhat.com>
> > Signed-off-by: Alex Bennée 
> > ---
> >   accel/tcg/cputlb.c | 7 ++-
> >   1 file changed, 2 insertions(+), 5 deletions(-)
> > 
> > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> > index fc16a576f0..65b04b1055 100644
> > --- a/accel/tcg/cputlb.c
> > +++ b/accel/tcg/cputlb.c
> > @@ -73,11 +73,8 @@
> >   } \
> >   } while (0)
> >   
> > -#define assert_cpu_is_self(cpu) do {  \
> > -if (DEBUG_TLB_GATE) { \
> > -g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
> > -} \
> > -} while (0)
> > +#define assert_cpu_is_self(cpu) \
> > +tcg_debug_assert(!(cpu)->created || qemu_cpu_is_self(cpu))  
> 
> I think this check is just wrong or incomplete.

The point of the patch is to bring the check out of ifdef limbo.
Whether it's correct or not is up to another patch to fix.


> The intent here is to check that we're not attempting to modify the softmmu 
> tlb 
> asynchronously while a cpu is running.
> 
> (0) A synchronous flush to the current cpu is (obviously?) ok.
> (1) A flush to a cpu that is not yet created is (or should be) a no-op.

my another patch that was touching the check

"[PATCH v2 06/10] tcg: drop cpu->created check"

is trying to remove (abusing)usage of cpu->created
which should be used only for syncing main loop and
a to be created vcpu thread.
The creation of vcpu is not really complete yet by
this point so it depends on luck (being nop).

End goal from my side is to get rid of users that
use cpu->created as workaround to move from one
incomplete vcpu state to another still incomplete state.

We can drop the check after reset/postload paths are
fixed to schedule async flush.
 
> Not checked here are any of the other reasons a flush might be ok:
> 
> (2) The system as a whole is stopped, on the way in from migration/vmload.
> (3) The cpu is offline, on the way in from poweroff/reset.
> 
> If we decide that {1, 2, 3} are too complicated to check, then perhaps the 
> solution to 
> queue flushes to the cpu's workqueue is the appropriate solution.  But so far 
> all I see is 
> that we have an incomplete check, and no ready explanation for why that check 
> can't be 
> improved.
> 
> 
> r~
>

[PATCH 00/25] maintainer updates for 10.0 softfreeze (gpu/tcg tests, plugins, MAINTAINERS)

As we rapidly approach softfreeze (due March 11th) it's time to collect
together what I'm planning to merge for this release.

Testing

  - expand the aarch64 GPU tests
  - bump libvirt-ci for vulkan-tools
  - some fixes for clang compile of tcg

I've dropped the host-gpu patch in favour of just skipping the test in
python when we detect nVidia drivers.

The clang fixes are part way to removing the skip we have in
configure:

  case $target_arch in
i386|x86_64)
  if $target_cc --version | grep -qi "clang"; then
continue
  fi
  ;;
  esac

So that we can build tcg tests on the BSDs (at least for native
guests) and widen the testing. The work is unfinished, but if anyone wants
to tackle it, just patch out the above bit in configure and try to
build and run check-tcg.

Plugins

  - reduce the total number of compilation units

I'll see if I can add a test to track the total number as we aim to
clean-up stuff for single binary purposes.

MAINTAINERS

  - updates due to sanctions

I should send out the pre-PR sometime next week, hopefully getting my
10.0 PR submitted with time to spare ;-)

The following still need review:

  plugins/api: split out binary path/start/end/entry code
  plugins/api: use qemu_target_page_mask() to get value
  tests/tcg: enable -fwrapv for test-i386-bmi
  tests/tcg: fix constraints in test-i386-adcox
  tests/tcg: add message to _Static_assert in test-avx
  tests/tcg: mark test-vma as a linux-only test
  tests/vm: bump timeout for shutdown
  libvirt-ci: bump to latest for vulkan-tools (libvirt MR 525)
  tests/functional: skip vulkan tests with nVidia
  tests/functional: expand tests to cover virgl
  tests/functional: ensure we have a GPU device for tests
  tests/functional: factor out common code in gpu test
  tests/functional: move aarch64 GPU test into own file

Alex.

Alex Bennée (22):
  tests/functional: move aarch64 GPU test into own file
  tests/functional: factor out common code in gpu test
  tests/functional: ensure we have a GPU device for tests
  tests/functional: expand tests to cover virgl
  tests/functional: skip vulkan tests with nVidia
  libvirt-ci: bump to latest for vulkan-tools (libvirt MR 525)
  tests/vm: bump timeout for shutdown
  tests/tcg: mark test-vma as a linux-only test
  tests/tcg: add message to _Static_assert in test-avx
  tests/tcg: fix constraints in test-i386-adcox
  tests/tcg: enable -fwrapv for test-i386-bmi
  plugins/api: use qemu_target_page_mask() to get value
  plugins/loader: populate target_name with target_name()
  include/qemu: plugin-memory.h doesn't need cpu-defs.h
  plugins/api: clean-up the includes
  plugins/plugin.h: include queue.h
  plugins/loader: compile loader only once
  plugins/api: split out binary path/start/end/entry code
  plugins/api: split out the vaddr/hwaddr helpers
  plugins/api: split out time control helpers
  plugins/api: build only once
  MAINTAINERS: remove widely sanctioned entities

Philippe Mathieu-Daudé (2):
  tests/functional: Introduce the dso_suffix() helper
  tests/functional: Allow running TCG plugins tests on non-Linux/BSD
hosts

Pierrick Bouvier (1):
  plugins: add explicit dependency in functional tests

 MAINTAINERS   |   7 +-
 meson.build   |   1 +
 include/qemu/plugin-memory.h  |   1 -
 plugins/plugin.h  |   7 +
 bsd-user/plugin-api.c |  15 ++
 linux-user/plugin-api.c   |  14 ++
 plugins/api-system.c  | 131 ++
 plugins/api-user.c|  57 ++
 plugins/api.c | 171 +-
 plugins/loader.c  |  15 +-
 plugins/system.c  |  24 +++
 plugins/user.c|  19 ++
 tests/tcg/i386/test-avx.c |   2 +-
 tests/tcg/i386/test-i386-adcox.c  |   4 +-
 tests/tcg/multiarch/{ => linux}/test-vma.c|   0
 common-user/plugin-api.c.inc  |  43 +
 .gitlab-ci.d/cirrus/freebsd-14.vars   |   2 +-
 .gitlab-ci.d/cirrus/macos-14.vars |   2 +-
 bsd-user/meson.build  |   1 +
 contrib/plugins/meson.build   |   2 +
 linux-user/meson.build|   1 +
 plugins/meson.build   |   8 +-
 .../ci/setup/ubuntu/ubuntu-2204-aarch64.yaml  |   1 +
 .../ci/setup/ubuntu/ubuntu-2204-s390x.yaml|   1 +
 tests/docker/dockerfiles/alpine.docker|   5 +-
 tests/docker/dockerfiles/centos9.docker   |   1 +
 .../dockerfiles/debian-amd64-cross.docker |   3 +-
 .../dockerfiles/debian-arm64-cross.docker |   3 +-
 .../dockerfiles/debian-armhf-cross.docker |   3 +-
 .../dockerfiles/debian-i686-cross.docker  |   3 +-
 .../dockerfiles/debian-mips64el-cross.docker  |   3 +-
 .../dockerfiles/debian-m

[PATCH 15/25] plugins/api: use qemu_target_page_mask() to get value

Requiring TARGET_PAGE_MASK to be defined gets in the way of building
this unit once. qemu_target_page_mask() will tell us what it is.

Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-2-alex.ben...@linaro.org>

---
v2
  - use the proper qemu_target_page_mask() api
---
 plugins/api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/plugins/api.c b/plugins/api.c
index cf8cdf076a..fa4d495277 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -43,6 +43,7 @@
 #include "tcg/tcg.h"
 #include "exec/exec-all.h"
 #include "exec/gdbstub.h"
+#include "exec/target_page.h"
 #include "exec/translation-block.h"
 #include "exec/translator.h"
 #include "disas/disas.h"
@@ -287,7 +288,7 @@ uint64_t qemu_plugin_insn_vaddr(const struct 
qemu_plugin_insn *insn)
 void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn)
 {
 const DisasContextBase *db = tcg_ctx->plugin_db;
-vaddr page0_last = db->pc_first | ~TARGET_PAGE_MASK;
+vaddr page0_last = db->pc_first | ~qemu_target_page_mask();
 
 if (db->fake_insn) {
 return NULL;
-- 
2.39.5

[PATCH 10/25] tests/vm: bump timeout for shutdown

On my fairly beefy machine the timeout was triggering, leaving a
corrupted disk image due to power being pulled before the disk had
synced. Triple the timeout (30s -> 90s) to avoid this.

Signed-off-by: Alex Bennée 
---
 tests/vm/basevm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index 6d41ac7574..9e879e966a 100644
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -83,7 +83,7 @@ class BaseVM(object):
 # command to halt the guest, can be overridden by subclasses
 poweroff = "poweroff"
 # Time to wait for shutdown to finish.
-shutdown_timeout_default = 30
+shutdown_timeout_default = 90
 # enable IPv6 networking
 ipv6 = True
 # This is the timeout on the wait for console bytes.
-- 
2.39.5

[PATCH 13/25] tests/tcg: fix constraints in test-i386-adcox

Clang complains:

  clang -O2 -m64 -mcx16 
/home/alex/lsrc/qemu.git/tests/tcg/i386/test-i386-adcox.c -o test-i386-adcox 
-static
  /home/alex/lsrc/qemu.git/tests/tcg/i386/test-i386-adcox.c:32:26: error: 
invalid input constraint '0' in asm
  : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
   ^
  /home/alex/lsrc/qemu.git/tests/tcg/i386/test-i386-adcox.c:57:26: error: 
invalid input constraint '0' in asm
  : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
   ^
  2 errors generated.

Clang is pointing out that a numbered (matching) input constraint can't
refer to a read/write ("+r") output [1]. Convert the outputs to
write-only ("=r") constraints so the matching inputs are accepted.

[1] 
https://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20101101/036036.html

Signed-off-by: Alex Bennée 
Suggested-by: Daniel P. Berrangé 
---
 tests/tcg/i386/test-i386-adcox.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
index 16169efff8..b56dbc00f2 100644
--- a/tests/tcg/i386/test-i386-adcox.c
+++ b/tests/tcg/i386/test-i386-adcox.c
@@ -28,7 +28,7 @@ void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG 
adcx_operand, REG adox_ope
 "adox %3, %2;"
 "adcx %3, %1;"
 "pushf; pop %0"
-: "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+: "=r"(flags), "=r"(out_adcx), "=r"(out_adox)
 : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
 
 assert(out_adcx == in_c + adcx_operand - 1);
@@ -53,7 +53,7 @@ void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG 
adcx_operand, REG adox_ope
 "adcx %3, %1;"
 "adox %3, %2;"
 "pushf; pop %0"
-: "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+: "=r"(flags), "=r"(out_adcx), "=r"(out_adox)
 : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
 
 assert(out_adcx == in_c + adcx_operand - 1);
-- 
2.39.5

[PATCH 01/25] tests/functional: move aarch64 GPU test into own file

I want to expand the number of tests to cover a wide range of
configurations. That starts with splitting the GPU test off from the
normal virt test, with which it doesn't really share much code.

Signed-off-by: Alex Bennée 
Message-Id: <20250219150009.1662688-2-alex.ben...@linaro.org>
---
 tests/functional/meson.build  |   2 +
 tests/functional/test_aarch64_virt.py |  71 ---
 tests/functional/test_aarch64_virt_gpu.py | 102 ++
 3 files changed, 104 insertions(+), 71 deletions(-)
 create mode 100755 tests/functional/test_aarch64_virt_gpu.py

diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index 111d8bab26..c12ee1ce41 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -19,6 +19,7 @@ test_timeouts = {
   'aarch64_sbsaref_freebsd' : 720,
   'aarch64_tuxrun' : 240,
   'aarch64_virt' : 720,
+  'aarch64_virt_gpu' : 720,
   'acpi_bits' : 420,
   'arm_aspeed_palmetto' : 120,
   'arm_aspeed_romulus' : 120,
@@ -78,6 +79,7 @@ tests_aarch64_system_thorough = [
   'aarch64_tcg_plugins',
   'aarch64_tuxrun',
   'aarch64_virt',
+  'aarch64_virt_gpu',
   'aarch64_xen',
   'aarch64_xlnx_versal',
   'multiprocess',
diff --git a/tests/functional/test_aarch64_virt.py 
b/tests/functional/test_aarch64_virt.py
index 95f5ce8b4c..884aad7af6 100755
--- a/tests/functional/test_aarch64_virt.py
+++ b/tests/functional/test_aarch64_virt.py
@@ -134,77 +134,6 @@ def test_aarch64_virt_gicv2(self):
 self.common_aarch64_virt("virt,gic-version=2")
 
 
-ASSET_VIRT_GPU_KERNEL = Asset(
-'https://fileserver.linaro.org/s/ce5jXBFinPxtEdx/'
-'download?path=%2F&files='
-'Image',
-'89e5099d26166204cc5ca4bb6d1a11b92c217e1f82ec67e3ba363d09157462f6')
-
-ASSET_VIRT_GPU_ROOTFS = Asset(
-'https://fileserver.linaro.org/s/ce5jXBFinPxtEdx/'
-'download?path=%2F&files='
-'rootfs.ext4.zstd',
-'792da7573f5dc2913ddb7c638151d4a6b2d028a4cb2afb38add513c1924bdad4')
-
-@skipIfMissingCommands('zstd')
-def test_aarch64_virt_with_gpu(self):
-# This tests boots with a buildroot test image that contains
-# vkmark and other GPU exercising tools. We run a headless
-# weston that nevertheless still exercises the virtio-gpu
-# backend.
-
-self.set_machine('virt')
-self.require_accelerator("tcg")
-
-kernel_path = self.ASSET_VIRT_GPU_KERNEL.fetch()
-image_path = self.uncompress(self.ASSET_VIRT_GPU_ROOTFS, format="zstd")
-
-self.vm.set_console()
-kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
-   'console=ttyAMA0 root=/dev/vda')
-
-self.vm.add_args("-accel", "tcg")
-self.vm.add_args("-cpu", "neoverse-v1,pauth-impdef=on")
-self.vm.add_args("-machine", "virt,gic-version=max",
- '-kernel', kernel_path,
- '-append', kernel_command_line)
-self.vm.add_args("-smp", "2", "-m", "2048")
-self.vm.add_args("-device",
- "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on")
-self.vm.add_args("-display", "egl-headless")
-self.vm.add_args("-display", "dbus,gl=on")
-self.vm.add_args("-device", "virtio-blk-device,drive=hd0")
-self.vm.add_args("-blockdev",
- "driver=raw,file.driver=file,"
- "node-name=hd0,read-only=on,"
- f"file.filename={image_path}")
-self.vm.add_args("-snapshot")
-
-try:
-self.vm.launch()
-except VMLaunchFailure as excp:
-if "old virglrenderer, blob resources unsupported" in excp.output:
-self.skipTest("No blob support for virtio-gpu")
-elif "old virglrenderer, venus unsupported" in excp.output:
-self.skipTest("No venus support for virtio-gpu")
-elif "egl: no drm render node available" in excp.output:
-self.skipTest("Can't access host DRM render node")
-elif "'type' does not accept value 'egl-headless'" in excp.output:
-self.skipTest("egl-headless support is not available")
-else:
-self.log.info(f"unhandled launch failure: {excp.output}")
-raise excp
-
-self.wait_for_console_pattern('buildroot login:')
-exec_command(self, 'root')
-exec_command(self, 'export XDG_RUNTIME_DIR=/tmp')
-exec_command_and_wait_for_pattern(self,
-  "weston -B headless "
-  "--renderer gl "
-  "--shell kiosk "
-  "-- vkmark -b:duration=1.0",
-  "vkmark Score")
-
 
 if __name__ == '__main__':
 QemuSystemTest.main()
diff --git a/tests/functional/test_aarch64_virt_gpu.py 
b/tests/functional/test_a

[PATCH 24/25] plugins/api: build only once

Now all the softmmu/user-mode stuff has been split out we can build
this compilation unit only once.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Message-Id: <20250225110844.3296991-11-alex.ben...@linaro.org>
---
 plugins/api.c   | 11 ---
 plugins/meson.build |  3 +--
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/plugins/api.c b/plugins/api.c
index 832bf6ee5e..604ce06802 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -46,17 +46,6 @@
 #include "exec/translator.h"
 #include "disas/disas.h"
 #include "plugin.h"
-#ifndef CONFIG_USER_ONLY
-#include "qapi/error.h"
-#include "migration/blocker.h"
-#include "qemu/plugin-memory.h"
-#include "hw/boards.h"
-#else
-#include "qemu.h"
-#ifdef CONFIG_LINUX
-#include "loader.h"
-#endif
-#endif
 
 /* Uninstall and Reset handlers */
 
diff --git a/plugins/meson.build b/plugins/meson.build
index 942b59e904..d27220d5ff 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -61,9 +61,8 @@ endif
 user_ss.add(files('user.c', 'api-user.c'))
 system_ss.add(files('system.c', 'api-system.c'))
 
-common_ss.add(files('loader.c'))
+common_ss.add(files('loader.c', 'api.c'))
 
 specific_ss.add(files(
   'core.c',
-  'api.c',
 ))
-- 
2.39.5

[PATCH 14/25] tests/tcg: enable -fwrapv for test-i386-bmi

We allow things like:

  tests/tcg/i386/test-i386-bmi2.c:124:35: warning: shifting a negative signed 
value is undefined [-Wshift-negative-value]
  assert(result == (mask & ~(-1 << 30)));

in the main code, so allow it for the test.

Signed-off-by: Alex Bennée 
---
 tests/tcg/i386/Makefile.target | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index bbe2c44b2a..f1df40411b 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -22,7 +22,7 @@ run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
 test-i386-pcmpistri: CFLAGS += -msse4.2
 run-test-i386-pcmpistri: QEMU_OPTS += -cpu max
 
-test-i386-bmi2: CFLAGS=-O2
+test-i386-bmi2: CFLAGS=-O2 -fwrapv
 run-test-i386-bmi2: QEMU_OPTS += -cpu max
 
 test-i386-adcox: CFLAGS=-O2
-- 
2.39.5

[PATCH 06/25] tests/functional: Introduce the dso_suffix() helper

From: Philippe Mathieu-Daudé 

Introduce a helper to get the default shared library
suffix used on the host.

Suggested-by: Pierrick Bouvier 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Pierrick Bouvier 
Reviewed-by: Thomas Huth 
Message-Id: <20250220080215.49165-3-phi...@linaro.org>
[AJB: dropped whitespace cmd.py damage]
Signed-off-by: Alex Bennée 
---
 tests/functional/qemu_test/__init__.py | 2 +-
 tests/functional/qemu_test/config.py   | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/functional/qemu_test/__init__.py 
b/tests/functional/qemu_test/__init__.py
index 5c972843a6..45f7befa37 100644
--- a/tests/functional/qemu_test/__init__.py
+++ b/tests/functional/qemu_test/__init__.py
@@ -7,7 +7,7 @@
 
 
 from .asset import Asset
-from .config import BUILD_DIR
+from .config import BUILD_DIR, dso_suffix
 from .cmd import is_readable_executable_file, \
 interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
 exec_command, exec_command_and_wait_for_pattern, get_qemu_img, which
diff --git a/tests/functional/qemu_test/config.py 
b/tests/functional/qemu_test/config.py
index edd75b7fd0..0eab1baa54 100644
--- a/tests/functional/qemu_test/config.py
+++ b/tests/functional/qemu_test/config.py
@@ -13,6 +13,7 @@
 
 import os
 from pathlib import Path
+import platform
 
 
 def _source_dir():
@@ -34,3 +35,8 @@ def _build_dir():
 raise Exception("Cannot identify build dir, set QEMU_BUILD_ROOT")
 
 BUILD_DIR = _build_dir()
+
+def dso_suffix():
+'''Return the dynamic libraries suffix for the current platform'''
+DSO_SUFFIXES = { 'Linux': 'so', 'Darwin': 'dylib', 'Windows': 'dll' }
+return DSO_SUFFIXES[platform.system()]
-- 
2.39.5

[PATCH 02/25] tests/functional: factor out common code in gpu test

In preparation for handling more tests split out the common machine
setup details from the test specific stuff.

Signed-off-by: Alex Bennée 
Message-Id: <20250219150009.1662688-3-alex.ben...@linaro.org>
---
 tests/functional/test_aarch64_virt_gpu.py | 30 +++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/tests/functional/test_aarch64_virt_gpu.py 
b/tests/functional/test_aarch64_virt_gpu.py
index f21ae18392..06093c6b60 100755
--- a/tests/functional/test_aarch64_virt_gpu.py
+++ b/tests/functional/test_aarch64_virt_gpu.py
@@ -39,12 +39,7 @@ def wait_for_console_pattern(self, success_message, vm=None):
 'rootfs.ext4.zstd',
 '792da7573f5dc2913ddb7c638151d4a6b2d028a4cb2afb38add513c1924bdad4')
 
-@skipIfMissingCommands('zstd')
-def test_aarch64_virt_with_vulkan_gpu(self):
-# This tests boots with a buildroot test image that contains
-# vkmark and other GPU exercising tools. We run a headless
-# weston that nevertheless still exercises the virtio-gpu
-# backend.
+def _run_virt_gpu_test(self, gpu_device,  weston_cmd, weston_pattern):
 
 self.set_machine('virt')
 self.require_accelerator("tcg")
@@ -62,10 +57,10 @@ def test_aarch64_virt_with_vulkan_gpu(self):
  '-kernel', kernel_path,
  '-append', kernel_command_line)
 self.vm.add_args("-smp", "2", "-m", "2048")
-self.vm.add_args("-device",
- "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on")
-self.vm.add_args("-display", "egl-headless")
-self.vm.add_args("-display", "dbus,gl=on")
+self.vm.add_args("-device", gpu_device)
+for opt in ["egl-headless", "dbus,gl=on"]:
+self.vm.add_args("-display", opt)
+
 self.vm.add_args("-device", "virtio-blk-device,drive=hd0")
 self.vm.add_args("-blockdev",
  "driver=raw,file.driver=file,"
@@ -91,12 +86,15 @@ def test_aarch64_virt_with_vulkan_gpu(self):
 self.wait_for_console_pattern('buildroot login:')
 exec_command(self, 'root')
 exec_command(self, 'export XDG_RUNTIME_DIR=/tmp')
-exec_command_and_wait_for_pattern(self,
-  "weston -B headless "
-  "--renderer gl "
-  "--shell kiosk "
-  "-- vkmark -b:duration=1.0",
-  "vkmark Score")
+full_cmd = f"weston -B headless --renderer gl --shell kiosk -- 
{weston_cmd}"
+exec_command_and_wait_for_pattern(self, full_cmd, weston_pattern)
+
+@skipIfMissingCommands('zstd')
+def test_aarch64_virt_with_vulkan_gpu(self):
+gpu_device = "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on"
+weston_cmd = "vkmark -b:duration=1.0"
+weston_pattern = "vkmark Score"
+self._run_virt_gpu_test(gpu_device, weston_cmd, weston_pattern)
 
 if __name__ == '__main__':
 QemuSystemTest.main()
-- 
2.39.5

[PATCH 25/25] MAINTAINERS: remove widely sanctioned entities

The following organisations appear on the US sanctions list:

  Yadro: https://sanctionssearch.ofac.treas.gov/Details.aspx?id=41125
  ISPRAS: https://sanctionssearch.ofac.treas.gov/Details.aspx?id=50890

As a result maintainers interacting with such entities would face
legal risk in a number of jurisdictions. To reduce the risk of
inadvertent non-compliance remove entries from these organisations
from the MAINTAINERS file.

Mark the pcf8574 system as orphaned until someone volunteers to step
up as a maintainer. Add myself as a second reviewer to record/replay
so I can help with what odd fixes I can.

Reviewed-by: Markus Armbruster 
Reviewed-by: Daniel P. Berrangé 
Acked-by: Paolo Bonzini 
Signed-off-by: Alex Bennée 
Message-Id: <20250221161443.2321327-1-alex.ben...@linaro.org>
---
 MAINTAINERS | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1911949526..9cdfe19115 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2529,8 +2529,7 @@ F: hw/i2c/i2c_mux_pca954x.c
 F: include/hw/i2c/i2c_mux_pca954x.h
 
 pcf8574
-M: Dmitrii Sharikhin 
-S: Maintained
+S: Orphaned
 F: hw/gpio/pcf8574.c
 F: include/gpio/pcf8574.h
 
@@ -3627,10 +3626,10 @@ F: net/filter-mirror.c
 F: tests/qtest/test-filter*
 
 Record/replay
-M: Pavel Dovgalyuk 
 R: Paolo Bonzini 
+R: Alex Bennée 
 W: https://wiki.qemu.org/Features/record-replay
-S: Supported
+S: Odd Fixes
 F: replay/*
 F: block/blkreplay.c
 F: net/filter-replay.c
-- 
2.39.5

[PATCH 07/25] tests/functional: Allow running TCG plugins tests on non-Linux/BSD hosts

From: Philippe Mathieu-Daudé 

Not all platforms use the '.so' suffix for shared libraries,
which is how plugins are built. Use the recently introduced
dso_suffix() helper to get the proper host suffix.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2804
Suggested-by: Pierrick Bouvier 
Suggested-by: Daniel P. Berrangé 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Pierrick Bouvier 
Reviewed-by: Thomas Huth 
Message-Id: <20250220080215.49165-4-phi...@linaro.org>
Signed-off-by: Alex Bennée 
---
 tests/functional/test_aarch64_tcg_plugins.py | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/functional/test_aarch64_tcg_plugins.py 
b/tests/functional/test_aarch64_tcg_plugins.py
index 7e8beacc83..5736b60545 100755
--- a/tests/functional/test_aarch64_tcg_plugins.py
+++ b/tests/functional/test_aarch64_tcg_plugins.py
@@ -13,10 +13,11 @@
 
 import tempfile
 import mmap
+import os
 import re
 
 from qemu.machine.machine import VMLaunchFailure
-from qemu_test import LinuxKernelTest, Asset
+from qemu_test import LinuxKernelTest, Asset, dso_suffix
 
 
 class PluginKernelBase(LinuxKernelTest):
@@ -62,6 +63,10 @@ class PluginKernelNormal(PluginKernelBase):
 ('https://storage.tuxboot.com/20230331/arm64/Image'),
 'ce95a7101a5fecebe0fe630deee6bd97b32ba41bc8754090e9ad8961ea8674c7')
 
+def plugin_file(self, plugin_name):
+sfx = dso_suffix()
+return os.path.join('tests', 'tcg', 'plugins', f'{plugin_name}.{sfx}')
+
 def test_aarch64_virt_insn(self):
 self.set_machine('virt')
 self.cpu='cortex-a53'
@@ -74,7 +79,7 @@ def test_aarch64_virt_insn(self):
  suffix=".log")
 
 self.run_vm(kernel_path, kernel_command_line,
-"tests/tcg/plugins/libinsn.so", plugin_log.name,
+self.plugin_file('libinsn'), plugin_log.name,
 console_pattern)
 
 with plugin_log as lf, \
@@ -100,7 +105,7 @@ def test_aarch64_virt_insn_icount(self):
  suffix=".log")
 
 self.run_vm(kernel_path, kernel_command_line,
-"tests/tcg/plugins/libinsn.so", plugin_log.name,
+self.plugin_file('libinsn'), plugin_log.name,
 console_pattern,
 args=('-icount', 'shift=1'))
 
-- 
2.39.5

[PATCH 09/25] libvirt-ci: bump to latest for vulkan-tools (libvirt MR 525)

The alpine baseline has also been updated in the meantime so we need
to address that while we are at it.

Signed-off-by: Alex Bennée 
---
 .gitlab-ci.d/cirrus/freebsd-14.vars   | 2 +-
 .gitlab-ci.d/cirrus/macos-14.vars | 2 +-
 scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml  | 1 +
 scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml| 1 +
 tests/docker/dockerfiles/alpine.docker| 5 +++--
 tests/docker/dockerfiles/centos9.docker   | 1 +
 tests/docker/dockerfiles/debian-amd64-cross.docker| 3 ++-
 tests/docker/dockerfiles/debian-arm64-cross.docker| 3 ++-
 tests/docker/dockerfiles/debian-armhf-cross.docker| 3 ++-
 tests/docker/dockerfiles/debian-i686-cross.docker | 3 ++-
 tests/docker/dockerfiles/debian-mips64el-cross.docker | 3 ++-
 tests/docker/dockerfiles/debian-mipsel-cross.docker   | 3 ++-
 tests/docker/dockerfiles/debian-ppc64el-cross.docker  | 3 ++-
 tests/docker/dockerfiles/debian-s390x-cross.docker| 3 ++-
 tests/docker/dockerfiles/debian.docker| 3 ++-
 tests/docker/dockerfiles/fedora-rust-nightly.docker   | 1 +
 tests/docker/dockerfiles/fedora-win64-cross.docker| 1 +
 tests/docker/dockerfiles/fedora.docker| 1 +
 tests/docker/dockerfiles/opensuse-leap.docker | 1 +
 tests/docker/dockerfiles/ubuntu2204.docker| 1 +
 tests/lcitool/libvirt-ci  | 2 +-
 tests/lcitool/projects/qemu.yml   | 1 +
 tests/lcitool/refresh | 2 +-
 tests/vm/generated/freebsd.json   | 1 +
 24 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/.gitlab-ci.d/cirrus/freebsd-14.vars 
b/.gitlab-ci.d/cirrus/freebsd-14.vars
index 0997c47af5..19ca0d3663 100644
--- a/.gitlab-ci.d/cirrus/freebsd-14.vars
+++ b/.gitlab-ci.d/cirrus/freebsd-14.vars
@@ -11,6 +11,6 @@ MAKE='/usr/local/bin/gmake'
 NINJA='/usr/local/bin/ninja'
 PACKAGING_COMMAND='pkg'
 PIP3='/usr/local/bin/pip'
-PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache4 cmocka ctags 
curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake 
gnutls gsed gtk-vnc gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs 
libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle 
ninja opencv pixman pkgconf png py311-numpy py311-pillow py311-pip py311-pyyaml 
py311-sphinx py311-sphinx_rtd_theme py311-tomli python3 rpm2cpio rust 
rust-bindgen-cli sdl2 sdl2_image snappy sndio socat spice-protocol tesseract 
usbredir virglrenderer vte3 xorriso zstd'
+PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache4 cmocka ctags 
curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake 
gnutls gsed gtk-vnc gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs 
libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle 
ninja opencv pixman pkgconf png py311-numpy py311-pillow py311-pip py311-pyyaml 
py311-sphinx py311-sphinx_rtd_theme py311-tomli python3 rpm2cpio rust 
rust-bindgen-cli sdl2 sdl2_image snappy sndio socat spice-protocol tesseract 
usbredir virglrenderer vte3 vulkan-tools xorriso zstd'
 PYPI_PKGS=''
 PYTHON='/usr/local/bin/python3'
diff --git a/.gitlab-ci.d/cirrus/macos-14.vars 
b/.gitlab-ci.d/cirrus/macos-14.vars
index 25dff322e6..b039465f56 100644
--- a/.gitlab-ci.d/cirrus/macos-14.vars
+++ b/.gitlab-ci.d/cirrus/macos-14.vars
@@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake'
 NINJA='/opt/homebrew/bin/ninja'
 PACKAGING_COMMAND='brew'
 PIP3='/opt/homebrew/bin/pip3'
-PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus 
diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc 
jpeg-turbo json-c libcbor libepoxy libffi libgcrypt libiscsi libnfs libpng 
libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja 
pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse 
spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd'
+PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus 
diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc 
jpeg-turbo json-c libcbor libepoxy libffi libgcrypt libiscsi libnfs libpng 
libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja 
pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse 
spice-protocol swtpm tesseract usbredir vde vte3 vulkan-tools xorriso zlib zstd'
 PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli'
 PYTHON='/opt/homebrew/bin/python3'
diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml 
b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
index 288156d1e4..dbcd2e076d 100644
--- a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
+++ b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
@@ -123,6 +123,7 @@ packages:
   - tar
   - tesseract-ocr
   - tesseract-ocr-eng
+  - vulkan-tools
   - xorriso
   - zlib1

[PATCH 03/25] tests/functional: ensure we have a GPU device for tests

It's possible to build QEMU without support for the GL enabled GPU
devices and we can catch that earlier with an explicit check.

Signed-off-by: Alex Bennée 
Message-Id: <20250219150009.1662688-4-alex.ben...@linaro.org>
---
 tests/functional/test_aarch64_virt_gpu.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/functional/test_aarch64_virt_gpu.py 
b/tests/functional/test_aarch64_virt_gpu.py
index 06093c6b60..c9463d7285 100755
--- a/tests/functional/test_aarch64_virt_gpu.py
+++ b/tests/functional/test_aarch64_virt_gpu.py
@@ -91,6 +91,9 @@ def _run_virt_gpu_test(self, gpu_device,  weston_cmd, 
weston_pattern):
 
 @skipIfMissingCommands('zstd')
 def test_aarch64_virt_with_vulkan_gpu(self):
+
+self.require_device('virtio-gpu-gl-pci')
+
 gpu_device = "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on"
 weston_cmd = "vkmark -b:duration=1.0"
 weston_pattern = "vkmark Score"
-- 
2.39.5

Re: [PATCH 4/4] tcg:tlb: use tcg_debug_assert() in assert_cpu_is_self()

On Tue, 25 Feb 2025 12:04:40 -0800
Richard Henderson  wrote:

> On 2/25/25 12:02, Richard Henderson wrote:
> > Not checked here are any of the other reasons a flush might be ok:
> > 
> > (2) The system as a whole is stopped, on the way in from migration/vmload.
> > (3) The cpu is offline, on the way in from poweroff/reset.  
> (4) Running in round-robin mode, so there is *never* a race between cpus.
> 
> Anything else I've forgotten?

Looking at the x86 reset path:
 * we have 2 resets per vcpu from the main loop,
   when the vcpu is created (at realize time and at system_reset time).
   These are probably nops for tcg, as you said.
   (It likely applies to all targets.)

 * There is also a reset triggered by IPI (run in the vcpu thread),
   which likely should do a flush to clear whatever context
   the vcpu had before reset.

Re: [PATCH V4] migration: ram block cpr blockers

2025-02-26 Thread Steven Sistare


On 2/25/2025 4:10 PM, David Hildenbrand wrote:

+    /* Ram device is remapped in new QEMU */
+    if (memory_region_is_ram_device(mr)) {
+    return true;
+    }
+
+    /* Named files are remapped in new QEMU, same contents if shared (no COW) 
*/
+    if (qemu_ram_is_shared(rb) && qemu_ram_is_named_file(rb)) {
+    return true;
+    }
+
+    /* A file descriptor is remapped in new QEMU */
+    if (rb->fd >= 0 && qemu_ram_is_shared(rb)) {
+    return true;
+    }



Sorry, I was not fast enough to reply to your v3 reply.

This is now essentially:

if (qemu_ram_is_shared(rb) &&
     (qemu_ram_is_named_file(rb) || rb->fd >= 0)) {
 return true;
}

But what is the purpose of the "named file" check then, if rb->fd essentially 
allows for any files?

So either the "fd >= 0" check is insufficient or the qemu_ram_is_named_file() 
check is superfluous.


Yup, qemu_ram_is_named_file always has an fd at this point, so that check is 
gratuitous.
The preservation mechanism is different, which is why I thought of it as a 
different
case, but that is just a commenting issue.

V5 will be:

static bool ram_is_cpr_compatible(RAMBlock *rb)
{
MemoryRegion *mr = rb->mr;

if (!mr || !memory_region_is_ram(mr)) {
return true;
}

/* Ram device is remapped in new QEMU */
if (memory_region_is_ram_device(mr)) {
return true;
}

/*
 * A file descriptor is passed to new QEMU and remapped, or its backing
 * file is reopened and mapped.  It must be shared to avoid COW.
 */
if (rb->fd >= 0 && qemu_ram_is_shared(rb)) {
return true;
}

return false;
}

- Steve

[PATCH 12/25] tests/tcg: add message to _Static_assert in test-avx

In preparation for enabling clang and avoiding:

  error: '_Static_assert' with no message is a C2x extension 
[-Werror,-Wc2x-extensions]

let's just add the message.

Signed-off-by: Alex Bennée 
---
 tests/tcg/i386/test-avx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tcg/i386/test-avx.c b/tests/tcg/i386/test-avx.c
index 230e6d84b8..43df2226b9 100644
--- a/tests/tcg/i386/test-avx.c
+++ b/tests/tcg/i386/test-avx.c
@@ -244,7 +244,7 @@ v4di indexd = {0x0002ffcdull, 0xfff50010ull,
0x003afff0ull, 0x000eull};
 
 v4di gather_mem[0x20];
-_Static_assert(sizeof(gather_mem) == 1024);
+_Static_assert(sizeof(gather_mem) == 1024, "gather_mem not defined size");
 
 void init_f16reg(v4di *r)
 {
-- 
2.39.5

[PATCH 05/25] plugins: add explicit dependency in functional tests

From: Pierrick Bouvier 

./tests/functional/test_aarch64_tcg_plugins.py needs to have plugin
libinsn built. However, it's not listed as a dependency, so meson can't
know it needs to be built.

Thus, we keep track of all plugins, and add them as an explicit
dependency.

Fixes: 4c134d07b9e ("tests: add a new set of tests to exercise plugins")
Signed-off-by: Pierrick Bouvier 
Tested-by: Philippe Mathieu-Daudé 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20250220080215.49165-2-phi...@linaro.org>
Signed-off-by: Alex Bennée 
---
 meson.build   | 1 +
 contrib/plugins/meson.build   | 2 ++
 tests/functional/meson.build  | 2 +-
 tests/tcg/plugins/meson.build | 2 ++
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 0ee79c664d..4588bfd864 100644
--- a/meson.build
+++ b/meson.build
@@ -3657,6 +3657,7 @@ qtest_module_ss = ss.source_set()
 
 modules = {}
 target_modules = {}
+plugin_modules = []
 hw_arch = {}
 target_arch = {}
 target_system_arch = {}
diff --git a/contrib/plugins/meson.build b/contrib/plugins/meson.build
index 484b9a808c..fa8a426c8b 100644
--- a/contrib/plugins/meson.build
+++ b/contrib/plugins/meson.build
@@ -26,3 +26,5 @@ if t.length() > 0
 else
   run_target('contrib-plugins', command: find_program('true'))
 endif
+
+plugin_modules += t
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index c12ee1ce41..ea4b12c054 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -366,7 +366,7 @@ foreach speed : ['quick', 'thorough']
   # 'run_target' logic below & in Makefile.include
   test('func-' + testname,
python,
-   depends: [test_deps, test_emulator, emulator_modules],
+   depends: [test_deps, test_emulator, emulator_modules, 
plugin_modules],
env: test_env,
args: [testpath],
protocol: 'tap',
diff --git a/tests/tcg/plugins/meson.build b/tests/tcg/plugins/meson.build
index 87a17d67bd..c8cb0626a6 100644
--- a/tests/tcg/plugins/meson.build
+++ b/tests/tcg/plugins/meson.build
@@ -19,3 +19,5 @@ if t.length() > 0
 else
   run_target('test-plugins', command: find_program('true'))
 endif
+
+plugin_modules += t
-- 
2.39.5

[PATCH 19/25] plugins/plugin.h: include queue.h

Headers should bring in what they need so don't rely on getting
queue.h by side effects. This will help with clean-ups in the
following patches.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-6-alex.ben...@linaro.org>
---
 plugins/plugin.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plugins/plugin.h b/plugins/plugin.h
index 30e2299a54..9ed20b5c41 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -13,6 +13,7 @@
 #define PLUGIN_H
 
 #include 
+#include "qemu/queue.h"
 #include "qemu/qht.h"
 
 #define QEMU_PLUGIN_MIN_VERSION 2
-- 
2.39.5

[PATCH 11/25] tests/tcg: mark test-vma as a linux-only test

The main multiarch tests should compile for any POSIX system, however
test-vma's usage of MAP_NORESERVE makes it a Linux-only test. Simply
moving the source file is enough for the build logic to skip it on the BSDs.

Signed-off-by: Alex Bennée 
---
 tests/tcg/multiarch/{ => linux}/test-vma.c | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/tcg/multiarch/{ => linux}/test-vma.c (100%)

diff --git a/tests/tcg/multiarch/test-vma.c 
b/tests/tcg/multiarch/linux/test-vma.c
similarity index 100%
rename from tests/tcg/multiarch/test-vma.c
rename to tests/tcg/multiarch/linux/test-vma.c
-- 
2.39.5

[PATCH 21/25] plugins/api: split out binary path/start/end/entry code

To move the main api.c to a single build compilation object we need to
start splitting out user and system specific code. As we need to grub
around host headers we move these particular helpers into the *-user
mode directories.

The binary/start/end/entry helpers are all NOPs for system mode.

While using the plugin-api.c.inc trick means we build for both
linux-user and bsd-user, the BSD user-mode command line is still
missing -plugin. This can be enabled once we have reliable check-tcg
tests working for the BSDs.

Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-8-alex.ben...@linaro.org>

---
v2
  - use common-user/plugin-api.c.inc instead
  - add commentary about state of plugins for BSD user
---
 bsd-user/plugin-api.c| 15 +
 linux-user/plugin-api.c  | 14 
 plugins/api-system.c | 39 
 plugins/api.c| 43 
 common-user/plugin-api.c.inc | 43 
 bsd-user/meson.build |  1 +
 linux-user/meson.build   |  1 +
 plugins/meson.build  |  2 +-
 8 files changed, 114 insertions(+), 44 deletions(-)
 create mode 100644 bsd-user/plugin-api.c
 create mode 100644 linux-user/plugin-api.c
 create mode 100644 plugins/api-system.c
 create mode 100644 common-user/plugin-api.c.inc

diff --git a/bsd-user/plugin-api.c b/bsd-user/plugin-api.c
new file mode 100644
index 00..6ccef7eaa0
--- /dev/null
+++ b/bsd-user/plugin-api.c
@@ -0,0 +1,15 @@
+/*
+ * QEMU Plugin API - bsd-user-mode only implementations
+ *
+ * Common user-mode only APIs are in plugins/api-user. These helpers
+ * are only specific to bsd-user.
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "common-user/plugin-api.c.inc"
diff --git a/linux-user/plugin-api.c b/linux-user/plugin-api.c
new file mode 100644
index 00..e4f796d926
--- /dev/null
+++ b/linux-user/plugin-api.c
@@ -0,0 +1,14 @@
+/*
+ * QEMU Plugin API - linux-user-mode only implementations
+ *
+ * Common user-mode only APIs are in plugins/api-user. These helpers
+ * are only specific to linux-user.
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu.h"
+#include "common-user/plugin-api.c.inc"
diff --git a/plugins/api-system.c b/plugins/api-system.c
new file mode 100644
index 00..cb0dd8f730
--- /dev/null
+++ b/plugins/api-system.c
@@ -0,0 +1,39 @@
+/*
+ * QEMU Plugin API - System specific implementations
+ *
+ * This provides the APIs that have a specific system implementation
+ * or are only relevant to system-mode.
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/plugin.h"
+
+/*
+ * In system mode we cannot trace the binary being executed so the
+ * helpers all return NULL/0.
+ */
+const char *qemu_plugin_path_to_binary(void)
+{
+return NULL;
+}
+
+uint64_t qemu_plugin_start_code(void)
+{
+return 0;
+}
+
+uint64_t qemu_plugin_end_code(void)
+{
+return 0;
+}
+
+uint64_t qemu_plugin_entry_code(void)
+{
+return 0;
+}
diff --git a/plugins/api.c b/plugins/api.c
index c3ba1e98e8..ffccd71e4b 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -471,49 +471,6 @@ bool qemu_plugin_bool_parse(const char *name, const char 
*value, bool *ret)
 return name && value && qapi_bool_parse(name, value, ret, NULL);
 }
 
-/*
- * Binary path, start and end locations
- */
-const char *qemu_plugin_path_to_binary(void)
-{
-char *path = NULL;
-#ifdef CONFIG_USER_ONLY
-TaskState *ts = get_task_state(current_cpu);
-path = g_strdup(ts->bprm->filename);
-#endif
-return path;
-}
-
-uint64_t qemu_plugin_start_code(void)
-{
-uint64_t start = 0;
-#ifdef CONFIG_USER_ONLY
-TaskState *ts = get_task_state(current_cpu);
-start = ts->info->start_code;
-#endif
-return start;
-}
-
-uint64_t qemu_plugin_end_code(void)
-{
-uint64_t end = 0;
-#ifdef CONFIG_USER_ONLY
-TaskState *ts = get_task_state(current_cpu);
-end = ts->info->end_code;
-#endif
-return end;
-}
-
-uint64_t qemu_plugin_entry_code(void)
-{
-uint64_t entry = 0;
-#ifdef CONFIG_USER_ONLY
-TaskState *ts = get_task_state(current_cpu);
-entry = ts->info->entry;
-#endif
-return entry;
-}
-
 /*
  * Create register handles.
  *
diff --git a/common-user/plugin-api.c.inc b/common-user/plugin-api.c.inc
new file mode 100644
index 00..5b8a1396b6
--- /dev/null
+++ b/common-user/plugin-api.c.inc
@@ -0,0 +1,43 @@
+/*
+ * QEMU Plugin API - *-user-mode only implementations
+ *
+ * Common user-mode only APIs are in plugins/api-user. These helpers
+ * are only specific to the *-user frontends.
+ *
+ * Copyright (C)

[PATCH 08/25] tests/functional: skip vulkan tests with nVidia

While running the new GPU tests it was noted that the proprietary
nVidia driver barfed when run under the sanitiser:

  2025-02-20 11:13:08,226: [11:13:07.782] Output 'headless' attempts
  EOTF mode SDR and colorimetry mode default.
  2025-02-20 11:13:08,227: [11:13:07.784] Output 'headless' using color
  profile: stock sRGB color profile

  and that's the last thing it outputs.

  The sanitizer reports that when the framework sends the SIGTERM
  because of the timeout we get a write to a NULL pointer (but,
  interestingly, not this time in an atexit callback):

  UndefinedBehaviorSanitizer:DEADLYSIGNAL
  ==471863==ERROR: UndefinedBehaviorSanitizer: SEGV on unknown address
  0x (pc 0x7a18ceaafe80 bp 0x sp 0x7ffe8e3ff6d0
  T471863)
  ==471863==The signal is caused by a WRITE memory access.
  ==471863==Hint: address points to the zero page.
  #0 0x7a18ceaafe80
  (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x16afe80)
  (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
  #1 0x7a18ce9e72c0
  (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x15e72c0)
  (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
  #2 0x7a18ce9f11bb
  (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x15f11bb)
  (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
  #3 0x7a18ce6dc9d1
  (/lib/x86_64-linux-gnu/libnvidia-eglcore.so.535.183.01+0x12dc9d1)
  (BuildId: 24b0d0b90369112e3de888a93eb8d7e00304a6db)
  #4 0x7a18e7d15326 in vrend_renderer_create_fence
  
/usr/src/virglrenderer-1.0.0-1ubuntu2/obj-x86_64-linux-gnu/../src/vrend_renderer.c:10883:26
  #5 0x55bfb6621871 in virtio_gpu_virgl_process_cmd

The #dri-devel channel confirmed:

   stsquad: nv driver is known to not work with venus, don't use
  it for testing

So let's skip running the test to avoid known failures.

Reported-by: Peter Maydell 
Signed-off-by: Alex Bennée 
Cc: Dmitry Osipenko 

---
v2
  - implement block at the test level
---
 tests/functional/test_aarch64_virt_gpu.py | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/functional/test_aarch64_virt_gpu.py 
b/tests/functional/test_aarch64_virt_gpu.py
index 7a8471d1ca..a4ae5777e6 100755
--- a/tests/functional/test_aarch64_virt_gpu.py
+++ b/tests/functional/test_aarch64_virt_gpu.py
@@ -9,8 +9,6 @@
 #
 # SPDX-License-Identifier: GPL-2.0-or-later
 
-import logging
-
 from qemu.machine.machine import VMLaunchFailure
 
 from qemu_test import QemuSystemTest, Asset
@@ -18,6 +16,10 @@
 from qemu_test import wait_for_console_pattern
 from qemu_test import skipIfMissingCommands
 
+from re import search
+from subprocess import check_output
+
+
 class Aarch64VirtGPUMachine(QemuSystemTest):
 KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 '
 timeout = 360
@@ -110,10 +112,16 @@ def test_aarch64_virt_with_virgl_blobs_gpu(self):
 self._run_virt_gpu_test(gpu_device, weston_cmd, weston_pattern)
 
 @skipIfMissingCommands('zstd')
+@skipIfMissingCommands('vulkaninfo')
 def test_aarch64_virt_with_vulkan_gpu(self):
 
 self.require_device('virtio-gpu-gl-pci')
 
+vk_info = check_output(["vulkaninfo", "--summary"], encoding="utf-8")
+
+if search(r"driverID\s+=\s+DRIVER_ID_NVIDIA_PROPRIETARY", vk_info):
+self.skipTest("Test skipped on NVIDIA proprietary driver")
+
 gpu_device = "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on"
 weston_cmd = "vkmark -b:duration=1.0"
 weston_pattern = "vkmark Score"
-- 
2.39.5

[PATCH 04/25] tests/functional: expand tests to cover virgl

Add two more test modes using glmark2-wayland to exercise the OpenGL
pass-through modes with virgl. Virgl can run with or without the
hostmem blob support.

We might want to eventually add more directed tests and individual
features later on but the glmark/vkmark tests are a good general
smoke test for accelerated 3D.

Signed-off-by: Alex Bennée 
Message-Id: <20250219150009.1662688-5-alex.ben...@linaro.org>
---
 tests/functional/test_aarch64_virt_gpu.py | 20 
 1 file changed, 20 insertions(+)

diff --git a/tests/functional/test_aarch64_virt_gpu.py 
b/tests/functional/test_aarch64_virt_gpu.py
index c9463d7285..7a8471d1ca 100755
--- a/tests/functional/test_aarch64_virt_gpu.py
+++ b/tests/functional/test_aarch64_virt_gpu.py
@@ -89,6 +89,26 @@ def _run_virt_gpu_test(self, gpu_device,  weston_cmd, 
weston_pattern):
 full_cmd = f"weston -B headless --renderer gl --shell kiosk -- 
{weston_cmd}"
 exec_command_and_wait_for_pattern(self, full_cmd, weston_pattern)
 
+@skipIfMissingCommands('zstd')
+def test_aarch64_virt_with_virgl_gpu(self):
+
+self.require_device('virtio-gpu-gl-pci')
+
+gpu_device = "virtio-gpu-gl-pci"
+weston_cmd = "glmark2-wayland -b:duration=1.0"
+weston_pattern = "glmark2 Score"
+self._run_virt_gpu_test(gpu_device, weston_cmd, weston_pattern)
+
+@skipIfMissingCommands('zstd')
+def test_aarch64_virt_with_virgl_blobs_gpu(self):
+
+self.require_device('virtio-gpu-gl-pci')
+
+gpu_device = "virtio-gpu-gl-pci,hostmem=4G,blob=on"
+weston_cmd = "glmark2-wayland -b:duration=1.0"
+weston_pattern = "glmark2 Score"
+self._run_virt_gpu_test(gpu_device, weston_cmd, weston_pattern)
+
 @skipIfMissingCommands('zstd')
 def test_aarch64_virt_with_vulkan_gpu(self):
 
-- 
2.39.5

[PATCH 22/25] plugins/api: split out the vaddr/hwaddr helpers

These only work for system-mode and are NOPs for user-mode.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-9-alex.ben...@linaro.org>
---
 plugins/api-system.c | 58 
 plugins/api-user.c   | 40 +
 plugins/api.c| 70 
 plugins/meson.build  |  2 +-
 4 files changed, 99 insertions(+), 71 deletions(-)
 create mode 100644 plugins/api-user.c

diff --git a/plugins/api-system.c b/plugins/api-system.c
index cb0dd8f730..38560de342 100644
--- a/plugins/api-system.c
+++ b/plugins/api-system.c
@@ -12,6 +12,10 @@
 
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
+#include "hw/boards.h"
+#include "qemu/plugin-memory.h"
 #include "qemu/plugin.h"
 
 /*
@@ -37,3 +41,57 @@ uint64_t qemu_plugin_entry_code(void)
 {
 return 0;
 }
+
+/*
+ * Virtual Memory queries
+ */
+
+static __thread struct qemu_plugin_hwaddr hwaddr_info;
+
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+  uint64_t vaddr)
+{
+CPUState *cpu = current_cpu;
+unsigned int mmu_idx = get_mmuidx(info);
+enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
+hwaddr_info.is_store = (rw & QEMU_PLUGIN_MEM_W) != 0;
+
+assert(mmu_idx < NB_MMU_MODES);
+
+if (!tlb_plugin_lookup(cpu, vaddr, mmu_idx,
+   hwaddr_info.is_store, &hwaddr_info)) {
+error_report("invalid use of qemu_plugin_get_hwaddr");
+return NULL;
+}
+
+return &hwaddr_info;
+}
+
+bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr)
+{
+return haddr->is_io;
+}
+
+uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
+{
+if (haddr) {
+return haddr->phys_addr;
+}
+return 0;
+}
+
+const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
+{
+if (h && h->is_io) {
+MemoryRegion *mr = h->mr;
+if (!mr->name) {
+unsigned maddr = (uintptr_t)mr;
+g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
+return g_intern_string(temp);
+} else {
+return g_intern_string(mr->name);
+}
+} else {
+return g_intern_static_string("RAM");
+}
+}
diff --git a/plugins/api-user.c b/plugins/api-user.c
new file mode 100644
index 00..867b420339
--- /dev/null
+++ b/plugins/api-user.c
@@ -0,0 +1,40 @@
+/*
+ * QEMU Plugin API - user-mode only implementations
+ *
+ * This provides the APIs that have a user-mode specific
+ * implementations or are only relevant to user-mode.
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+
+/*
+ * Virtual Memory queries - these are all NOPs for user-mode which
+ * only ever has visibility of virtual addresses.
+ */
+
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+  uint64_t vaddr)
+{
+return NULL;
+}
+
+bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr)
+{
+return false;
+}
+
+uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
+{
+return 0;
+}
+
+const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
+{
+return g_intern_static_string("Invalid");
+}
diff --git a/plugins/api.c b/plugins/api.c
index ffccd71e4b..82241699a5 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -383,76 +383,6 @@ qemu_plugin_mem_value 
qemu_plugin_mem_get_value(qemu_plugin_meminfo_t info)
 return value;
 }
 
-/*
- * Virtual Memory queries
- */
-
-#ifdef CONFIG_SOFTMMU
-static __thread struct qemu_plugin_hwaddr hwaddr_info;
-#endif
-
-struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
-  uint64_t vaddr)
-{
-#ifdef CONFIG_SOFTMMU
-CPUState *cpu = current_cpu;
-unsigned int mmu_idx = get_mmuidx(info);
-enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
-hwaddr_info.is_store = (rw & QEMU_PLUGIN_MEM_W) != 0;
-
-assert(mmu_idx < NB_MMU_MODES);
-
-if (!tlb_plugin_lookup(cpu, vaddr, mmu_idx,
-   hwaddr_info.is_store, &hwaddr_info)) {
-error_report("invalid use of qemu_plugin_get_hwaddr");
-return NULL;
-}
-
-return &hwaddr_info;
-#else
-return NULL;
-#endif
-}
-
-bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr)
-{
-#ifdef CONFIG_SOFTMMU
-return haddr->is_io;
-#else
-return false;
-#endif
-}
-
-uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
-{
-#ifdef CONFIG_SOFTMMU
-if (haddr) {
-return haddr->phys_addr;
-}
-#endif
-return 0;
-}
-
-const ch

[PATCH 20/25] plugins/loader: compile loader only once

There is very little in loader that is different between builds save
for a tiny user/system mode difference in the plugin_info structure.
Create two new files, user and system to hold mode specific helpers
and move loader into common_ss.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-7-alex.ben...@linaro.org>
---
 plugins/plugin.h|  6 ++
 plugins/loader.c| 13 ++---
 plugins/system.c| 24 
 plugins/user.c  | 19 +++
 plugins/meson.build |  7 ++-
 5 files changed, 57 insertions(+), 12 deletions(-)
 create mode 100644 plugins/system.c
 create mode 100644 plugins/user.c

diff --git a/plugins/plugin.h b/plugins/plugin.h
index 9ed20b5c41..6fbc443b96 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -119,4 +119,10 @@ struct qemu_plugin_scoreboard 
*plugin_scoreboard_new(size_t element_size);
 
 void plugin_scoreboard_free(struct qemu_plugin_scoreboard *score);
 
+/**
+ * qemu_plugin_fillin_mode_info() - populate mode specific info
+ * info: pointer to qemu_info_t structure
+ */
+void qemu_plugin_fillin_mode_info(qemu_info_t *info);
+
 #endif /* PLUGIN_H */
diff --git a/plugins/loader.c b/plugins/loader.c
index 827473c8b6..7523d554f0 100644
--- a/plugins/loader.c
+++ b/plugins/loader.c
@@ -31,9 +31,6 @@
 #include "qemu/memalign.h"
 #include "hw/core/cpu.h"
 #include "exec/tb-flush.h"
-#ifndef CONFIG_USER_ONLY
-#include "hw/boards.h"
-#endif
 
 #include "plugin.h"
 
@@ -300,14 +297,8 @@ int qemu_plugin_load_list(QemuPluginList *head, Error 
**errp)
 info->target_name = target_name();
 info->version.min = QEMU_PLUGIN_MIN_VERSION;
 info->version.cur = QEMU_PLUGIN_VERSION;
-#ifndef CONFIG_USER_ONLY
-MachineState *ms = MACHINE(qdev_get_machine());
-info->system_emulation = true;
-info->system.smp_vcpus = ms->smp.cpus;
-info->system.max_vcpus = ms->smp.max_cpus;
-#else
-info->system_emulation = false;
-#endif
+
+qemu_plugin_fillin_mode_info(info);
 
 QTAILQ_FOREACH_SAFE(desc, head, entry, next) {
 int err;
diff --git a/plugins/system.c b/plugins/system.c
new file mode 100644
index 00..b3ecc33ba5
--- /dev/null
+++ b/plugins/system.c
@@ -0,0 +1,24 @@
+/*
+ * QEMU Plugin system-emulation helpers
+ *
+ * Helpers that are specific to system emulation.
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+#include "hw/boards.h"
+
+#include "plugin.h"
+
+void qemu_plugin_fillin_mode_info(qemu_info_t *info)
+{
+MachineState *ms = MACHINE(qdev_get_machine());
+info->system_emulation = true;
+info->system.smp_vcpus = ms->smp.cpus;
+info->system.max_vcpus = ms->smp.max_cpus;
+}
diff --git a/plugins/user.c b/plugins/user.c
new file mode 100644
index 00..250d542502
--- /dev/null
+++ b/plugins/user.c
@@ -0,0 +1,19 @@
+/*
+ * QEMU Plugin user-mode helpers
+ *
+ * Helpers that are specific to user-mode.
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+#include "plugin.h"
+
+void qemu_plugin_fillin_mode_info(qemu_info_t *info)
+{
+info->system_emulation = false;
+}
diff --git a/plugins/meson.build b/plugins/meson.build
index d60be2a4d6..f7820806d3 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -57,8 +57,13 @@ if host_os == 'windows'
 command: dlltool_cmd
   )
 endif
+
+user_ss.add(files('user.c'))
+system_ss.add(files('system.c'))
+
+common_ss.add(files('loader.c'))
+
 specific_ss.add(files(
-  'loader.c',
   'core.c',
   'api.c',
 ))
-- 
2.39.5

[PATCH 17/25] include/qemu: plugin-memory.h doesn't need cpu-defs.h

hwaddr is a fixed size on all builds.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-4-alex.ben...@linaro.org>
---
 include/qemu/plugin-memory.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
index 71c1123308..6065ec7aaf 100644
--- a/include/qemu/plugin-memory.h
+++ b/include/qemu/plugin-memory.h
@@ -9,7 +9,6 @@
 #ifndef PLUGIN_MEMORY_H
 #define PLUGIN_MEMORY_H
 
-#include "exec/cpu-defs.h"
 #include "exec/hwaddr.h"
 
 struct qemu_plugin_hwaddr {
-- 
2.39.5

[PATCH 16/25] plugins/loader: populate target_name with target_name()

We have a function we can call for this, so let's not rely on macros that
stop us from building once.

Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-3-alex.ben...@linaro.org>
---
 plugins/loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/loader.c b/plugins/loader.c
index 99686b5466..827473c8b6 100644
--- a/plugins/loader.c
+++ b/plugins/loader.c
@@ -297,7 +297,7 @@ int qemu_plugin_load_list(QemuPluginList *head, Error 
**errp)
 struct qemu_plugin_desc *desc, *next;
 g_autofree qemu_info_t *info = g_new0(qemu_info_t, 1);
 
-info->target_name = TARGET_NAME;
+info->target_name = target_name();
 info->version.min = QEMU_PLUGIN_MIN_VERSION;
 info->version.cur = QEMU_PLUGIN_VERSION;
 #ifndef CONFIG_USER_ONLY
-- 
2.39.5

Re: [PATCH v4 00/14] Change ghes to use HEST-based offsets and add support for error inject

2025-02-26 Thread Mauro Carvalho Chehab

Em Wed, 26 Feb 2025 15:16:56 +0100
Igor Mammedov  escreveu:

> On Fri, 21 Feb 2025 15:35:09 +0100
> Mauro Carvalho Chehab  wrote:
> 
> > Now that the ghes preparation patches were merged, let's add support
> > for error injection.
> > 
> > In this series, the first 6 patches change the math used to calculate 
> > offsets at HEST
> > table and hardware_error firmware file, together with its migration code. 
> > Migration tested
> > with both latest QEMU released kernel and upstream, on both directions.
> > 
> > The next patches add a new QAPI to allow injecting GHESv2 errors, and a 
> > script using such QAPI
> >to inject ARM Processor Error records.  
> 
> please, run ./scripts/checkpatch on patches before submitting them.
> as it stands now series cannot be merged due to failing checkpatch

Weird... checkpatch is in a pre-commit hook, as recommended in the QEMU 
documentation. It is actually a little harder to manage this way, as it 
sometimes causes trouble with binary files.

Anyway, I'll run it by hand before sending the next version.

> 
> > 
> > ---
> > v4:
> > - added an extra comment for AcpiGhesState structure;
> > - patches reordered;
> > - no functional changes, just code shift between the patches in this series.
> > 
> > v3:
> > - addressed more nits;
> > - hest_add_le now points to the beginning of HEST table;
> > - removed HEST from tests/data/acpi;
> > - added an extra patch to not use fw_cfg with virt-10.0 for hw_error_le
> > 
> > v2: 
> > - address some nits;
> > - improved ags cleanup patch and removed ags.present field;
> > - added some missing le*_to_cpu() calls;
> > - update date at copyright for new files to 2024-2025;
> > - qmp command changed to: inject-ghes-v2-error and since updated to 10.0;
> > - added HEST and DSDT tables after the changes to make check target happy.
> >   (two patches: first one whitelisting such tables; second one removing from
> >whitelist and updating/adding such tables to tests/data/acpi)
> > 
> > 
> > 
> > Mauro Carvalho Chehab (14):
> >   acpi/ghes: prepare to change the way HEST offsets are calculated
> >   acpi/ghes: add a firmware file with HEST address
> >   acpi/ghes: Use HEST table offsets when preparing GHES records
> >   acpi/ghes: don't hard-code the number of sources for HEST table
> >   acpi/ghes: add a notifier to notify when error data is ready
> >   acpi/ghes: create an ancillary acpi_ghes_get_state() function
> >   acpi/generic_event_device: Update GHES migration to cover hest addr
> >   acpi/generic_event_device: add logic to detect if HEST addr is
> > available
> >   acpi/generic_event_device: add an APEI error device
> >   tests/acpi: virt: allow acpi table changes for a new table: HEST
> >   arm/virt: Wire up a GED error device for ACPI / GHES
> >   tests/acpi: virt: add a HEST table to aarch64 virt and update DSDT
> >   qapi/acpi-hest: add an interface to do generic CPER error injection
> >   scripts/ghes_inject: add a script to generate GHES error inject
> > 
> >  MAINTAINERS   |  10 +
> >  hw/acpi/Kconfig   |   5 +
> >  hw/acpi/aml-build.c   |  10 +
> >  hw/acpi/generic_event_device.c|  43 ++
> >  hw/acpi/ghes-stub.c   |   7 +-
> >  hw/acpi/ghes.c| 231 --
> >  hw/acpi/ghes_cper.c   |  38 +
> >  hw/acpi/ghes_cper_stub.c  |  19 +
> >  hw/acpi/meson.build   |   2 +
> >  hw/arm/virt-acpi-build.c  |  37 +-
> >  hw/arm/virt.c |  19 +-
> >  hw/core/machine.c |   2 +
> >  include/hw/acpi/acpi_dev_interface.h  |   1 +
> >  include/hw/acpi/aml-build.h   |   2 +
> >  include/hw/acpi/generic_event_device.h|   1 +
> >  include/hw/acpi/ghes.h|  54 +-
> >  include/hw/arm/virt.h |   2 +
> >  qapi/acpi-hest.json   |  35 +
> >  qapi/meson.build  |   1 +
> >  qapi/qapi-schema.json |   1 +
> >  scripts/arm_processor_error.py| 476 
> >  scripts/ghes_inject.py|  51 ++
> >  scripts/qmp_helper.py | 702 ++
> >  target/arm/kvm.c  |   7 +-
> >  tests/data/acpi/aarch64/virt/DSDT | Bin 5196 -> 5240 bytes
> >  .../data/acpi/aarch64/virt/DSDT.acpihmatvirt  | Bin 5282 -> 5326 bytes
> >  tests/data/acpi/aarch64/virt/DSDT.memhp   | Bin 6557 -> 6601 bytes
> >  tests/data/acpi/aarch64/virt/DSDT.pxb | Bin 7679 -> 7723 bytes
> >  tests/data/acpi/aarch64/virt/DSDT.topology| Bin 5398 -> 5442 bytes
> >  29 files changed, 1677 insertions(+), 79 deletions(-)
> >  create mode 100644 hw/acpi/ghes_cper.c
> >  create mode 100644 hw/acpi/ghes_cper_stub.c
> >  create mode 1

[PATCH 2/2] target/riscv: Support matching scontext in Sdtrig's textra CSRs

2025-02-26 Thread Florian Lugou

Support setting textra32.sselect or textra64.sselect to 1 (scontext).
The trigger will only match if the content of scontext matches the value
in svalue, after it is masked as configured in sbytemask.

Signed-off-by: Florian Lugou 
---
 target/riscv/debug.c | 75 +++-
 target/riscv/debug.h |  3 ++
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index 914a9ce0f8..ac9752d30e 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -219,8 +219,8 @@ static inline void warn_always_zero_bit(target_ulong val, 
target_ulong mask,
 
 static target_ulong textra_validate(CPURISCVState *env, target_ulong tdata3)
 {
-target_ulong mhvalue, mhselect;
-target_ulong mhselect_new;
+target_ulong mhvalue, mhselect, sbytemask, svalue, sselect;
+target_ulong mhselect_new, sselect_new;
 target_ulong textra;
 const uint32_t mhselect_no_rvh[8] = { 0, 0, 0, 0, 4, 4, 4, 4 };
 
@@ -228,25 +228,17 @@ static target_ulong textra_validate(CPURISCVState *env, 
target_ulong tdata3)
 case MXL_RV32:
 mhvalue  = get_field(tdata3, TEXTRA32_MHVALUE);
 mhselect = get_field(tdata3, TEXTRA32_MHSELECT);
-/* Validate unimplemented (always zero) bits */
-warn_always_zero_bit(tdata3, (target_ulong)TEXTRA32_SBYTEMASK,
- "sbytemask");
-warn_always_zero_bit(tdata3, (target_ulong)TEXTRA32_SVALUE,
- "svalue");
-warn_always_zero_bit(tdata3, (target_ulong)TEXTRA32_SSELECT,
- "sselect");
+sbytemask  = get_field(tdata3, TEXTRA32_SBYTEMASK);
+svalue  = get_field(tdata3, TEXTRA32_SVALUE);
+sselect = get_field(tdata3, TEXTRA32_SSELECT);
 break;
 case MXL_RV64:
 case MXL_RV128:
 mhvalue  = get_field(tdata3, TEXTRA64_MHVALUE);
 mhselect = get_field(tdata3, TEXTRA64_MHSELECT);
-/* Validate unimplemented (always zero) bits */
-warn_always_zero_bit(tdata3, (target_ulong)TEXTRA64_SBYTEMASK,
- "sbytemask");
-warn_always_zero_bit(tdata3, (target_ulong)TEXTRA64_SVALUE,
- "svalue");
-warn_always_zero_bit(tdata3, (target_ulong)TEXTRA64_SSELECT,
- "sselect");
+sbytemask  = get_field(tdata3, TEXTRA64_SBYTEMASK);
+svalue  = get_field(tdata3, TEXTRA64_SVALUE);
+sselect = get_field(tdata3, TEXTRA64_SSELECT);
 break;
 default:
 g_assert_not_reached();
@@ -258,17 +250,34 @@ static target_ulong textra_validate(CPURISCVState *env, 
target_ulong tdata3)
 qemu_log_mask(LOG_UNIMP, "mhselect only supports 0 or 4 for now\n");
 }
 
+/* Validate sselect. */
+switch (sselect) {
+case SSELECT_IGNORE:
+case SSELECT_SCONTEXT:
+sselect_new = sselect;
+break;
+default:
+sselect_new = 0;
+qemu_log_mask(LOG_UNIMP, "sselect only supports 0 or 1 for now\n");
+}
+
 /* Write legal values into textra */
 textra = 0;
 switch (riscv_cpu_mxl(env)) {
 case MXL_RV32:
-textra = set_field(textra, TEXTRA32_MHVALUE,  mhvalue);
-textra = set_field(textra, TEXTRA32_MHSELECT, mhselect_new);
+textra = set_field(textra, TEXTRA32_MHVALUE,   mhvalue);
+textra = set_field(textra, TEXTRA32_MHSELECT,  mhselect_new);
+textra = set_field(textra, TEXTRA32_SBYTEMASK, sbytemask);
+textra = set_field(textra, TEXTRA32_SVALUE,svalue);
+textra = set_field(textra, TEXTRA32_SSELECT,   sselect_new);
 break;
 case MXL_RV64:
 case MXL_RV128:
-textra = set_field(textra, TEXTRA64_MHVALUE,  mhvalue);
-textra = set_field(textra, TEXTRA64_MHSELECT, mhselect_new);
+textra = set_field(textra, TEXTRA64_MHVALUE,   mhvalue);
+textra = set_field(textra, TEXTRA64_MHSELECT,  mhselect_new);
+textra = set_field(textra, TEXTRA64_SBYTEMASK, sbytemask);
+textra = set_field(textra, TEXTRA64_SVALUE,svalue);
+textra = set_field(textra, TEXTRA64_SSELECT,   sselect_new);
 break;
 default:
 g_assert_not_reached();
@@ -368,7 +377,7 @@ static bool trigger_textra_match(CPURISCVState *env, 
trigger_type_t type,
  int trigger_index)
 {
 target_ulong textra = env->tdata3[trigger_index];
-target_ulong mhvalue, mhselect;
+target_ulong mhvalue, mhselect, sbytemask, svalue, sselect;
 
 if (type < TRIGGER_TYPE_AD_MATCH || type > TRIGGER_TYPE_AD_MATCH6) {
 /* textra checking is only applicable when type is 2, 3, 4, 5, or 6 */
@@ -379,11 +388,17 @@ static bool trigger_textra_match(CPURISCVState *env, 
trigger_type_t type,
 case MXL_RV32:
 mhvalue  = get_field(textra, TEXTRA32_MHVALUE);
 mhselect = get_field(textra, TEXTRA32_MHSELECT);
+sbytemask = get_field(textra, TEXTRA32_SBYT

[PATCH 0/2] target/riscv: Support scontext-based trigger matching

2025-02-26 Thread Florian Lugou

Hi,

These 2 patches allow scontext-based trigger matching as specified by the Sdtrig
extension. Patch 1 allows access to the scontext CSR and patch 2 enforces
scontext matching as specified by the textra CSRs.

Florian Lugou (2):
  target/riscv: Add scontext CSR handling
  target/riscv: Support matching scontext in Sdtrig's textra CSRs

 target/riscv/cpu.h  |  1 +
 target/riscv/cpu_bits.h |  5 +++
 target/riscv/csr.c  | 36 +++
 target/riscv/debug.c| 76 +
 target/riscv/debug.h|  3 ++
 5 files changed, 100 insertions(+), 21 deletions(-)

-- 
2.43.0

[PATCH 23/25] plugins/api: split out time control helpers

These are only usable in system mode where we control the timer. For
user-mode make them NOPs.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-10-alex.ben...@linaro.org>
---
 plugins/api-system.c | 34 ++
 plugins/api-user.c   | 17 +
 plugins/api.c| 41 -
 3 files changed, 51 insertions(+), 41 deletions(-)

diff --git a/plugins/api-system.c b/plugins/api-system.c
index 38560de342..cc190b167e 100644
--- a/plugins/api-system.c
+++ b/plugins/api-system.c
@@ -95,3 +95,37 @@ const char *qemu_plugin_hwaddr_device_name(const struct 
qemu_plugin_hwaddr *h)
 return g_intern_static_string("RAM");
 }
 }
+
+/*
+ * Time control
+ */
+static bool has_control;
+static Error *migration_blocker;
+
+const void *qemu_plugin_request_time_control(void)
+{
+if (!has_control) {
+has_control = true;
+error_setg(&migration_blocker,
+   "TCG plugin time control does not support migration");
+migrate_add_blocker(&migration_blocker, NULL);
+return &has_control;
+}
+return NULL;
+}
+
+static void advance_virtual_time__async(CPUState *cpu, run_on_cpu_data data)
+{
+int64_t new_time = data.host_ulong;
+qemu_clock_advance_virtual_time(new_time);
+}
+
+void qemu_plugin_update_ns(const void *handle, int64_t new_time)
+{
+if (handle == &has_control) {
+/* Need to execute out of cpu_exec, so bql can be locked. */
+async_run_on_cpu(current_cpu,
+ advance_virtual_time__async,
+ RUN_ON_CPU_HOST_ULONG(new_time));
+}
+}
diff --git a/plugins/api-user.c b/plugins/api-user.c
index 867b420339..28704a89e8 100644
--- a/plugins/api-user.c
+++ b/plugins/api-user.c
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/plugin.h"
+#include "exec/log.h"
 
 /*
  * Virtual Memory queries - these are all NOPs for user-mode which
@@ -38,3 +39,19 @@ const char *qemu_plugin_hwaddr_device_name(const struct 
qemu_plugin_hwaddr *h)
 {
 return g_intern_static_string("Invalid");
 }
+
+/*
+ * Time control - for user mode the only real time is wall clock time
+ * so realistically all you can do in user mode is slow down execution
+ * which doesn't require the ability to mess with the clock.
+ */
+
+const void *qemu_plugin_request_time_control(void)
+{
+return NULL;
+}
+
+void qemu_plugin_update_ns(const void *handle, int64_t new_time)
+{
+qemu_log_mask(LOG_UNIMP, "user-mode can't control time");
+}
diff --git a/plugins/api.c b/plugins/api.c
index 82241699a5..832bf6ee5e 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -526,44 +526,3 @@ uint64_t qemu_plugin_u64_sum(qemu_plugin_u64 entry)
 return total;
 }
 
-/*
- * Time control
- */
-static bool has_control;
-#ifdef CONFIG_SOFTMMU
-static Error *migration_blocker;
-#endif
-
-const void *qemu_plugin_request_time_control(void)
-{
-if (!has_control) {
-has_control = true;
-#ifdef CONFIG_SOFTMMU
-error_setg(&migration_blocker,
-   "TCG plugin time control does not support migration");
-migrate_add_blocker(&migration_blocker, NULL);
-#endif
-return &has_control;
-}
-return NULL;
-}
-
-#ifdef CONFIG_SOFTMMU
-static void advance_virtual_time__async(CPUState *cpu, run_on_cpu_data data)
-{
-int64_t new_time = data.host_ulong;
-qemu_clock_advance_virtual_time(new_time);
-}
-#endif
-
-void qemu_plugin_update_ns(const void *handle, int64_t new_time)
-{
-#ifdef CONFIG_SOFTMMU
-if (handle == &has_control) {
-/* Need to execute out of cpu_exec, so bql can be locked. */
-async_run_on_cpu(current_cpu,
- advance_virtual_time__async,
- RUN_ON_CPU_HOST_ULONG(new_time));
-}
-#endif
-}
-- 
2.39.5

Re: [PATCH v5 20/36] vfio/migration: Add vfio_add_bytes_transferred()

2025-02-26 Thread Maciej S. Szmigiero


On 26.02.2025 09:06, Cédric Le Goater wrote:

On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

This way bytes_transferred can also be incremented in other translation
units than migration.c.

Signed-off-by: Maciej S. Szmigiero 


Looks good. Just a small aesthetic issue.


---
  hw/vfio/migration.c   | 7 ++-
  include/hw/vfio/vfio-common.h | 1 +
  2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 5532787be63b..e9645cb9d088 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -391,7 +391,7 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration 
*migration)
  qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
  qemu_put_be64(f, data_size);
  qemu_put_buffer(f, migration->data_buffer, data_size);
-    qatomic_add(&bytes_transferred, data_size);
+    vfio_add_bytes_transferred(data_size);
  trace_vfio_save_block(migration->vbasedev->name, data_size);
@@ -1021,6 +1021,11 @@ void vfio_reset_bytes_transferred(void)
  qatomic_set(&bytes_transferred, 0);
  }
+void vfio_add_bytes_transferred(unsigned long val)


vfio_migration_add_bytes_transferred()



Renamed into vfio_mig_add_bytes_transferred() for consistency with
vfio_mig_bytes_transferred().
 

Thanks,

C.


Thanks,
Maciej

Re: [PATCH v4 2/6] migration: check RDMA and capabilities are compatible on both sides

2025-02-26 Thread Peter Xu

On Wed, Feb 26, 2025 at 02:30:39PM +0800, Li Zhijian wrote:
> Depending on the order of starting RDMA and setting capability,
> the possible cases can be categorized into the following scenarios:
> Source:
>  S1: [set capabilities] -> [Start RDMA outgoing]
> Destination:
>  D1: [set capabilities] -> [Start RDMA incoming]
>  D2: [Start RDMA incoming] -> [set capabilities]
> 
> Previously, compatibility between RDMA and capabilities was verified only
> in scenario D1, potentially causing migration failures in other situations.
> 
> For scenarios S1 and D1, we can seamlessly incorporate
> migration_transport_compatible() to address compatibility between
> channels and capabilities vs transport.
> 
> For scenario D2, ensure compatibility within migrate_caps_check().
> 
> Signed-off-by: Li Zhijian 

Reviewed-by: Peter Xu 

-- 
Peter Xu

Re: [PATCH v4 04/14] acpi/ghes: don't hard-code the number of sources for HEST table

On Fri, 21 Feb 2025 15:35:13 +0100
Mauro Carvalho Chehab  wrote:

> The current code is actually dependent on having just one error
> structure with a single source, as any change there would cause
> migration issues.
> 
> As the number of sources should be arch-dependent, as it will depend on
> what kind of notifications will exist, and how many errors can be
> reported at the same time, change the logic to be more flexible,
> allowing the number of sources to be defined when building the
> HEST table by the caller.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> Reviewed-by: Jonathan Cameron 
> Reviewed-by: Igor Mammedov 
> ---
>  hw/acpi/ghes.c   | 38 +-
>  hw/arm/virt-acpi-build.c |  8 +++-
>  include/hw/acpi/ghes.h   | 17 -
>  3 files changed, 40 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 7efea519f766..4a4ea8f4be90 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -238,17 +238,17 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray 
> *block,
>   * See docs/specs/acpi_hest_ghes.rst for blobs format.
>   */
>  static void build_ghes_error_table(AcpiGhesState *ags, GArray 
> *hardware_errors,
> -   BIOSLinker *linker)
> +   BIOSLinker *linker, int num_sources)
>  {
>  int i, error_status_block_offset;
>  
>  /* Build error_block_address */
> -for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
> +for (i = 0; i < num_sources; i++) {
>  build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
>  }
>  
>  /* Build read_ack_register */
> -for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
> +for (i = 0; i < num_sources; i++) {
>  /*
>   * Initialize the value of read_ack_register to 1, so GHES can be
>   * writable after (re)boot.
> @@ -263,13 +263,13 @@ static void build_ghes_error_table(AcpiGhesState *ags, 
> GArray *hardware_errors,
>  
>  /* Reserve space for Error Status Data Block */
>  acpi_data_push(hardware_errors,
> -ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
> +ACPI_GHES_MAX_RAW_DATA_LENGTH * num_sources);
>  
>  /* Tell guest firmware to place hardware_errors blob into RAM */
>  bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE,
>   hardware_errors, sizeof(uint64_t), false);
>  
> -for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
> +for (i = 0; i < num_sources; i++) {
>  /*
>   * Tell firmware to patch error_block_address entries to point to
>   * corresponding "Generic Error Status Block"
> @@ -295,12 +295,14 @@ static void build_ghes_error_table(AcpiGhesState *ags, 
> GArray *hardware_errors,
>  }
>  
>  /* Build Generic Hardware Error Source version 2 (GHESv2) */
> -static void build_ghes_v2(GArray *table_data,
> -  BIOSLinker *linker,
> -  enum AcpiGhesNotifyType notify,
> -  uint16_t source_id)
> +static void build_ghes_v2_entry(GArray *table_data,
> +BIOSLinker *linker,
> +const AcpiNotificationSourceId *notif_src,
> +uint16_t index, int num_sources)
>  {
>  uint64_t address_offset;
> +const uint16_t notify = notif_src->notify;
> +const uint16_t source_id = notif_src->source_id;
>  
>  /*
>   * Type:
> @@ -331,7 +333,7 @@ static void build_ghes_v2(GArray *table_data,
> address_offset + GAS_ADDR_OFFSET,
> sizeof(uint64_t),
> ACPI_HW_ERROR_FW_CFG_FILE,
> -   source_id * sizeof(uint64_t));
> +   index * sizeof(uint64_t));
>  
>  /* Notification Structure */
>  build_ghes_hw_error_notification(table_data, notify);
> @@ -351,8 +353,7 @@ static void build_ghes_v2(GArray *table_data,
> address_offset + GAS_ADDR_OFFSET,
> sizeof(uint64_t),
> ACPI_HW_ERROR_FW_CFG_FILE,
> -   (ACPI_GHES_ERROR_SOURCE_COUNT + source_id)
> -   * sizeof(uint64_t));
> +   (num_sources + index) * sizeof(uint64_t));
>  
>  /*
>   * Read Ack Preserve field
> @@ -368,22 +369,26 @@ static void build_ghes_v2(GArray *table_data,
>  void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>   GArray *hardware_errors,
>   BIOSLinker *linker,
> + const AcpiNotificationSourceId *notif_source,
> + int num_sources,
>   const char *oem_id, const char *oem_table_id)
>  {
>  AcpiTable tabl

Re: [PATCH v4 4/6] migration/rdma: Remove redundant migration_in_postcopy checks

2025-02-26 Thread Peter Xu

On Wed, Feb 26, 2025 at 02:30:41PM +0800, Li Zhijian wrote:
> Since we have disabled RDMA + postcopy, it's safe to remove
> the migration_in_postcopy() that follows the migrate_rdma().
> 
> Signed-off-by: Li Zhijian 

Reviewed-by: Peter Xu 

-- 
Peter Xu

Re: [PATCH 0/2] vfio: Restrict to 64-bit host platforms


Hello

On 2/26/25 15:01, Daniel P. Berrangé wrote:

On Wed, Feb 26, 2025 at 09:47:19AM +0100, Cédric Le Goater wrote:

Hello,

This series avoids compiling VFIO on 32-bit host platforms where it is
not needed.


If it was previously enabled on 32-bit and was possible to successfully
build & use, then it needs to go through the deprecation process - we
can't just rip out features with no prior warning.


well, x86 32-bit was the host platform I was not sure about and Alex
confirmed it worked. We would need deprecation for it I agree.
 

In any case this recently merged:

   commit 6d701c9bac1d3571e9ad511e01b27df7237f0b13
   Author: Richard Henderson 
   Date:   Mon Jan 27 16:22:24 2025 -0800

 meson: Deprecate 32-bit host support
 
 We deprecated i686 system mode support for qemu 8.0.  However, to

 make real cleanups to TCG we need to deprecate all 32-bit hosts.


oh. I missed that. Great, at last !


so 32-bit host support in general is nearing end of life. Given that,
I don't think we need to further limit 32-bit host for individual
features, just let it die all at once.


yes. Let's do that.

Now, I have to look more closely at those patches modifying VFIO for
32-bit support.

Thanks,

C.

Re: [PATCH v4 08/14] acpi/generic_event_device: add logic to detect if HEST addr is available

On Fri, 21 Feb 2025 15:35:17 +0100
Mauro Carvalho Chehab  wrote:

> Create a new property (x-has-hest-addr) and use it to detect if
> the GHES table offsets can be calculated from the HEST address
> (QEMU 10.0 and later) or in the legacy way, via an offset obtained
> from the hardware_errors firmware file.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> Reviewed-by: Jonathan Cameron 
> ---
>  hw/acpi/generic_event_device.c |  1 +
>  hw/arm/virt-acpi-build.c   | 18 --
>  hw/core/machine.c  |  2 ++
>  3 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> index 5346cae573b7..14d8513a5440 100644
> --- a/hw/acpi/generic_event_device.c
> +++ b/hw/acpi/generic_event_device.c
> @@ -318,6 +318,7 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
> AcpiEventStatusBits ev)
>  
>  static const Property acpi_ged_properties[] = {
>  DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
> +DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
> ghes_state.use_hest_addr, false),

Below you set it to false for 9.2, so
shouldn't it be set to true by default here?

>  };
>  
>  static const VMStateDescription vmstate_memhp_state = {
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 4439252e1a75..9de51105a513 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -897,6 +897,10 @@ static const AcpiNotificationSourceId hest_ghes_notify[] 
> = {
>  { ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
>  };
>  
> +static const AcpiNotificationSourceId hest_ghes_notify_9_2[] = {
> +{ ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
> +};
> +
>  static
>  void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
>  {
> @@ -950,7 +954,9 @@ void virt_acpi_build(VirtMachineState *vms, 
> AcpiBuildTables *tables)
>  build_dbg2(tables_blob, tables->linker, vms);
>  
>  if (vms->ras) {
> +static const AcpiNotificationSourceId *notify;
>  AcpiGedState *acpi_ged_state;
> +unsigned int notify_sz;
>  AcpiGhesState *ags;
>  
>  acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
> @@ -959,9 +965,17 @@ void virt_acpi_build(VirtMachineState *vms, 
> AcpiBuildTables *tables)
>  ags = &acpi_ged_state->ghes_state;
>  
>  acpi_add_table(table_offsets, tables_blob);
> +
> +if (!ags->use_hest_addr) {
> +notify = hest_ghes_notify_9_2;
> +notify_sz = ARRAY_SIZE(hest_ghes_notify_9_2);
> +} else {
> +notify = hest_ghes_notify;
> +notify_sz = ARRAY_SIZE(hest_ghes_notify);
> +}
> +
>  acpi_build_hest(ags, tables_blob, tables->hardware_errors,
> -tables->linker, hest_ghes_notify,
> -ARRAY_SIZE(hest_ghes_notify),
> +tables->linker, notify, notify_sz,
>  vms->oem_id, vms->oem_table_id);
>  }
>  }
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 02cff735b3fb..7a11e0f87b11 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -34,6 +34,7 @@
>  #include "hw/virtio/virtio-pci.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "hw/virtio/virtio-iommu.h"
> +#include "hw/acpi/generic_event_device.h"
>  #include "audio/audio.h"
>  
>  GlobalProperty hw_compat_9_2[] = {
> @@ -43,6 +44,7 @@ GlobalProperty hw_compat_9_2[] = {
>  { "virtio-balloon-pci-non-transitional", "vectors", "0" },
>  { "virtio-mem-pci", "vectors", "0" },
>  { "migration", "multifd-clean-tls-termination", "false" },
> +{ TYPE_ACPI_GED, "x-has-hest-addr", "false" },
>  };
>  const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
>

Re: [PATCH v4 5/6] migration: Unfold control_save_page()

2025-02-26 Thread Peter Xu

On Wed, Feb 26, 2025 at 02:30:42PM +0800, Li Zhijian wrote:
> control_save_page() is for RDMA only, unfold it to make the code more
> clear.
> In addition:
>  - Similar to other branches style in ram_save_target_page(), involve RDMA
>only if the condition 'migrate_rdma()' is true.
>  - Further simplify the code by removing the RAM_SAVE_CONTROL_NOT_SUPP.
> 
> Signed-off-by: Li Zhijian 

[...]

> @@ -56,7 +55,7 @@ static inline
>  int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> ram_addr_t offset, size_t size)
>  {
> -return RAM_SAVE_CONTROL_NOT_SUPP;
> +g_assert_not_reached();
>  }

Not sure if some compiler will be unhappy on the retval not provided, but
anyway we'll see..

Reviewed-by: Peter Xu 

>  #endif
>  #endif
> -- 
> 2.44.0
> 

-- 
Peter Xu

Re: [PATCH v4 10/14] tests/acpi: virt: allow acpi table changes for a new table: HEST

On Fri, 21 Feb 2025 15:35:19 +0100
Mauro Carvalho Chehab  wrote:

> The DSDT table will also be affected by such change.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> ---
>  tests/qtest/bios-tables-test-allowed-diff.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
> b/tests/qtest/bios-tables-test-allowed-diff.h
> index dfb8523c8bf4..1a4c2277bd5a 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1 +1,2 @@
>  /* List of comma-separated changed AML files to ignore */
> +"tests/data/acpi/aarch64/virt/DSDT",
this and the following update would also include the HEST table, once you enable 'ras' in
tests

Re: [PATCH v5 19/36] vfio/migration: Convert bytes_transferred counter to atomic


On 2/26/25 14:55, Maciej S. Szmigiero wrote:

On 26.02.2025 08:52, Cédric Le Goater wrote:

On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

So it can be safely accessed from multiple threads.

This variable type needs to be changed to unsigned long since
32-bit host platforms lack the necessary addition atomics on 64-bit
variables.

Using 32-bit counters on 32-bit host platforms should not be a problem
in practice since they can't realistically address more memory anyway.


Is it useful to have VFIO on 32-bit host platforms ?

If not, VFIO PCI should depend on (AARCH64 || PPC64 || X86_64) and we
could drop this patch. Let's address that independently.


Not sure how much use VFIO gets on 32-bit host platforms,
however totally disabling it on these would be a major functional regression -
at least if taken at its face value.


32-bit host platform support is being deprecated in QEMU 10.0 and should
be removed in QEMU 10.2.


Especially considering that making it work on 32-bit platform requires
just this tiny variable type change here.


yes. It raised my attention because x86 32-bit was the only host platform
I was not sure about and Alex confirmed it worked. We should simply wait
for removal.


Thanks,

C.

Re: [PATCH 03/25] tests/functional: ensure we have a GPU device for tests

2025-02-26 Thread Thomas Huth


On 26/02/2025 15.03, Alex Bennée wrote:

It's possible to build QEMU without support for the GL enabled GPU
devices and we can catch that earlier with an explicit check.

Signed-off-by: Alex Bennée 
Message-Id: <20250219150009.1662688-4-alex.ben...@linaro.org>
---
  tests/functional/test_aarch64_virt_gpu.py | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/functional/test_aarch64_virt_gpu.py 
b/tests/functional/test_aarch64_virt_gpu.py
index 06093c6b60..c9463d7285 100755
--- a/tests/functional/test_aarch64_virt_gpu.py
+++ b/tests/functional/test_aarch64_virt_gpu.py
@@ -91,6 +91,9 @@ def _run_virt_gpu_test(self, gpu_device,  weston_cmd, 
weston_pattern):
  
  @skipIfMissingCommands('zstd')

  def test_aarch64_virt_with_vulkan_gpu(self):
+
+self.require_device('virtio-gpu-gl-pci')
+
  gpu_device = "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on"
  weston_cmd = "vkmark -b:duration=1.0"
  weston_pattern = "vkmark Score"


Reviewed-by: Thomas Huth

Re: [PATCH v4 11/14] arm/virt: Wire up a GED error device for ACPI / GHES

On Fri, 21 Feb 2025 15:35:20 +0100
Mauro Carvalho Chehab  wrote:

> Adds support to ARM virtualization to allow handling
> generic error ACPI Event via GED & error source device.
> 
> It is aligned with Linux Kernel patch:
> https://lore.kernel.org/lkml/1272350481-27951-8-git-send-email-ying.hu...@intel.com/
> 
> Co-authored-by: Mauro Carvalho Chehab 
> Co-authored-by: Jonathan Cameron 
> Signed-off-by: Jonathan Cameron 
> Signed-off-by: Mauro Carvalho Chehab 
> Acked-by: Igor Mammedov 
> 
> ---
> 
> Changes from v8:
> 
> - Added a call to the function that produces GHES generic
>   records, as this is now added earlier in this series.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> ---
>  hw/acpi/generic_event_device.c |  2 +-
>  hw/arm/virt-acpi-build.c   |  1 +
>  hw/arm/virt.c  | 12 +++-
>  include/hw/arm/virt.h  |  1 +
>  4 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> index 180eebbce1cd..f5e899155d34 100644
> --- a/hw/acpi/generic_event_device.c
> +++ b/hw/acpi/generic_event_device.c
> @@ -331,7 +331,7 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
> AcpiEventStatusBits ev)
>  
>  static const Property acpi_ged_properties[] = {
>  DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
> -DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
> ghes_state.use_hest_addr, false),
> +DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState, 
> ghes_state.use_hest_addr, true),
irrelevant to this patch, see comment in 8/14

>  };
>  
>  static const VMStateDescription vmstate_memhp_state = {
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 9de51105a513..4f174795ed60 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -861,6 +861,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  }
>  
>  acpi_dsdt_add_power_button(scope);
> +aml_append(scope, aml_error_device());
>  #ifdef CONFIG_TPM
>  acpi_dsdt_add_tpm(scope, vms);
>  #endif
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 4a5a9666e916..3faf32f900b5 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -678,7 +678,7 @@ static inline DeviceState 
> *create_acpi_ged(VirtMachineState *vms)
>  DeviceState *dev;
>  MachineState *ms = MACHINE(vms);
>  int irq = vms->irqmap[VIRT_ACPI_GED];
> -uint32_t event = ACPI_GED_PWR_DOWN_EVT;
> +uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_ERROR_EVT;
>  
>  if (ms->ram_slots) {
>  event |= ACPI_GED_MEM_HOTPLUG_EVT;
> @@ -1010,6 +1010,13 @@ static void virt_powerdown_req(Notifier *n, void 
> *opaque)
>  }
>  }
>  
> +static void virt_generic_error_req(Notifier *n, void *opaque)
> +{
> +VirtMachineState *s = container_of(n, VirtMachineState, 
> generic_error_notifier);
> +
> +acpi_send_event(s->acpi_dev, ACPI_GENERIC_ERROR);
> +}
> +
>  static void create_gpio_keys(char *fdt, DeviceState *pl061_dev,
>   uint32_t phandle)
>  {
> @@ -2404,6 +2411,9 @@ static void machvirt_init(MachineState *machine)
>  
>  if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
>  vms->acpi_dev = create_acpi_ged(vms);
> +vms->generic_error_notifier.notify = virt_generic_error_req;
> +notifier_list_add(&acpi_generic_error_notifiers,
> +  &vms->generic_error_notifier);
>  } else {
>  create_gpio_devices(vms, VIRT_GPIO, sysmem);
>  }
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index c8e94e6aedc9..f3cf28436770 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -176,6 +176,7 @@ struct VirtMachineState {
>  DeviceState *gic;
>  DeviceState *acpi_dev;
>  Notifier powerdown_notifier;
> +Notifier generic_error_notifier;
>  PCIBus *bus;
>  char *oem_id;
>  char *oem_table_id;

[Bug 2072564] Re: qemu-aarch64-static segfaults running ldconfig.real (amd64 host)

2025-02-26 Thread Lukas Märdian

** Changed in: qemu (Ubuntu)
   Status: Triaged => In Progress

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/2072564

Title:
  qemu-aarch64-static segfaults running ldconfig.real (amd64 host)

Status in QEMU:
  Fix Released
Status in qemu package in Ubuntu:
  In Progress
Status in qemu source package in Noble:
  Triaged
Status in qemu source package in Oracular:
  Triaged

Bug description:
  [ Impact ]

   * QEMU crashes when running (emulating) ldconfig in a Ubuntu 22.04
  arm64 guest

   * This affects the qemu-user-static 1:8.2.2+ds-0ubuntu1 package on
  Ubuntu 24.04+, running on a amd64 host.

   * When running docker containers with Ubuntu 22.04 in them, emulating
  arm64 with qemu-aarch64-static, invocations of ldconfig (actually
  ldconfig.real) segfault, leading to problems when loading shared
  libraries.

  [ Test Plan ]

   * Reproducer is very easy:

  $ sudo snap install docker
  docker 27.5.1 from Canonical** installed
  $ docker run -ti --platform linux/arm64/v8 ubuntu:22.04
  Unable to find image 'ubuntu:22.04' locally
  22.04: Pulling from library/ubuntu
  0d1c17d4e593: Pull complete 
  Digest: 
sha256:ed1544e454989078f5dec1bfdabd8c5cc9c48e0705d07b678ab6ae3fb61952d2
  Status: Downloaded newer image for ubuntu:22.04

  # Execute ldconfig.real inside the arm64 guest.
  # This should not crash after the fix!
  root@ad80af5378dc:/# /sbin/ldconfig.real
  qemu: uncaught target signal 11 (Segmentation fault) - core dumped
  Segmentation fault (core dumped)

  [ Where problems could occur ]

   * This changes the alignment of sections in the ELF binary via QEMUs
  elfloader, if something goes wrong with this change, it could lead to
  all kind of crashes (segfault) of any emulated binaries.

  [ Other Info ]

   * Upstream bug: https://gitlab.com/qemu-project/qemu/-/issues/1913
   * Upstream fix: https://gitlab.com/qemu-project/qemu/-/commit/4b7b20a3
 - Fix dependency (needed for QEMU < 9.20): 
https://gitlab.com/qemu-project/qemu/-/commit/c81d1faf

  --- original bug report ---

  
  This affects the qemu-user-static 1:8.2.2+ds-0ubuntu1 package on Ubuntu 
24.04, running on a amd64 host.

  When running docker containers with Ubuntu 22.04 in them, emulating
  arm64 with qemu-aarch64-static, invocations of ldconfig (actually
  ldconfig.real) segfault. For example:

  $ docker run -ti --platform linux/arm64/v8 ubuntu:22.04
  root@8861ff640a1c:/# /sbin/ldconfig.real
  Segmentation fault

  If you copy the ldconfig.real binary to the host, and run it directly
  via qemu-aarch64-static:

  $ gdb --args qemu-aarch64-static ./ldconfig.real
  GNU gdb (Ubuntu 15.0.50.20240403-0ubuntu1) 15.0.50.20240403-git
  Copyright (C) 2024 Free Software Foundation, Inc.
  License GPLv3+: GNU GPL version 3 or later 
  This is free software: you are free to change and redistribute it.
  There is NO WARRANTY, to the extent permitted by law.
  Type "show copying" and "show warranty" for details.
  This GDB was configured as "x86_64-linux-gnu".
  Type "show configuration" for configuration details.
  For bug reporting instructions, please see:
  .
  Find the GDB manual and other documentation resources online at:
  .

  For help, type "help".
  Type "apropos word" to search for commands related to "word"...
  Reading symbols from qemu-aarch64-static...
  Reading symbols from 
/home/dim/.cache/debuginfod_client/86579812b213be0964189499f62f176bea817bf2/debuginfo...
  (gdb) r
  Starting program: /usr/bin/qemu-aarch64-static ./ldconfig.real
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
  [New Thread 0x776006c0 (LWP 28378)]

  Thread 1 "qemu-aarch64-st" received signal SIGSEGV, Segmentation fault.
  0x7fffe801645b in ?? ()
  (gdb) disassemble
  No function contains program counter for selected frame.

  It looks like this is a known qemu regression after v8.1.1:
  https://gitlab.com/qemu-project/qemu/-/issues/1913

  Downgrading the package to qemu-user-
  static_8.0.4+dfsg-1ubuntu3_amd64.deb fixes the segfault.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/2072564/+subscriptions

Re: [PATCH v2 0/2] Emulated AMD IOMMU cleanup and fixes

2025-02-26 Thread Michael Tokarev


26.02.2025 15:53, Vasant Hegde wrote:

Hi Michael,


Hi!


On 2/25/2025 2:17 PM, Michael Tokarev wrote:

...>> Is this qemu-stable material (current series: 7.2, 8.2, 9.2)?


The Linux kernel doesn't use these changes, so it's fine there. But I believe we care about
other OSes as well? If so, it's better to backport.


Yes, we definitely care about other OSes.  There are numerous possible
other questions though.  For example, how relevant these changes are
for older 7.2.x series, where AMD IOMMU is in less current state (missing
all further development) so might not be as relevant anymore.


3684717b74 "amd_iommu: Use correct bitmask to set capability BAR" does
not apply to 7.2, since v8.0.0-10-g6291a28645 "hw/i386/amd_iommu: Explicit
use of AMDVI_BASE_ADDR in amdvi_init" in not in 7.2, but the change can be
adjusted for 7.2 easily, or 6291a28645 can be picked up too.


How does this work? Will you pick it up, or do you want us to backport it and send it
to the stable mailing list?


This is just a data point, nothing more.  Indicating that for 7.2, it needs some
more work.  I picked it up for 7.2 already: 
https://gitlab.com/mjt0k/qemu/-/tree/staging-7.2
But this is more mechanical way, maybe you, who know this area much better than
me, prefer other way, like picking up already mentioned commit 6291a28645.
Or maybe it isn't worth the effort for 7.2 anyway, provided the issue isn't
that important and it needs any additional work to back-port.

If you especially care about some older stable releases and think one or
another change really needs to be there *and* needs some backporting work,
you might do a backport yourself or give some notes for me to do that.

It's always a trade-off between "importance" of the change, age of the
stable series, the amount of work needed for backporting, and possibility
of breakage.  For less-important or less-used stuff, even thinking about
this tradeoff is already too much work ;)

Thanks,

/mjt

Re: [PATCH v4 00/14] Change ghes to use HEST-based offsets and add support for error inject

On Wed, 26 Feb 2025 15:51:43 +0100
Igor Mammedov  wrote:

> On Wed, 26 Feb 2025 15:39:13 +0100
> Mauro Carvalho Chehab  wrote:
[...]
> 
> PS: do not respin until I've finish this review.

finished

>  
> > > 
> > > > 
> > > > ---
> > > > v4:
> > > > - added an extra comment for AcpiGhesState structure;
> > > > - patches reordered;
> > > > - no functional changes, just code shift between the patches in this 
> > > > series.
> > > > 
> > > > v3:
> > > > - addressed more nits;
> > > > - hest_add_le now points to the beginning of HEST table;
> > > > - removed HEST from tests/data/acpi;
> > > > - added an extra patch to not use fw_cfg with virt-10.0 for hw_error_le
> > > > 
> > > > v2: 
> > > > - address some nits;
> > > > - improved ags cleanup patch and removed ags.present field;
> > > > - added some missing le*_to_cpu() calls;
> > > > - update date at copyright for new files to 2024-2025;
> > > > - qmp command changed to: inject-ghes-v2-error and since updated to
> > > > 10.0;
> > > > - added HEST and DSDT tables after the changes to make check target 
> > > > happy.
> > > >   (two patches: first one whitelisting such tables; second one removing 
> > > > from
> > > >whitelist and updating/adding such tables to tests/data/acpi)
> > > > 
> > > > 
> > > > 
> > > > Mauro Carvalho Chehab (14):
> > > >   acpi/ghes: prepare to change the way HEST offsets are calculated
> > > >   acpi/ghes: add a firmware file with HEST address
> > > >   acpi/ghes: Use HEST table offsets when preparing GHES records
> > > >   acpi/ghes: don't hard-code the number of sources for HEST table
> > > >   acpi/ghes: add a notifier to notify when error data is ready
> > > >   acpi/ghes: create an ancillary acpi_ghes_get_state() function
> > > >   acpi/generic_event_device: Update GHES migration to cover hest addr
> > > >   acpi/generic_event_device: add logic to detect if HEST addr is
> > > > available
> > > >   acpi/generic_event_device: add an APEI error device
> > > >   tests/acpi: virt: allow acpi table changes for a new table: HEST
> > > >   arm/virt: Wire up a GED error device for ACPI / GHES
> > > >   tests/acpi: virt: add a HEST table to aarch64 virt and update DSDT
> > > >   qapi/acpi-hest: add an interface to do generic CPER error injection
> > > >   scripts/ghes_inject: add a script to generate GHES error inject
> > > > 
> > > >  MAINTAINERS   |  10 +
> > > >  hw/acpi/Kconfig   |   5 +
> > > >  hw/acpi/aml-build.c   |  10 +
> > > >  hw/acpi/generic_event_device.c|  43 ++
> > > >  hw/acpi/ghes-stub.c   |   7 +-
> > > >  hw/acpi/ghes.c| 231 --
> > > >  hw/acpi/ghes_cper.c   |  38 +
> > > >  hw/acpi/ghes_cper_stub.c  |  19 +
> > > >  hw/acpi/meson.build   |   2 +
> > > >  hw/arm/virt-acpi-build.c  |  37 +-
> > > >  hw/arm/virt.c |  19 +-
> > > >  hw/core/machine.c |   2 +
> > > >  include/hw/acpi/acpi_dev_interface.h  |   1 +
> > > >  include/hw/acpi/aml-build.h   |   2 +
> > > >  include/hw/acpi/generic_event_device.h|   1 +
> > > >  include/hw/acpi/ghes.h|  54 +-
> > > >  include/hw/arm/virt.h |   2 +
> > > >  qapi/acpi-hest.json   |  35 +
> > > >  qapi/meson.build  |   1 +
> > > >  qapi/qapi-schema.json |   1 +
> > > >  scripts/arm_processor_error.py| 476 
> > > >  scripts/ghes_inject.py|  51 ++
> > > >  scripts/qmp_helper.py | 702 ++
> > > >  target/arm/kvm.c  |   7 +-
> > > >  tests/data/acpi/aarch64/virt/DSDT | Bin 5196 -> 5240 bytes
> > > >  .../data/acpi/aarch64/virt/DSDT.acpihmatvirt  | Bin 5282 -> 5326 bytes
> > > >  tests/data/acpi/aarch64/virt/DSDT.memhp   | Bin 6557 -> 6601 bytes
> > > >  tests/data/acpi/aarch64/virt/DSDT.pxb | Bin 7679 -> 7723 bytes
> > > >  tests/data/acpi/aarch64/virt/DSDT.topology| Bin 5398 -> 5442 bytes
> > > >  29 files changed, 1677 insertions(+), 79 deletions(-)
> > > >  create mode 100644 hw/acpi/ghes_cper.c
> > > >  create mode 100644 hw/acpi/ghes_cper_stub.c
> > > >  create mode 100644 qapi/acpi-hest.json
> > > >  create mode 100644 scripts/arm_processor_error.py
> > > >  create mode 100755 scripts/ghes_inject.py
> > > >  create mode 100755 scripts/qmp_helper.py
> > > >   
> > > 
> >   
>

[PATCH v5 1/3] Add support for emulation of CRC32 instructions

From: Aleksandar Rakic 

Add emulation of MIPS' CRC32 (Cyclic Redundancy Check) instructions.
Reuse zlib crc32() and Linux crc32c().

Enable CRC for mips64r6.

Cherry-picked 4cc974938aee1588f852590509004e340c072940
from https://github.com/MIPS/gnutools-qemu

Signed-off-by: Yongbok Kim 
Signed-off-by: Aleksandar Markovic 
Signed-off-by: Aleksandar Rakic 
Reviewed-by: Aleksandar Rikalo 
---
 target/mips/cpu-defs.c.inc|  10 +-
 target/mips/helper.h  |   2 +
 target/mips/meson.build   |   1 +
 target/mips/tcg/op_helper.c   |  27 +++
 target/mips/tcg/translate.c   |  37 +
 target/mips/tcg/translate.h   |   1 +
 tests/tcg/mips/include/wrappers_mips64r6.h|  35 
 tests/tcg/mips/user/isa/mips64r6/crc/Makefile |  34 
 .../isa/mips64r6/crc/test_mips64r6_crc32b.c   | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32cb.c  | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32cd.c  | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32ch.c  | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32cw.c  | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32d.c   | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32h.c   | 154 ++
 .../isa/mips64r6/crc/test_mips64r6_crc32w.c   | 154 ++
 16 files changed, 1375 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/Makefile
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32b.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32cb.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32cd.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32ch.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32cw.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32d.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32h.c
 create mode 100644 tests/tcg/mips/user/isa/mips64r6/crc/test_mips64r6_crc32w.c

diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc
index 922fc39138..d93b9d341a 100644
--- a/target/mips/cpu-defs.c.inc
+++ b/target/mips/cpu-defs.c.inc
@@ -756,8 +756,9 @@ const mips_def_t mips_defs[] =
(1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt),
 .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
(1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist),
-.CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) |
-   (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI),
+.CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_XNP) |
+   (1 << CP0C5_VP) | (1 << CP0C5_LLB) | (1 << CP0C5_MRP) |
+   (3 << CP0C5_GI),
 .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
   (1 << CP0C5_FRE) | (1 << CP0C5_UFE),
 .CP0_LLAddr_rw_bitmask = 0,
@@ -796,8 +797,9 @@ const mips_def_t mips_defs[] =
(1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt),
 .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
(1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist),
-.CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) |
-   (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI),
+.CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_XNP) |
+   (1 << CP0C5_VP) | (1 << CP0C5_LLB) | (1 << CP0C5_MRP) |
+   (3 << CP0C5_GI),
 .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
   (1 << CP0C5_FRE) | (1 << CP0C5_UFE),
 .CP0_LLAddr_rw_bitmask = 0,
diff --git a/target/mips/helper.h b/target/mips/helper.h
index 0f8462febb..752748d5e6 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -21,6 +21,8 @@ DEF_HELPER_FLAGS_1(bitswap, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(dbitswap, TCG_CALL_NO_RWG_SE, tl, tl)
 #endif
 
+DEF_HELPER_3(crc32, tl, tl, tl, i32)
+DEF_HELPER_3(crc32c, tl, tl, tl, i32)
 DEF_HELPER_FLAGS_4(rotx, TCG_CALL_NO_RWG_SE, tl, tl, i32, i32, i32)
 
 /* microMIPS functions */
diff --git a/target/mips/meson.build b/target/mips/meson.build
index a26d1e1f79..d2d686fc0c 100644
--- a/target/mips/meson.build
+++ b/target/mips/meson.build
@@ -7,6 +7,7 @@ mips_ss.add(files(
   'gdbstub.c',
   'msa.c',
 ))
+mips_ss.add(zlib)
 
 if have_system
   subdir('sysemu')
diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c
index 65403f1a87..5fe9cf1360 100644
--- a/target/mips/tcg/op_helper.c
+++ b/target/mips/tcg/op_helper.c
@@ -25,6 +25,8 @@
 #include "exec/exec-all.h"
 #include "exec/memop.h"
 #include "fpu_help

[PATCH v5 2/3] Skip NaN mode check for soft-float

From: Aleksandar Rakic 

Skip NaN mode check for soft-float since NaN mode is irrelevant if an ELF
binary's FPU mode is soft-float, i.e. it doesn't utilize a FPU.

Cherry-picked 63492a56485f6b755fccf7ad623f7a189bfc79b6
from https://github.com/MIPS/gnutools-qemu

Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 linux-user/mips/cpu_loop.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c
index 462387a073..07c1ebe287 100644
--- a/linux-user/mips/cpu_loop.c
+++ b/linux-user/mips/cpu_loop.c
@@ -304,8 +304,10 @@ void target_cpu_copy_regs(CPUArchState *env, struct 
target_pt_regs *regs)
 if (env->insn_flags & ISA_NANOMIPS32) {
 return;
 }
-if (((info->elf_flags & EF_MIPS_NAN2008) != 0) !=
-((env->active_fpu.fcr31 & (1 << FCR31_NAN2008)) != 0)) {
+if (info->fp_abi != MIPS_ABI_FP_SOFT
+&& ((info->elf_flags & EF_MIPS_NAN2008) != 0) !=
+   ((env->active_fpu.fcr31 & (1 << FCR31_NAN2008)) != 0))
+  {
 if ((env->active_fpu.fcr31_rw_bitmask &
   (1 << FCR31_NAN2008)) == 0) {
 fprintf(stderr, "ELF binary's NaN mode not supported by CPU\n");
-- 
2.34.1

[PATCH v5 3/3] target/mips: Enable MSA ASE using a CLI flag

From: Aleksandar Rakic 

Enable the MSA ASE using the CLI flag -cpu <model>,msa=on.

Signed-off-by: Aleksandar Rakic 
---
 target/mips/cpu.c  | 16 
 target/mips/cpu.h  |  1 +
 target/mips/internal.h |  2 +-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index d0a43b6d5c..8e12d303de 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -494,8 +494,24 @@ static void mips_cpu_realizefn(DeviceState *dev, Error 
**errp)
 mcc->parent_realize(dev, errp);
 }
 
+static bool mips_get_msa_on(Object *obj, Error **errp)
+{
+MIPSCPU *cpu = MIPS_CPU(obj);
+CPUMIPSState *env = &cpu->env;
+return env->msa_on;
+}
+
+static void mips_set_msa_on(Object *obj, bool value, Error **errp)
+{
+MIPSCPU *cpu = MIPS_CPU(obj);
+CPUMIPSState *env = &cpu->env;
+env->msa_on = value;
+}
+
 static void mips_cpu_initfn(Object *obj)
 {
+object_property_add_bool(obj, "msa", mips_get_msa_on, mips_set_msa_on);
+object_property_set_bool(obj, "msa", false, NULL);
 MIPSCPU *cpu = MIPS_CPU(obj);
 CPUMIPSState *env = &cpu->env;
 MIPSCPUClass *mcc = MIPS_CPU_GET_CLASS(obj);
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index f6877ece8b..3e636535c6 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -1191,6 +1191,7 @@ typedef struct CPUArchState {
 QEMUTimer *timer; /* Internal timer */
 Clock *count_clock; /* CP0_Count clock */
 target_ulong exception_base; /* ExceptionBase input to the core */
+bool msa_on; /* Enable MSA using a CLI flag -cpu ...,msa=on/off */
 } CPUMIPSState;
 
 /**
diff --git a/target/mips/internal.h b/target/mips/internal.h
index 91c786cff8..bbe2acffe2 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -399,7 +399,7 @@ static inline void compute_hflags(CPUMIPSState *env)
 }
 }
 if (ase_msa_available(env)) {
-if (env->CP0_Config5 & (1 << CP0C5_MSAEn)) {
+if ((env->CP0_Config5 & (1 << CP0C5_MSAEn)) || (env->msa_on)) {
 env->hflags |= MIPS_HFLAG_MSA;
 }
 }
-- 
2.34.1

[PATCH v5 0/3] Improve Mips target

Hi,

This patch series adds support for emulation of CRC32 instructions for
the Mips target in QEMU, enables CRC for mips64r6, skips NaN mode check
for soft-float, and adds a CLI flag for enabling an MSA feature.

The CRC32 instructions are available in MD00087 Revision 6.06.
Since the disassembly for crc32 is hidden in commit 99029be1c28, the new
version of patch 1/3 is basically identical to v1, except that tests
have been added.

Most of the following patches are cherry-picked from the branch
mips_rel/4_1_0/master on the MIPS' repository:
https://github.com/MIPS/gnutools-qemu/
Further details on individual changes are included in the respective
patches.

Kind regards,
Aleksandar Rakic

Re: [PATCH v5 25/36] vfio/migration: Multifd device state transfer support - receive init/cleanup


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Add support for VFIOMultifd data structure that will contain most of the
receive-side data together with its init/cleanup methods.

Signed-off-by: Maciej S. Szmigiero 
---
  hw/vfio/migration-multifd.c   | 33 +
  hw/vfio/migration-multifd.h   |  8 
  hw/vfio/migration.c   | 29 +++--
  include/hw/vfio/vfio-common.h |  3 +++
  4 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 7328ad8e925c..c2defc0efef0 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -41,6 +41,9 @@ typedef struct VFIOStateBuffer {
  size_t len;
  } VFIOStateBuffer;
  
+typedef struct VFIOMultifd {

+} VFIOMultifd;
+
  static void vfio_state_buffer_clear(gpointer data)
  {
  VFIOStateBuffer *lb = data;
@@ -84,8 +87,38 @@ static VFIOStateBuffer 
*vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx)
  return &g_array_index(bufs->array, VFIOStateBuffer, idx);
  }
  
+VFIOMultifd *vfio_multifd_new(void)

+{
+VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
+
+return multifd;
+}
+
+void vfio_multifd_free(VFIOMultifd *multifd)
+{
+g_free(multifd);
+}
+
  bool vfio_multifd_transfer_supported(void)
  {
  return multifd_device_state_supported() &&
  migrate_send_switchover_start();
  }
+
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
+{
+return false;
+}
+
+bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
+{
+if (vfio_multifd_transfer_enabled(vbasedev) &&
+!vfio_multifd_transfer_supported()) {
+error_setg(errp,
+   "%s: Multifd device transfer requested but unsupported in the 
current config",
+   vbasedev->name);
+return false;
+}
+
+return true;
+}
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 8fe004c1da81..1eefba3b2eed 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -12,6 +12,14 @@
  
  #include "hw/vfio/vfio-common.h"
  
+typedef struct VFIOMultifd VFIOMultifd;

+
+VFIOMultifd *vfio_multifd_new(void);
+void vfio_multifd_free(VFIOMultifd *multifd);
+
  bool vfio_multifd_transfer_supported(void);
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
+
+bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
  
  #endif

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 7b79be6ad293..4311de763885 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -674,15 +674,40 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
  static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  VFIODevice *vbasedev = opaque;
+VFIOMigration *migration = vbasedev->migration;
+int ret;
+
+if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
+return -EINVAL;
+}
+
+ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+   migration->device_state, errp);
+if (ret) {
+return ret;
+}
  
-return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,

-vbasedev->migration->device_state, errp);
+if (vfio_multifd_transfer_enabled(vbasedev)) {
+assert(!migration->multifd);
+migration->multifd = vfio_multifd_new();


When called from vfio_load_setup(), I think vfio_multifd_transfer_setup()
should allocate migration->multifd at the same time. It would simplify
the setup to one step. Maybe we could add a bool parameter ? because,
IIRC, you didn't like the idea of allocating it always, that is in
vfio_save_setup() too.

For symmetry, could vfio_save_cleanup() call vfio_multifd_cleanup() too ?
a setup implies a cleanup.

Thanks,

C.



+}
+
+return 0;
+}
+
+static void vfio_multifd_cleanup(VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+
+g_clear_pointer(&migration->multifd, vfio_multifd_free);
  }
  
  static int vfio_load_cleanup(void *opaque)

  {
  VFIODevice *vbasedev = opaque;
  
+vfio_multifd_cleanup(vbasedev);

+
  vfio_migration_cleanup(vbasedev);
  trace_vfio_load_cleanup(vbasedev->name);
  
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h

index 64ee3b1a2547..ab110198bd6b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -78,6 +78,8 @@ typedef struct VFIORegion {
  uint8_t nr; /* cache the region number for debug */
  } VFIORegion;
  
+typedef struct VFIOMultifd VFIOMultifd;

+
  typedef struct VFIOMigration {
  struct VFIODevice *vbasedev;
  VMChangeStateEntry *vm_state;
@@ -89,6 +91,7 @@ typedef struct VFIOMigration {
  uint64_t mig_flags;
  uint64_t precopy_init_size;
  uint64_t precopy_dirty_size;
+VFIOMultifd *multifd;
  bool initial_data_sent;
  
  bool e

Re: [PATCH 1/2] vfio: Make vfio-pci available on 64-bit host platforms only

2025-02-26 Thread BALATON Zoltan


On Wed, 26 Feb 2025, Cédric Le Goater wrote:

On 2/26/25 15:12, BALATON Zoltan wrote:

On Wed, 26 Feb 2025, Cédric Le Goater wrote:

VFIO PCI never worked on PPC32 nor ARM, S390x is 64-bit, it might have
worked on i386 long ago but we have no plans to further support VFIO
on any 32-bit host platforms. Restrict to 64-bit host platforms.

Cc: Harsh Prateek Bora 
Cc: Tony Krowiak 
Cc: Eric Farman 
Cc: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
hw/vfio/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig
index 
7cdba0560aa821c88d3420b36f86020575834202..6ed825429a9151fcdff33e95d1a310210689b258 
100644

--- a/hw/vfio/Kconfig
+++ b/hw/vfio/Kconfig
@@ -7,7 +7,7 @@ config VFIO_PCI
    default y
    select VFIO
    select EDID
-    depends on LINUX && PCI
+    depends on LINUX && PCI && (AARCH64 || PPC64 || X86_64 || S390X)


Are these defined for the host or target? 


host.


Where are these defined? I thought compiling qemu-system-ppc on x86_64 or 
aarch64 would have PPC defined not X86_64 or AARCH64 but I could well be 
missing something.


I see PPC is defined in target/ppc/Kconfig so I think these mark the target 
not the host. Vfio-pci works with qemu-system-ppc 


Ah ! I am surprised. Which host and QEMU machine please ?


I've seen people do this on x86_64 host with pegasos2, amigaone and mac99 
running 32 bit guests (AmigaOS and MacOS). Some people running older 32 
bit Windows versions on pc machine might also use this.


and we are trying to use it for GPU pass through for 32 bit PPC guests. 
Please keep that enabled.


As per commit 6d701c9bac1d3571e9ad511e01b27df7237f0b13 "meson: Deprecate
32-bit host support", support will be fully removed in 2 releases and
it doesn't need to be addressed by VFIO.


That's about removing support for compiling QEMU on 32 bit host OSes, not
about using 32 bit guests with qemu-system-ppc and qemu-system-i386, isn't it?


Regards,
BALATON Zoltan

Re: [PATCH v5 32/36] vfio/migration: Make x-migration-multifd-transfer VFIO property mutable


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

DEFINE_PROP_ON_OFF_AUTO() property isn't runtime-mutable so using it
would mean that the source VM would need to decide upfront at startup
time whether it wants to do a multifd device state transfer at some
point.

Source VM can run for a long time before being migrated so it is
desirable to have a fallback mechanism to the old way of transferring
VFIO device state if it turns out to be necessary.

This brings this property to the same mutability level as ordinary
migration parameters, which too can be adjusted at the run time.

Signed-off-by: Maciej S. Szmigiero 
---
  hw/vfio/pci.c | 12 +---
  1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 184ff882f9d1..9111805ae06c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3353,6 +3353,8 @@ static void vfio_instance_init(Object *obj)
  pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
  }
  
+static PropertyInfo qdev_prop_on_off_auto_mutable;


please use another name, like vfio_pci_migration_multifd_transfer_prop.
I wish we could define the property info all at once.

Thanks,

C.



+
  static const Property vfio_pci_dev_properties[] = {
  DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
  DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
@@ -3377,9 +3379,10 @@ static const Property vfio_pci_dev_properties[] = {
  VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
  DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
  vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
-DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice,
-vbasedev.migration_multifd_transfer,
-ON_OFF_AUTO_AUTO),
+DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
+vbasedev.migration_multifd_transfer,
+qdev_prop_on_off_auto_mutable, OnOffAuto,
+.set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
  DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
   vbasedev.migration_events, false),
  DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3475,6 +3478,9 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = {
  
  static void register_vfio_pci_dev_type(void)

  {
+qdev_prop_on_off_auto_mutable = qdev_prop_on_off_auto;
+qdev_prop_on_off_auto_mutable.realized_set_allowed = true;
+
  type_register_static(&vfio_pci_dev_info);
  type_register_static(&vfio_pci_nohotplug_dev_info);
  }

Re: [PATCH v5 33/36] hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Add a hw_compat entry for recently added x-migration-multifd-transfer VFIO
property.

Signed-off-by: Maciej S. Szmigiero 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/core/machine.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 21c3bde92f08..d0a87f5ccbaa 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -44,6 +44,7 @@ GlobalProperty hw_compat_9_2[] = {
  { "virtio-mem-pci", "vectors", "0" },
  { "migration", "multifd-clean-tls-termination", "false" },
  { "migration", "send-switchover-start", "off"},
+{ "vfio-pci", "x-migration-multifd-transfer", "off" },
  };
  const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);

Re: [PATCH 3/4] cputlb: introduce tlb_flush_other_cpu for reset use

2025-02-26 Thread Richard Henderson


On 2/26/25 06:29, Alex Bennée wrote:

I guess we want something like:


/* tlb_reset() - reset the TLB when the CPU is not running
  * cs: the cpu
  *
  * Only to be used when the CPU is definitely not running
  */

void tlb_reset(CPUState *cs) {
  g_assert(cs->cpu_stopped);

 for (i = 0; i < NB_MMU_MODES; i++) {
 tlb_mmu_flush_locked(&cpu->neg.tlb.d[i], &cpu->neg.tlb.f[i]);
 }
}

?


I like the name, and the separate assert.
I'm not convinced skipping the tlb resize and (especially) accounting is a good 
idea.

I suspect that the tlb_flush_by_mmuidx_async_work should be split vs its 
assert_cpu_is_self, and you just should use that.  I'll note that tcg_cpu_reset_hold and 
tlb_flush_by_mmuidx_async_work both call tcg_flush_jmp_cache, so we've got a double-flush 
in there.


If you don't want to use tlb_flush_by_mmuidx_async_work, I think you need at 
minimum

- take the lock
- tlb_window_reset()
- honor and update cpu->neg.tlb.c.dirty


r~

Re: [PATCH 01/25] tests/functional: move aarch64 GPU test into own file

Richard Henderson  writes:

> On 2/26/25 06:03, Alex Bennée wrote:
>> 'aarch64_virt' : 720,
>> +  'aarch64_virt_gpu' : 720,
>
> Does the split mean that we can reduce the timeout?

My run with all 4 tests takes ~100s, but when people use --enable-debug and
sanitizers that expands quickly.

>
>
> r~

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH] hw/misc/npcm_clk: fix buffer-overflow

2025-02-26 Thread Pierrick Bouvier

On 2/26/25 03:50, Peter Maydell wrote:

On Tue, 25 Feb 2025 at 20:57, Pierrick Bouvier
wrote:

On 2/25/25 05:41, Peter Maydell wrote:

(Looking more closely at the cold_reset_values handling
in npcm_gcr.c, that looks not quite right in a different
way; I'll send a reply to that patch email about that.)

It may be a hole in our CI right now.
Would that be interesting for CI to run all tests (check-functional +
check w/o functional) with both ubsan and asan?

We do have at least some ubsan tests in our CI right now
(eg the "clang-system" job). The problem with ubsan coverage
is the usual one that we already have too much CI going on,
and it takes forever and we don't have that much headroom
for adding more jobs.

I understand the problem behind spending more minutes on this.

However, looking at our CI, we already duplicate functional testing a lot:
buildtest.yml:functional-system-alpine:
buildtest.yml:functional-system-ubuntu:
buildtest.yml:functional-system-debian:
buildtest.yml:functional-system-fedora:
buildtest.yml:functional-system-centos:
buildtest.yml:functional-system-opensuse:

Would it hurt so much to have one configuration enabled with ubsan and
asan, which catches *real* bugs and potential security issues?

Yes, it adds overhead, but it should not be x10. Around x2 to x3.

On GitHub runners, with -j2, running all functional tests with
sanitizers takes less than 1 hour, and the build takes the same amount
of time (-j2 as well). Hopefully we have more cores available on our own
runners.

On the asan front, also, yes, coverage would be a good idea.
Here I think we will probably have to gradually ratchet
up the coverage because I'm pretty sure that at the moment
we will find we don't get a clean pass (mostly for "uninteresting"
memory leaks).

Yes, I run with ASAN_OPTIONS=detect_leaks=0, and I deactivate any test
that is flaky.

Two of them related to asan are tcg tests:
- munmap-pthread
- follow-fork-mode
I didn't have time to investigate, so I just removed them in my tree.

At this point, the whole list of tests concerned is:
https://github.com/search?q=repo%3Apbo-linaro%2Fqemu-ci+%22ci+fix%22+author%3Apbo-linaro&type=commits

(I do also usually run a local
ubsan test build when doing my accumulation of patches in
target-arm, but since that's a manual step it is fallible :-))

It's always said that "Maintainer time is precious"; shouldn't it be
the CI's job to catch this?

I guess CI minutes are cheaper than engineer ones these days.

-- PMM

Re: [PATCH 1/2] vfio: Make vfio-pci available on 64-bit host platforms only

2025-02-26 Thread BALATON Zoltan

On Wed, 26 Feb 2025, Cédric Le Goater wrote:

VFIO PCI never worked on PPC32 nor ARM, S390x is 64-bit, it might have
worked on i386 long ago but we have no plans to further support VFIO
on any 32-bit host platforms. Restrict to 64-bit host platforms.

Cc: Harsh Prateek Bora 
Cc: Tony Krowiak 
Cc: Eric Farman 
Cc: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
hw/vfio/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig
index 
7cdba0560aa821c88d3420b36f86020575834202..6ed825429a9151fcdff33e95d1a310210689b258
 100644
--- a/hw/vfio/Kconfig
+++ b/hw/vfio/Kconfig
@@ -7,7 +7,7 @@ config VFIO_PCI
default y
select VFIO
select EDID
-depends on LINUX && PCI
+depends on LINUX && PCI && (AARCH64 || PPC64 || X86_64 || S390X)

Are these defined for the host or target? I see PPC is defined in 
target/ppc/Kconfig so I think these mark the target not the host. Vfio-pci 
works with qemu-system-ppc and we are trying to use it for GPU pass 
through for 32 bit PPC guests. Please keep that enabled.

Regards,
BALATON Zoltan

config VFIO_CCW
bool

Re: [PATCH 3/4] cputlb: introduce tlb_flush_other_cpu for reset use

Richard Henderson  writes:

> On 2/25/25 10:46, Alex Bennée wrote:
>> The commit 30933c4fb4 (tcg/cputlb: remove other-cpu capability from
>> TLB flushing) introduced a regression that only shows up when
>> --enable-debug-tcg is used. The main use case of tlb_flush outside of
>> the current_cpu context is for handling reset and CPU creation. Rather
>> than revert the commit introduce a new helper and tweak the
>> documentation to make it clear where it should be used.
>> Signed-off-by: Alex Bennée 
>> ---
>> v2
>>- apparently reset can come from both cpu context and outside
>>- add cpu_common_post_load fixes
>> ---
>>   include/exec/exec-all.h   | 20 
>>   accel/tcg/cputlb.c| 11 +++
>>   accel/tcg/tcg-accel-ops.c |  2 +-
>>   cpu-target.c  |  2 +-
>>   target/i386/machine.c |  2 +-
>>   5 files changed, 30 insertions(+), 7 deletions(-)
>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>> index d9045c9ac4..cf030001ca 100644
>> --- a/include/exec/exec-all.h
>> +++ b/include/exec/exec-all.h
>> @@ -64,12 +64,24 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr 
>> addr);
>>* tlb_flush:
>>* @cpu: CPU whose TLB should be flushed
>>*
>> - * Flush the entire TLB for the specified CPU. Most CPU architectures
>> - * allow the implementation to drop entries from the TLB at any time
>> - * so this is generally safe. If more selective flushing is required
>> - * use one of the other functions for efficiency.
>> + * Flush the entire TLB for the specified current CPU.
>> + *
>> + * Most CPU architectures allow the implementation to drop entries
>> + * from the TLB at any time so this is generally safe. If more
>> + * selective flushing is required use one of the other functions for
>> + * efficiency.
>>*/
>>   void tlb_flush(CPUState *cpu);
>> +/**
>> + * tlb_flush_other_cpu:
>> + * @cpu: CPU whose TLB should be flushed
>> + *
>> + * Flush the entire TLB for a specified CPU. For cross vCPU flushes
>> + * you should be using a more selective function. This is really only
>> + * used for flushing CPUs being reset from outside their current
>> + * context.
>> + */
>> +void tlb_flush_other_cpu(CPUState *cpu);
>>   /**
>>* tlb_flush_all_cpus_synced:
>>* @cpu: src CPU of the flush
>> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
>> index ad158050a1..fc16a576f0 100644
>> --- a/accel/tcg/cputlb.c
>> +++ b/accel/tcg/cputlb.c
>> @@ -417,6 +417,17 @@ void tlb_flush(CPUState *cpu)
>>   tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
>>   }
>>   +void tlb_flush_other_cpu(CPUState *cpu)
>> +{
>> +if (qemu_cpu_is_self(cpu)) {
>> +tlb_flush(cpu);
>> +} else {
>> +async_run_on_cpu(cpu,
>> + tlb_flush_by_mmuidx_async_work,
>> + RUN_ON_CPU_HOST_INT(ALL_MMUIDX_BITS));
>> +}
>> +}
>
> I'm not convinced this is necessary.

I guess we want something like:


/* tlb_reset() - reset the TLB when the CPU is not running
 * cs: the cpu
 *
 * Only to be used when the CPU is definitely not running
 */

void tlb_reset(CPUState *cs) {
 g_assert(cs->cpu_stopped);

for (i = 0; i < NB_MMU_MODES; i++) {
tlb_mmu_flush_locked(&cpu->neg.tlb.d[i], &cpu->neg.tlb.f[i]);
}
}

?

>
>> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
>> index 6e3f1fa92b..e85d317d34 100644
>> --- a/accel/tcg/tcg-accel-ops.c
>> +++ b/accel/tcg/tcg-accel-ops.c
>> @@ -85,7 +85,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
>>   {
>>   tcg_flush_jmp_cache(cpu);
>>   -tlb_flush(cpu);
>> +tlb_flush_other_cpu(cpu);
>>   }
>
> I would really like to believe that at this point, hold phase, the cpu
> is *not* running. Therefore it is safe to zero out the softmmu tlb
> data structures.
>
>> /* mask must never be zero, except for A20 change call */
>> diff --git a/cpu-target.c b/cpu-target.c
>> index 667688332c..8eb1633c02 100644
>> --- a/cpu-target.c
>> +++ b/cpu-target.c
>> @@ -56,7 +56,7 @@ static int cpu_common_post_load(void *opaque, int 
>> version_id)
>>   /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
>>  version_id is increased. */
>>   cpu->interrupt_request &= ~0x01;
>> -tlb_flush(cpu);
>> +tlb_flush_other_cpu(cpu);
>
> Likewise, in post_load, the cpu is *not* running.
>
>> diff --git a/target/i386/machine.c b/target/i386/machine.c
>> index d9d4f25d1a..e66f46758a 100644
>> --- a/target/i386/machine.c
>> +++ b/target/i386/machine.c
>> @@ -401,7 +401,7 @@ static int cpu_post_load(void *opaque, int version_id)
>>   env->dr[7] = dr7 & ~(DR7_GLOBAL_BP_MASK | DR7_LOCAL_BP_MASK);
>>   cpu_x86_update_dr7(env, dr7);
>>   }
>> -tlb_flush(cs);
>> +tlb_flush_other_cpu(cs);
>>   return 0;
>
> Likewise.
>
>
> r~

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

[PATCH 1/2] target/riscv: Add scontext CSR handling

2025-02-26 Thread Florian Lugou

scontext size is 16 bits on RV32 and 32 bits on RV64, as recommended by
version 1.0 2025-02-21 of the debug specification.

When the Smstateen extension is implemented, accessibility to the
scontext CSR is controlled by bit 57 of the [mh]stateen0 CSRs.

Signed-off-by: Florian Lugou 
---
 target/riscv/cpu.h  |  1 +
 target/riscv/cpu_bits.h |  5 +
 target/riscv/csr.c  | 36 
 target/riscv/debug.c|  1 +
 4 files changed, 43 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 97713681cb..e47200f409 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -430,6 +430,7 @@ struct CPUArchState {
 target_ulong tdata2[RV_MAX_TRIGGERS];
 target_ulong tdata3[RV_MAX_TRIGGERS];
 target_ulong mcontext;
+target_ulong scontext;
 struct CPUBreakpoint *cpu_breakpoint[RV_MAX_TRIGGERS];
 struct CPUWatchpoint *cpu_watchpoint[RV_MAX_TRIGGERS];
 QEMUTimer *itrigger_timer[RV_MAX_TRIGGERS];
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index f97c48a394..add0bb9d0e 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -247,6 +247,9 @@
 #define CSR_SIEH0x114
 #define CSR_SIPH0x154
 
+/* Supervisor-Level Sdtrig CSRs (debug) */
+#define CSR_SCONTEXT0x5a8
+
 /* Hpervisor CSRs */
 #define CSR_HSTATUS 0x600
 #define CSR_HEDELEG 0x602
@@ -959,4 +962,6 @@ typedef enum RISCVException {
 #define MCONTEXT64 0x1FFFULL
 #define MCONTEXT32_HCONTEXT0x007F
 #define MCONTEXT64_HCONTEXT0x3FFFULL
+#define SCONTEXT32 0x0000FFFF
+#define SCONTEXT64 0x00000000FFFFFFFFULL
 #endif
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index afb7544f07..1c1ac8ed67 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -3221,6 +3221,10 @@ static RISCVException write_mstateen0(CPURISCVState 
*env, int csrno,
 wr_mask |= SMSTATEEN0_P1P13;
 }
 
+if (riscv_cpu_cfg(env)->debug) {
+wr_mask |= SMSTATEEN0_HSCONTXT;
+}
+
 if (riscv_cpu_cfg(env)->ext_smaia || riscv_cpu_cfg(env)->ext_smcsrind) {
 wr_mask |= SMSTATEEN0_SVSLCT;
 }
@@ -5053,6 +5057,35 @@ static RISCVException write_mcontext(CPURISCVState *env, 
int csrno,
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException read_scontext(CPURISCVState *env, int csrno,
+target_ulong *val)
+{
+RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSCONTXT);
+if (ret != RISCV_EXCP_NONE) {
+return ret;
+}
+
+*val = env->scontext;
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_scontext(CPURISCVState *env, int csrno,
+ target_ulong val)
+{
+bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
+
+RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSCONTXT);
+if (ret != RISCV_EXCP_NONE) {
+return ret;
+}
+
+/* Spec suggests 16 bits for RV32 and 32 bits for RV64 */
+target_ulong mask = rv32 ? SCONTEXT32 : SCONTEXT64;
+
+env->scontext = val & mask;
+return RISCV_EXCP_NONE;
+}
+
 static RISCVException read_mnscratch(CPURISCVState *env, int csrno,
  target_ulong *val)
 {
@@ -5705,6 +5738,9 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_SIEH]   = { "sieh",   aia_smode32, NULL, NULL, rmw_sieh },
 [CSR_SIPH]   = { "siph",   aia_smode32, NULL, NULL, rmw_siph },
 
+/* Supervisor-Level Sdtrig CSRs (debug) */
+[CSR_SCONTEXT]   = { "scontext", debug, read_scontext, write_scontext },
+
 [CSR_HSTATUS] = { "hstatus", hmode,   read_hstatus, write_hstatus,
   .min_priv_ver = PRIV_VERSION_1_12_0},
 [CSR_HEDELEG] = { "hedeleg", hmode,   read_hedeleg, write_hedeleg,
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index f6241a80be..914a9ce0f8 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -1086,4 +1086,5 @@ void riscv_trigger_reset_hold(CPURISCVState *env)
 }
 
 env->mcontext = 0;
+env->scontext = 0;
 }
-- 
2.43.0

Re: [PATCH 02/10] python: add qapi static analysis tests

2025-02-26 Thread John Snow

On Wed, Feb 26, 2025 at 4:29 AM Markus Armbruster  wrote:

> John Snow  writes:
>
> > On Mon, Feb 24, 2025 at 7:36 AM Markus Armbruster 
> wrote:
> >
> >> John Snow  writes:
> >>
> >> > Update the python tests to also check qapi. No idea why I didn't do
> this
> >> > before. I guess I was counting on moving it under python/ and then
> just
> >> > forgot after that was NACKed. Oops, this turns out to be really easy.
> >> >
> >> > flake8, isort and mypy use the tool configuration from the existing
> >> > python directory. pylint continues to use the special configuration
> >> > located in scripts/qapi/ - that configuration is more permissive. If
> we
> >> > wish to unify the two configurations, that's a separate series and a
> >> > discussion for a later date.
> >> >
> >> > As a result of this patch, one would be able to run any of the
> following
> >> > tests locally from the qemu.git/python directory and have it cover the
> >> > scripts/qapi/ module as well. All of the following options run the
> >> > python tests, static analysis tests, and linter checks; but with
> >> > different combinations of dependencies and interpreters.
> >> >
> >> > - "make check-minreqs" Run tests specifically under our oldest
> supported
> >> >   Python and our oldest supported dependencies. This is the test that
> >> >   runs on GitLab as "check-python-minreqs". This helps ensure we do
> not
> >> >   regress support on older platforms accidentally.
> >> >
> >> > - "make check-tox" Runs the tests under the newest supported
> >> >   dependencies, but under each supported version of Python in turn. At
> >> >   time of writing, this is Python 3.8 to 3.13 inclusive. This test
> helps
> >> >   catch bleeding-edge problems before they become problems for
> developer
> >> >   workstations. This is the GitLab test "check-python-tox" and is an
> >> >   optionally run, may-fail test due to the unpredictable nature of new
> >> >   dependencies being released into the ecosystem that may cause
> >> >   regressions.
> >> >
> >> > - "make check-dev" Runs the tests under the newest supported
> >> >   dependencies using whatever version of Python the user happens to
> have
> >> >   installed. This is a quick convenience check that does not map to
> any
> >> >   particular GitLab test.
> >> >
> >> > (Note! check-dev may be busted on Fedora 41 and bleeding edge versions
> >>
> >> It is for me.
> >>
> >> > of setuptools. That's unrelated to this patch and I'll address it
> >> > separately and soon. Thank you for your patience, --mgmt)
> >>
> >> Which of these tests, if any, run in "make check"?  In CI?
> >>
> >
> > Under "make check", the top-level test in qemu.git, none. I swear on my
> > future grave
>
> "Not today!"
>
> >  I am trying to fix that,
>
> Also not today.  SCNR!
>
> >   but there are barriers to it.
> Adding
> > make check support means installing testing dependencies in the configure
> > venv, which means a slower ./configure invocation. I am trying to figure
> > out how to minimize this penalty for cases where we either do not want
> to,
> > or can't, run the python tests. It's a long story, we can talk about it
> > later.
> >
> > In CI, the "check-minreqs" test will run by default as a must-pass test
> > under the job "check python minreqs".
> >
> > "check-tox" is an optional job in the CI pipeline that is allowed to fail
> > as a warning, due to the nature of this test checking bleeding edge
> > dependencies.
> >
> > All three local invocations run the exact same tests (literally "make
> > check" in the python dir), just under different combinations of
> > dependencies and python versions. "check-minreqs" is more or less the
> > "canonical" one that *must* succeed, but as a Python maintainer I do my
> > best to enforce "check-tox" as well, though it does lag behind.
> >
> > So, this isn't a perfect solution yet but it's certainly much better than
> > carrying around ad-hoc linter shell scripts and attempting to manage the
> > dependencies yourself. At least we all have access to the same
> invocations.
>
> So:
>
> * At some point, we'll integrate whatever we want developers to run into
>   "make check".  Until then:
>
> * Running "make check-dev" is nice and good enough.  CI might find
>   additional problems.  Expected to be rare and no big deal.
>
> * Running "make check-minreqs" locally will get the exact same results
>   as the same test in CI will.  Run if you care.
>
> * "make check-tox" is an early warning system.  Don't run unless you're
>   interested in preventing potential future problems.
>

More or less; though it does test in every supported python interpreter if
you happen to have multiple installed, so it can be a way to catch errors
that exist between minreqs and $current, but it's still generally only a
test that I think you should run if you are touching the Python stuff in a
major way; i.e. if you're sending something that's a PR for *me*, I think
you should run

Re: [PATCH v4 03/14] acpi/ghes: Use HEST table offsets when preparing GHES records

On Fri, 21 Feb 2025 15:35:12 +0100
Mauro Carvalho Chehab  wrote:

> There are two pointers that are needed during error injection:
> 
> 1. The start address of the CPER block to be stored;
> 2. The address of the ack.
s/ack/read_ack/

> 
> It is preferable to calculate them from the HEST table.  This allows
> checking the source ID, the size of the table and the type of the
> HEST error block structures.
> 
> Yet, keep the old code, as this is needed for migration purposes

+ from older QEMU versions

> 
> Signed-off-by: Mauro Carvalho Chehab 
> Reviewed-by: Jonathan Cameron 
> ---
>  hw/acpi/ghes.c | 100 +
>  include/hw/acpi/ghes.h |   2 +-
>  2 files changed, 101 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index ba37be9e7022..7efea519f766 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -41,6 +41,12 @@
>  /* Address offset in Generic Address Structure(GAS) */
>  #define GAS_ADDR_OFFSET 4
>  
> +/*
> + * ACPI spec 1.0b
> + * 5.2.3 System Description Table Header
> + */
> +#define ACPI_DESC_HEADER_OFFSET 36
> +
>  /*
>   * The total size of Generic Error Data Entry
>   * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
> @@ -61,6 +67,30 @@
>   */
>  #define ACPI_GHES_GESB_SIZE 20
>  
> +/*
> + * See the memory layout map at docs/specs/acpi_hest_ghes.rst.
> + */
> +
> +/*
> + * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
> + * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
> + */
> +#define HEST_GHES_V2_ENTRY_SIZE  92
> +
> +/*
> + * ACPI 6.1: 18.3.2.7: Generic Hardware Error Source
wrong chapter, read ack can't be in v1 GHES

> + * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
> + * Read Ack Register
> + */
> +#define GHES_READ_ACK_ADDR_OFF  64
> +
> +/*
> + * ACPI 6.1: 18.3.2.7: Generic Hardware Error Source
> + * Table 18-341 Generic Hardware Error Source Structure
> + * Error Status Address
> + */
> +#define GHES_ERR_STATUS_ADDR_OFF  20
> +
>  /*
>   * Values for error_severity field
>   */
> @@ -412,6 +442,73 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
>  *read_ack_register_addr = ghes_addr + sizeof(uint64_t);
>  }
>  
> +static void get_ghes_source_offsets(uint16_t source_id,
> +uint64_t hest_addr,
> +uint64_t *cper_addr,
> +uint64_t *read_ack_start_addr,
> +Error **errp)
> +{
> +uint64_t hest_err_block_addr, hest_read_ack_addr;
> +uint64_t err_source_entry, error_block_addr;
> +uint32_t num_sources, i;
> +
> +hest_addr += ACPI_DESC_HEADER_OFFSET;
> +
> +cpu_physical_memory_read(hest_addr, &num_sources,
> + sizeof(num_sources));
> +num_sources = le32_to_cpu(num_sources);
> +
> +err_source_entry = hest_addr + sizeof(num_sources);
> +
> +/*
> + * Currently, HEST Error source navigates only for GHESv2 tables
> + */
> +for (i = 0; i < num_sources; i++) {
> +uint64_t addr = err_source_entry;
> +uint16_t type, src_id;
> +
> +cpu_physical_memory_read(addr, &type, sizeof(type));
> +type = le16_to_cpu(type);
> +
> +/* For now, we only know the size of GHESv2 table */
> +if (type != ACPI_GHES_SOURCE_GENERIC_ERROR_V2) {
> +error_setg(errp, "HEST: type %d not supported.", type);
> +return;
> +}
> +
> +/* Compare CPER source address at the GHESv2 structure */
  ^ typo?

> +addr += sizeof(type);
> +cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
> +if (le16_to_cpu(src_id) == source_id) {
> +break;
> +}
> +
> +err_source_entry += HEST_GHES_V2_ENTRY_SIZE;
> +}
> +if (i == num_sources) {
> +error_setg(errp, "HEST: Source %d not found.", source_id);
> +return;
> +}
> +
> +/* Navigate though table address pointers */
   ^ typo
 
> +hest_err_block_addr = err_source_entry + GHES_ERR_STATUS_ADDR_OFF +
> +  GAS_ADDR_OFFSET;
> +
> +cpu_physical_memory_read(hest_err_block_addr, &error_block_addr,
> + sizeof(error_block_addr));
> +error_block_addr = le64_to_cpu(error_block_addr);
> +
> +cpu_physical_memory_read(error_block_addr, cper_addr,
> + sizeof(*cper_addr));
> +*cper_addr = le64_to_cpu(*cper_addr);
> +
> +hest_read_ack_addr = err_source_entry + GHES_READ_ACK_ADDR_OFF +
> + GAS_ADDR_OFFSET;
> +cpu_physical_memory_read(hest_read_ack_addr, read_ack_start_addr,
> + sizeof(*read_ack_start_addr));
> +*read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
> +}
> +
>  void ghes_record_cper_errors(const void *cper, size_t len

Re: [PATCH 01/25] tests/functional: move aarch64 GPU test into own file

2025-02-26 Thread Thomas Huth


On 26/02/2025 15.03, Alex Bennée wrote:

I want to expand the number of tests to cover a wide range of
configurations. That starts with splitting off from the normal virt
test from which it doesn't really share much code.

Signed-off-by: Alex Bennée 
Message-Id: <20250219150009.1662688-2-alex.ben...@linaro.org>
---

...

+class Aarch64VirtGPUMachine(QemuSystemTest):
+KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 '
+timeout = 360
+
+def wait_for_console_pattern(self, success_message, vm=None):
+wait_for_console_pattern(self, success_message,
+ failure_message='Kernel panic - not syncing',
+ vm=vm)


You could inherit your class from LinuxKernelTest instead, then you would 
get this wait_for_console_pattern() for free.


Anyway,
Reviewed-by: Thomas Huth

Re: [PATCH v4 06/14] acpi/ghes: create an ancillary acpi_ghes_get_state() function

On Fri, 21 Feb 2025 15:35:15 +0100
Mauro Carvalho Chehab  wrote:

> Instead of having a function to check if ACPI is enabled
> (acpi_ghes_present), change its logic to be more generic,
> returning a pointer to AcpiGhesState.
> 
> Such a change allows cleaning up the ghes GED state code, avoiding
> reading it multiple times, and simplifying the code.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> Reviewed-by: Jonathan Cameron 
> Reviewed-by:  Igor Mammedov 
> ---
>  hw/acpi/ghes-stub.c|  7 ---
>  hw/acpi/ghes.c | 38 ++
>  include/hw/acpi/ghes.h | 14 --
>  target/arm/kvm.c   |  7 +--
>  4 files changed, 27 insertions(+), 39 deletions(-)
> 
> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
> index 7cec1812dad9..40f660c246fe 100644
> --- a/hw/acpi/ghes-stub.c
> +++ b/hw/acpi/ghes-stub.c
> @@ -11,12 +11,13 @@
>  #include "qemu/osdep.h"
>  #include "hw/acpi/ghes.h"
>  
> -int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
> +int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> +uint64_t physical_address)
>  {
>  return -1;
>  }
>  
> -bool acpi_ghes_present(void)
> +AcpiGhesState *acpi_ghes_get_state(void)
>  {
> -return false;
> +return NULL;
>  }
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index f2d1cc7369f4..401789259f60 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -425,10 +425,6 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
>   uint64_t *cper_addr,
>   uint64_t *read_ack_register_addr)
>  {
> -if (!ghes_addr) {
> -return;
> -}
> -
>  /*
>   * non-HEST version supports only one source, so no need to change
>   * the start offset based on the source ID. Also, we can't validate
> @@ -517,27 +513,16 @@ static void get_ghes_source_offsets(uint16_t source_id,
>  NotifierList acpi_generic_error_notifiers =
>  NOTIFIER_LIST_INITIALIZER(error_device_notifiers);
>  
> -void ghes_record_cper_errors(const void *cper, size_t len,
> +void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t 
> len,
>   uint16_t source_id, Error **errp)
>  {
>  uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
> -AcpiGedState *acpi_ged_state;
> -AcpiGhesState *ags;
>  
>  if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
>  error_setg(errp, "GHES CPER record is too big: %zd", len);
>  return;
>  }
>  
> -acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
> -   NULL));
> -if (!acpi_ged_state) {
> -error_setg(errp, "Can't find ACPI_GED object");
> -return;
> -}
> -ags = &acpi_ged_state->ghes_state;
> -
> -
>  if (!ags->use_hest_addr) {
>  get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
>   &cper_addr, &read_ack_register_addr);
> @@ -546,11 +531,6 @@ void ghes_record_cper_errors(const void *cper, size_t 
> len,
>  &cper_addr, &read_ack_register_addr, errp);
>  }
>  
> -if (!cper_addr) {
> -error_setg(errp, "can not find Generic Error Status Block");
> -return;
> -}
> -
>  cpu_physical_memory_read(read_ack_register_addr,
>   &read_ack_register, sizeof(read_ack_register));
>  
> @@ -576,7 +556,8 @@ void ghes_record_cper_errors(const void *cper, size_t len,
>  notifier_list_notify(&acpi_generic_error_notifiers, NULL);
>  }
>  
> -int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
> +int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> +uint64_t physical_address)
>  {
>  /* Memory Error Section Type */
>  const uint8_t guid[] =
> @@ -602,7 +583,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t 
> physical_address)
>  acpi_ghes_build_append_mem_cper(block, physical_address);
>  
>  /* Report the error */
> -ghes_record_cper_errors(block->data, block->len, source_id, &errp);
> +ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
>  
>  g_array_free(block, true);
>  
> @@ -614,7 +595,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t 
> physical_address)
>  return 0;
>  }
>  
> -bool acpi_ghes_present(void)
> +AcpiGhesState *acpi_ghes_get_state(void)
>  {
>  AcpiGedState *acpi_ged_state;
>  AcpiGhesState *ags;
> @@ -623,11 +604,12 @@ bool acpi_ghes_present(void)
> NULL));
>  
>  if (!acpi_ged_state) {
> -return false;
> +return NULL;
>  }
>  ags = &acpi_ged_state->ghes_state;
> -if (!ags->hw_error_le && !ags->hest_addr_le)
> -return false;
>  
> -return true;
> +if (!ags->hw_error_le && !ags->hest

Re: [PATCH v5 28/36] vfio/migration: Multifd device state transfer support - config loading support


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Load device config received via multifd using the existing machinery
behind vfio_load_device_config_state().

Also, make sure to process the relevant main migration channel flags.

Signed-off-by: Maciej S. Szmigiero 
---
  hw/vfio/migration-multifd.c   | 47 ++-
  hw/vfio/migration.c   |  8 +-
  include/hw/vfio/vfio-common.h |  2 ++
  3 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index b3a88c062769..7200f6f1c2a2 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -15,6 +15,7 @@
  #include "qemu/lockable.h"
  #include "qemu/main-loop.h"
  #include "qemu/thread.h"
+#include "io/channel-buffer.h"
  #include "migration/qemu-file.h"
  #include "migration-multifd.h"
  #include "trace.h"
@@ -186,7 +187,51 @@ bool vfio_load_state_buffer(void *opaque, char *data, 
size_t data_size,
  
  static int vfio_load_bufs_thread_load_config(VFIODevice *vbasedev)


please modify to return a bool and take a "Error **errp" parameter.


Thanks,

C.



  {
-return -EINVAL;
+VFIOMigration *migration = vbasedev->migration;
+VFIOMultifd *multifd = migration->multifd;
+VFIOStateBuffer *lb;
+g_autoptr(QIOChannelBuffer) bioc = NULL;
+QEMUFile *f_out = NULL, *f_in = NULL;
+uint64_t mig_header;
+int ret;
+
+assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
+lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
+assert(lb->is_present);
+
+bioc = qio_channel_buffer_new(lb->len);
+qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");
+
+f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
+qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);
+
+ret = qemu_fflush(f_out);
+if (ret) {
+g_clear_pointer(&f_out, qemu_fclose);
+return ret;
+}
+
+qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
+f_in = qemu_file_new_input(QIO_CHANNEL(bioc));
+
+mig_header = qemu_get_be64(f_in);
+if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
+g_clear_pointer(&f_out, qemu_fclose);
+g_clear_pointer(&f_in, qemu_fclose);
+return -EINVAL;
+}
+
+bql_lock();
+ret = vfio_load_device_config_state(f_in, vbasedev);
+bql_unlock();
+
+g_clear_pointer(&f_out, qemu_fclose);
+g_clear_pointer(&f_in, qemu_fclose);
+if (ret < 0) {
+return ret;
+}
+
+return 0;
  }
  
  static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 85f54cb22df2..b962309f7c27 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -264,7 +264,7 @@ static int vfio_save_device_config_state(QEMUFile *f, void 
*opaque,
  return ret;
  }
  
-static int vfio_load_device_config_state(QEMUFile *f, void *opaque)

+int vfio_load_device_config_state(QEMUFile *f, void *opaque)
  {
  VFIODevice *vbasedev = opaque;
  uint64_t data;
@@ -728,6 +728,12 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int 
version_id)
  switch (data) {
  case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
  {
+if (vfio_multifd_transfer_enabled(vbasedev)) {
+error_report("%s: got DEV_CONFIG_STATE but doing multifd 
transfer",
+ vbasedev->name);
+return -EINVAL;
+}
+
  return vfio_load_device_config_state(f, opaque);
  }
  case VFIO_MIG_FLAG_DEV_SETUP_STATE:
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ab110198bd6b..ce2bdea8a2c2 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -298,6 +298,8 @@ void vfio_add_bytes_transferred(unsigned long val);
  bool vfio_device_state_is_running(VFIODevice *vbasedev);
  bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
  
+int vfio_load_device_config_state(QEMUFile *f, void *opaque);

+
  #ifdef CONFIG_LINUX
  int vfio_get_region_info(VFIODevice *vbasedev, int index,
   struct vfio_region_info **info);

Re: [PATCH v5 19/36] vfio/migration: Convert bytes_transferred counter to atomic

2025-02-26 Thread Maciej S. Szmigiero


On 26.02.2025 08:52, Cédric Le Goater wrote:

On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

So it can be safely accessed from multiple threads.

This variable type needs to be changed to unsigned long since
32-bit host platforms lack the necessary addition atomics on 64-bit
variables.

Using 32-bit counters on 32-bit host platforms should not be a problem
in practice since they can't realistically address more memory anyway.


Is it useful to have VFIO on 32-bit host platforms ?

If not, VFIO PCI should depend on (AARCH64 || PPC64 || X86_64) and we
could drop this patch. Let's address that independently.


Not sure how much use VFIO gets on 32-bit host platforms,
however totally disabling it on these would be a major functional regression -
at least if taken at its face value.

Especially considering that making it work on 32-bit platform requires
just this tiny variable type change here.


Thanks,

C.


Thanks,
Maciej

Re: [PATCH v5 27/36] vfio/migration: Multifd device state transfer support - load thread


On 2/19/25 21:34, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Since it's important to finish loading device state transferred via the
main migration channel (via save_live_iterate SaveVMHandler) before
starting loading the data asynchronously transferred via multifd the thread
doing the actual loading of the multifd transferred data is only started
from switchover_start SaveVMHandler.

switchover_start handler is called when MIG_CMD_SWITCHOVER_START
sub-command of QEMU_VM_COMMAND is received via the main migration channel.

This sub-command is only sent after all save_live_iterate data have already
been posted so it is safe to commence loading of the multifd-transferred
device state upon receiving it - loading of save_live_iterate data happens
synchronously in the main migration thread (much like the processing of
MIG_CMD_SWITCHOVER_START) so by the time MIG_CMD_SWITCHOVER_START is
processed all the preceding data must have already been loaded.

Signed-off-by: Maciej S. Szmigiero 
---
  hw/vfio/migration-multifd.c | 225 
  hw/vfio/migration-multifd.h |   2 +
  hw/vfio/migration.c |  12 ++
  hw/vfio/trace-events|   5 +
  4 files changed, 244 insertions(+)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 5d5ee1393674..b3a88c062769 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -42,8 +42,13 @@ typedef struct VFIOStateBuffer {
  } VFIOStateBuffer;
  
  typedef struct VFIOMultifd {

+QemuThread load_bufs_thread;
+bool load_bufs_thread_running;
+bool load_bufs_thread_want_exit;
+
  VFIOStateBuffers load_bufs;
  QemuCond load_bufs_buffer_ready_cond;
+QemuCond load_bufs_thread_finished_cond;
  QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
  uint32_t load_buf_idx;
  uint32_t load_buf_idx_last;
@@ -179,6 +184,175 @@ bool vfio_load_state_buffer(void *opaque, char *data, 
size_t data_size,
  return true;
  }
  
+static int vfio_load_bufs_thread_load_config(VFIODevice *vbasedev)

+{
+return -EINVAL;
+}



please move to next patch.


+static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
+{
+VFIOStateBuffer *lb;
+guint bufs_len;


guint:  I guess it's ok to use here. It is not common practice in VFIO.


+
+bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
+if (multifd->load_buf_idx >= bufs_len) {
+assert(multifd->load_buf_idx == bufs_len);
+return NULL;
+}
+
+lb = vfio_state_buffers_at(&multifd->load_bufs,
+   multifd->load_buf_idx);


Could be one line. minor.


+if (!lb->is_present) {
+return NULL;
+}
+
+return lb;
+}
+
+static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
+ VFIOStateBuffer *lb,
+ Error **errp)
+{
+VFIOMigration *migration = vbasedev->migration;
+VFIOMultifd *multifd = migration->multifd;
+g_autofree char *buf = NULL;
+char *buf_cur;
+size_t buf_len;
+
+if (!lb->len) {
+return true;
+}
+
+trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
+   multifd->load_buf_idx);


I think we can move this trace event to vfio_load_bufs_thread()


+/* lb might become re-allocated when we drop the lock */
+buf = g_steal_pointer(&lb->data);
+buf_cur = buf;
+buf_len = lb->len;
+while (buf_len > 0) {
+ssize_t wr_ret;
+int errno_save;
+
+/*
+ * Loading data to the device takes a while,
+ * drop the lock during this process.
+ */
+qemu_mutex_unlock(&multifd->load_bufs_mutex);
+wr_ret = write(migration->data_fd, buf_cur, buf_len);
+errno_save = errno;
+qemu_mutex_lock(&multifd->load_bufs_mutex);
+
+if (wr_ret < 0) {
+error_setg(errp,
+   "writing state buffer %" PRIu32 " failed: %d",
+   multifd->load_buf_idx, errno_save);
+return false;
+}
+
+assert(wr_ret <= buf_len);
+buf_len -= wr_ret;
+buf_cur += wr_ret;
+}
+
+trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
+ multifd->load_buf_idx);


and drop this trace event.

In which case, we can modify the parameters of vfio_load_state_buffer_write()
to use directly a 'VFIOMultifd *multifd' and an fd instead of 
"migration->data_fd".


+
+return true;
+}
+
+static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
+bool *should_quit)
+{
+return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
+}
+
+/*
+ * This thread is spawned by vfio_multifd_switchover_start() which gets
+ * called upon encountering the switchover point marker in main migration
+

[PATCH 18/25] plugins/api: clean-up the includes

Thanks to re-factoring and clean-up work (especially to exec-all) we
no longer need such broad headers for the api.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20250225110844.3296991-5-alex.ben...@linaro.org>
---
 plugins/api.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/plugins/api.c b/plugins/api.c
index fa4d495277..c3ba1e98e8 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -39,9 +39,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/plugin.h"
 #include "qemu/log.h"
-#include "qemu/timer.h"
 #include "tcg/tcg.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "exec/target_page.h"
 #include "exec/translation-block.h"
@@ -51,7 +49,6 @@
 #ifndef CONFIG_USER_ONLY
 #include "qapi/error.h"
 #include "migration/blocker.h"
-#include "exec/ram_addr.h"
 #include "qemu/plugin-memory.h"
 #include "hw/boards.h"
 #else
-- 
2.39.5

Re: [PATCH v4 00/14] Change ghes to use HEST-based offsets and add support for error inject