[PULL 2/3] docs/system: Add entry for LoongArch system

2025-03-20 Thread Bibo Mao
Add index entry for LoongArch system and do some small modification
with  LoongArch document with rst syntax.

Signed-off-by: Bibo Mao 
Reviewed-by: Song Gao 
---
 docs/system/loongarch/virt.rst   | 31 ++-
 docs/system/target-loongarch.rst | 19 +++
 docs/system/targets.rst  |  1 +
 3 files changed, 30 insertions(+), 21 deletions(-)
 create mode 100644 docs/system/target-loongarch.rst

diff --git a/docs/system/loongarch/virt.rst b/docs/system/loongarch/virt.rst
index 172fba079e..7845878469 100644
--- a/docs/system/loongarch/virt.rst
+++ b/docs/system/loongarch/virt.rst
@@ -12,14 +12,15 @@ Supported devices
 -
 
 The ``virt`` machine supports:
-- Gpex host bridge
-- Ls7a RTC device
-- Ls7a IOAPIC device
-- ACPI GED device
-- Fw_cfg device
-- PCI/PCIe devices
-- Memory device
-- CPU device. Type: la464.
+
+* Gpex host bridge
+* Ls7a RTC device
+* Ls7a IOAPIC device
+* ACPI GED device
+* Fw_cfg device
+* PCI/PCIe devices
+* Memory device
+* CPU device. Type: la464.
 
 CPU and machine Type
 
@@ -39,13 +40,7 @@ can be accessed by following steps.
 
 .. code-block:: bash
 
-  ./configure --disable-rdma --prefix=/usr \
-  --target-list="loongarch64-softmmu" \
-  --disable-libiscsi --disable-libnfs --disable-libpmem \
-  --disable-glusterfs --enable-libusb --enable-usb-redir \
-  --disable-opengl --disable-xen --enable-spice \
-  --enable-debug --disable-capstone --disable-kvm \
-  --enable-profiler
+  ./configure --target-list="loongarch64-softmmu"
   make -j8
 
 (2) Set cross tools:
@@ -53,9 +48,7 @@ can be accessed by following steps.
 .. code-block:: bash
 
   wget 
https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-6.3-cross-tools-gcc-glibc.tar.xz
-
   tar -vxf loongarch64-clfs-6.3-cross-tools-gcc-glibc.tar.xz  -C /opt
-
   export PATH=/opt/cross-tools/bin:$PATH
   export LD_LIBRARY_PATH=/opt/cross-tools/lib:$LD_LIBRARY_PATH
   export 
LD_LIBRARY_PATH=/opt/cross-tools/loongarch64-unknown-linux-gnu/lib/:$LD_LIBRARY_PATH
@@ -74,13 +67,9 @@ Note: To build the release version of the bios,  set 
--buildtarget=RELEASE,
 .. code-block:: bash
 
   git clone https://github.com/loongson/linux.git
-
   cd linux
-
   git checkout loongarch-next
-
   make ARCH=loongarch CROSS_COMPILE=loongarch64-unknown-linux-gnu- 
loongson3_defconfig
-
   make ARCH=loongarch CROSS_COMPILE=loongarch64-unknown-linux-gnu- -j32
 
 Note: The branch of linux source code is loongarch-next.
diff --git a/docs/system/target-loongarch.rst b/docs/system/target-loongarch.rst
new file mode 100644
index 00..316c604b91
--- /dev/null
+++ b/docs/system/target-loongarch.rst
@@ -0,0 +1,19 @@
+.. _LoongArch-System-emulator:
+
+LoongArch System emulator
+-
+
+QEMU can emulate loongArch 64 bit systems via the
+``qemu-system-loongarch64`` binary. Only one machine type ``virt`` is
+supported.
+
+When using KVM as accelerator, QEMU can emulate la464 cpu model. And when
+using the default cpu model with TCG as accelerator, QEMU will emulate a
+subset of la464 cpu features that should be enough to run distributions
+built for the la464.
+
+Board-specific documentation
+
+
+.. toctree::
+   loongarch/virt
diff --git a/docs/system/targets.rst b/docs/system/targets.rst
index 224fadae71..38e2418801 100644
--- a/docs/system/targets.rst
+++ b/docs/system/targets.rst
@@ -18,6 +18,7 @@ Contents:
 
target-arm
target-avr
+   target-loongarch
target-m68k
target-mips
target-ppc
-- 
2.43.5




[PULL 1/3] host/include/loongarch64: Fix inline assembly compatibility with Clang

2025-03-20 Thread Bibo Mao
From: Yao Zi 

Clang on LoongArch only accepts fp register names in the dollar-prefixed
form, while GCC allows omitting the dollar. Change registers in ASM
clobbers to the dollar-prefixed form to make user emulators buildable
with Clang on loongarch64. No functional change invovled.

Cc: qemu-sta...@nongnu.org
Fixes: adc8467e697 ("host/include/loongarch64: Add atomic16 load and store")
Signed-off-by: Yao Zi 
Reviewed-by: Richard Henderson 
Reviewed-by: Bibo Mao 
Signed-off-by: Bibo Mao 
---
 host/include/loongarch64/host/atomic128-ldst.h.inc| 4 ++--
 host/include/loongarch64/host/bufferiszero.c.inc  | 6 --
 host/include/loongarch64/host/load-extract-al16-al8.h.inc | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/host/include/loongarch64/host/atomic128-ldst.h.inc 
b/host/include/loongarch64/host/atomic128-ldst.h.inc
index 9a4a8f8b9e..754d2143f0 100644
--- a/host/include/loongarch64/host/atomic128-ldst.h.inc
+++ b/host/include/loongarch64/host/atomic128-ldst.h.inc
@@ -28,7 +28,7 @@ static inline Int128 atomic16_read_ro(const Int128 *ptr)
 asm("vld $vr0, %2, 0\n\t"
 "vpickve2gr.d %0, $vr0, 0\n\t"
 "vpickve2gr.d %1, $vr0, 1"
-   : "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+: "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "$f0");
 
 return int128_make128(l, h);
 }
@@ -46,7 +46,7 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
 asm("vinsgr2vr.d $vr0, %1, 0\n\t"
 "vinsgr2vr.d $vr0, %2, 1\n\t"
 "vst $vr0, %3, 0"
-   : "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+: "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "$f0");
 }
 
 #endif /* LOONGARCH_ATOMIC128_LDST_H */
diff --git a/host/include/loongarch64/host/bufferiszero.c.inc 
b/host/include/loongarch64/host/bufferiszero.c.inc
index 69891eac80..bb2598fdc3 100644
--- a/host/include/loongarch64/host/bufferiszero.c.inc
+++ b/host/include/loongarch64/host/bufferiszero.c.inc
@@ -61,7 +61,8 @@ static bool buffer_is_zero_lsx(const void *buf, size_t len)
 "2:"
 : "=&r"(ret), "+r"(p)
 : "r"(buf), "r"(e), "r"(l)
-: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8",
+  "$fcc0");
 
 return ret;
 }
@@ -119,7 +120,8 @@ static bool buffer_is_zero_lasx(const void *buf, size_t len)
 "3:"
 : "=&r"(ret), "+r"(p)
 : "r"(buf), "r"(e), "r"(l)
-: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8",
+  "$fcc0");
 
 return ret;
 }
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h.inc 
b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
index d1fb59d8af..9528521e7d 100644
--- a/host/include/loongarch64/host/load-extract-al16-al8.h.inc
+++ b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
@@ -31,7 +31,7 @@ static inline uint64_t load_atom_extract_al16_or_al8(void 
*pv, int s)
 asm("vld $vr0, %2, 0\n\t"
 "vpickve2gr.d %0, $vr0, 0\n\t"
 "vpickve2gr.d %1, $vr0, 1"
-   : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+: "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "$f0");
 
 return (l >> shr) | (h << (-shr & 63));
 }
-- 
2.43.5




[PULL 0/3] loongarch-to-apply queue

2025-03-20 Thread Bibo Mao
The following changes since commit 1dae461a913f9da88df05de6e2020d3134356f2e:

  Update version for v10.0.0-rc0 release (2025-03-18 10:18:14 -0400)

are available in the Git repository at:

  https://gitlab.com/bibo-mao/qemu.git tags/pull-loongarch-20250321

for you to fetch changes up to b8d5503a3ecf8bcf75e4960d04215f71dbfd5dd2:

  target/loongarch: fix bad shift in check_ps() (2025-03-21 11:31:56 +0800)


pull-loongarch-20250321 queue


Bibo Mao (1):
  docs/system: Add entry for LoongArch system

Song Gao (1):
  target/loongarch: fix bad shift in check_ps()

Yao Zi (1):
  host/include/loongarch64: Fix inline assembly compatibility with Clang

 docs/system/loongarch/virt.rst | 31 +++---
 docs/system/target-loongarch.rst   | 19 +
 docs/system/targets.rst|  1 +
 host/include/loongarch64/host/atomic128-ldst.h.inc |  4 +--
 host/include/loongarch64/host/bufferiszero.c.inc   |  6 +++--
 .../loongarch64/host/load-extract-al16-al8.h.inc   |  2 +-
 target/loongarch/internals.h   |  2 +-
 target/loongarch/tcg/csr_helper.c  |  2 +-
 target/loongarch/tcg/tlb_helper.c  | 10 +++
 9 files changed, 44 insertions(+), 33 deletions(-)
 create mode 100644 docs/system/target-loongarch.rst




[PATCH v2 14/30] exec/cpu-all: remove cpu include

2025-03-20 Thread Pierrick Bouvier
Now we made sure important defines are included using their direct
path, we can remove cpu.h from cpu-all.h.

Signed-off-by: Pierrick Bouvier 
---
 include/exec/cpu-all.h | 2 --
 accel/tcg/cpu-exec.c   | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index d4d05d82315..da8f5dd10c5 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -21,6 +21,4 @@
 
 #include "exec/cpu-common.h"
 
-#include "cpu.h"
-
 #endif /* CPU_ALL_H */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 207416e0212..813113c51ea 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -36,6 +36,7 @@
 #include "exec/log.h"
 #include "qemu/main-loop.h"
 #include "exec/cpu-all.h"
+#include "cpu.h"
 #include "exec/icount.h"
 #include "exec/replay-core.h"
 #include "system/tcg.h"
-- 
2.39.5




[PATCH v2 00/30] single-binary: start make hw/arm/ common

2025-03-20 Thread Pierrick Bouvier
This series focuses on removing compilation units duplication in hw/arm. We
start with this architecture because it should not be too hard to transform it,
and should give us some good hints on the difficulties we'll meet later.

We first start by making changes in global headers to be able to not rely on
specific target defines. In particular, we completely remove cpu-all.h.
We then focus on removing those defines from target/arm/cpu.h.

>From there, we modify build system to create a new hw common library (per base
architecture, "arm" in this case), instead of compiling the same files for every
target.

Finally, we can declare hw/arm/boot.c, and most of the boards as common as a
first step for this part.

- Based-on: 20250317183417.285700-1-pierrick.bouv...@linaro.org
("[PATCH v6 00/18] make system memory API available for common code")
https://lore.kernel.org/qemu-devel/20250317183417.285700-1-pierrick.bouv...@linaro.org/
- Based-on: 20250318213209.2579218-1-richard.hender...@linaro.org
("[PATCH v2 00/42] accel/tcg, codebase: Build once patches")
https://lore.kernel.org/qemu-devel/20250318213209.2579218-1-richard.hender...@linaro.org

v2:
- rebase on top of Richard series
- add target include in hw_common lib
- hw_common_lib uses -DCOMPILE_SYSTEM_VS_USER introduced by Richard series
- remove cpu-all header
- remove BSWAP_NEEDED define
- new tlb-flags header
- Cleanup i386 KVM_HAVE_MCE_INJECTION definition + move KVM_HAVE_MCE_INJECTION
- remove comment about cs_base in target/arm/cpu.h
- updated commit message about registers visibility between aarch32/aarch64
- tried remove ifdefs in target/arm/helper.c but this resulted in more a ugly
  result. So just comment calls for now, as we'll clean this file later.
- make most of the boards in hw/arm common

Pierrick Bouvier (30):
  exec/cpu-all: remove BSWAP_NEEDED
  exec/cpu-all: extract tlb flags defines to exec/tlb-flags.h
  exec/cpu-all: move cpu_copy to linux-user/qemu.h
  include/exec/cpu-all: move compile time check for CPUArchState to
cpu-target.c
  exec/cpu-all: remove system/memory include
  exec/cpu-all: remove exec/page-protection include
  exec/cpu-all: remove tswap include
  exec/cpu-all: remove exec/cpu-interrupt include
  exec/cpu-all: remove exec/cpu-defs include
  exec/cpu-all: remove exec/target_page include
  exec/cpu-all: remove hw/core/cpu.h include
  accel/tcg: fix missing includes for TCG_GUEST_DEFAULT_MO
  accel/tcg: fix missing includes for TARGET_HAS_PRECISE_SMC
  exec/cpu-all: remove cpu include
  exec/cpu-all: transfer exec/cpu-common include to cpu.h headers
  exec/cpu-all: remove this header
  exec/target_page: runtime defintion for TARGET_PAGE_BITS_MIN
  accel/kvm: move KVM_HAVE_MCE_INJECTION define to kvm-all.c
  exec/poison: KVM_HAVE_MCE_INJECTION can now be poisoned
  target/arm/cpu: always define kvm related registers
  target/arm/cpu: flags2 is always uint64_t
  target/arm/cpu: define same set of registers for aarch32 and aarch64
  target/arm/cpu: remove inline stubs for aarch32 emulation
  meson: add common hw files
  hw/arm/boot: make compilation unit hw common
  hw/arm/armv7m: prepare compilation unit to be common
  hw/arm/digic_boards: prepare compilation unit to be common
  hw/arm/xlnx-zynqmp: prepare compilation unit to be common
  hw/arm/xlnx-versal: prepare compilation unit to be common
  hw/arm: make most of the compilation units common

 meson.build |  37 +++-
 accel/tcg/internal-target.h |   1 +
 accel/tcg/tb-internal.h |   1 -
 hw/s390x/ipl.h  |   2 +
 include/exec/cpu_ldst.h |   1 +
 include/exec/exec-all.h |   1 +
 include/exec/poison.h   |   4 +
 include/exec/target_page.h  |   3 +
 include/exec/{cpu-all.h => tlb-flags.h} |  38 +---
 include/hw/core/cpu.h   |   2 +-
 include/qemu/bswap.h|   2 +-
 include/system/kvm.h|   2 -
 linux-user/qemu.h   |   3 +
 linux-user/sparc/target_syscall.h   |   2 +
 linux-user/syscall_defs.h   |   2 +-
 target/alpha/cpu.h  |   4 +-
 target/arm/cpu.h|  40 ++--
 target/arm/internals.h  |   1 +
 target/avr/cpu.h|   4 +-
 target/hexagon/cpu.h|   3 +-
 target/hppa/cpu.h   |   5 +-
 target/i386/cpu.h   |   5 +-
 target/i386/hvf/vmx.h   |   1 +
 target/loongarch/cpu.h  |   4 +-
 target/m68k/cpu.h   |   4 +-
 target/microblaze/cpu.h |   4 +-
 target/mips/cpu.h   |   4 +-
 target/openrisc/cpu.h   |   4 +-
 target/ppc/cpu.h|   4 +-
 target/ppc/mmu-hash32.h |   2 +
 target/ppc/mmu-hash64.h |   2 +
 target/riscv/cpu.h  |   4 +-

Re: [PATCH v6] hw/misc/vmfwupdate: Introduce hypervisor fw-cfg interface support

2025-03-20 Thread Ani Sinha
On Thu, Mar 20, 2025 at 7:24 PM Alexander Graf  wrote:
>
> Hey Gerd,
>
> On 18.03.25 12:11, Gerd Hoffman wrote:
> >Hi,
> >
> >> Maybe not from the user's point of view, but surely for the vmfwupdate
> >> interface design and for the launch measurement calculations.
> >>
> >> When using igvm parameters for the kernel hashes we need to pass on (at
> >> least) two items via vmfwupdate API:  The igvm image itself and the
> >> kernel hashes, so the VMM can fill the parameters for launch.
> >>
> >> I tend to think it makes sense to keep the region list, so we can
> >> actually pass on multiple items if needed, and simply add region flags
> >> to declare that a region is an IGVM image.
> > Went over the interface spec today, here it is.  Changes:
> >
> >   - Moved descriptions into source code comments.
> >   - Added leftovers noticed in recent discussions, such as cpuid page.
> >   - Added capability flags and region flags for IGVM.
> >
> > Open questions:
> >
> >   - Does the idea to use igvm parameters for the kernel hashes makes
> > sense?  Are parameters part of the launch measurement?
> >   - Do we want actually keep the complete interface (and the functional
> > overlap with igvm)?
>
>
> I think if we want to embrace IGVM, we should embrace it fully and make
> it replace the region list. At the end of the day, IGVM is effectively a
> region list plus data.

Are you suggesting that vmfwupdate only accept IGVM as payload? I am
not sure if I like that idea.

>
> How difficult would it be to put up a prototype that uses only IGVM as
> vmfwupdate payload? We can definitely assemble that IGVM in ukify.py or
> as part of the boot stub. Or for the prototype even pre-assemble by hand.
>
>
> Alex
>




Re: [PATCH 05/10] target/ppc: expand comment on FP/VMX/VSX access functions

2025-03-20 Thread Pierrick Bouvier

On 3/19/25 11:22, Alex Bennée wrote:

Mainly as an aid to myself getting confused too many bswaps deep into
the code.

Signed-off-by: Alex Bennée 
---
  target/ppc/cpu.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index efab54a068..1e833ade04 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2906,7 +2906,12 @@ static inline bool lsw_reg_in_range(int start, int 
nregs, int rx)
 (start + nregs > 32 && (rx >= start || rx < start + nregs - 32));
  }
  
-/* Accessors for FP, VMX and VSX registers */

+/*
+ * Access functions for FP, VMX and VSX registers
+ *
+ * The register is stored as a 128 bit host endian value so we need to
+ * take that into account when accessing smaller parts of it.
+ */
  #if HOST_BIG_ENDIAN
  #define VsrB(i) u8[i]
  #define VsrSB(i) s8[i]


Reviewed-by: Pierrick Bouvier 



Re: [PATCH 02/10] gdbstub: introduce target independent gdb register helper

2025-03-20 Thread Pierrick Bouvier

On 3/19/25 11:22, Alex Bennée wrote:

The current helper.h functions rely on hard coded assumptions about
target endianess to use the tswap macros. We also end up double
swapping a bunch of values if the target can run in multiple endianess
modes. Avoid this by getting the target to pass the endianess and size
via a MemOp and fixing up appropriately.

Signed-off-by: Alex Bennée 
---
  include/gdbstub/registers.h | 30 ++
  gdbstub/gdbstub.c   | 22 ++
  2 files changed, 52 insertions(+)
  create mode 100644 include/gdbstub/registers.h

diff --git a/include/gdbstub/registers.h b/include/gdbstub/registers.h
new file mode 100644
index 00..4abc7a6ae7
--- /dev/null
+++ b/include/gdbstub/registers.h
@@ -0,0 +1,30 @@
+/*
+ * GDB Common Register Helpers
+ *
+ * Copyright (c) 2025 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GDB_REGISTERS_H
+#define GDB_REGISTERS_H
+
+#include "exec/memop.h"
+
+/**
+ * gdb_get_register_value() - get register value for gdb
+ * mo: size and endian MemOp
+ * buf: GByteArray to store in target order
+ * val: pointer to value in host order
+ *
+ * This replaces the previous legacy read functions with a single
+ * function to handle all sizes. Passing @mo allows the target mode to
+ * be taken into account and avoids using hard coded tswap() macros.
+ *
+ * Returns the number of bytes written to the array.
+ */
+int gdb_get_register_value(MemOp op, GByteArray *buf, uint8_t *val);
+
+#endif /* GDB_REGISTERS_H */
+
+
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 282e13e163..3d7b1028e4 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -32,6 +32,7 @@
  #include "exec/gdbstub.h"
  #include "gdbstub/commands.h"
  #include "gdbstub/syscalls.h"
+#include "gdbstub/registers.h"
  #ifdef CONFIG_USER_ONLY
  #include "accel/tcg/vcpu-state.h"
  #include "gdbstub/user.h"
@@ -45,6 +46,7 @@
  #include "system/runstate.h"
  #include "exec/replay-core.h"
  #include "exec/hwaddr.h"
+#include "exec/memop.h"
  
  #include "internals.h"
  
@@ -551,6 +553,26 @@ static int gdb_write_register(CPUState *cpu, uint8_t *mem_buf, int reg)

  return 0;
  }
  
+/*

+ * Target helper function to read value into GByteArray, target
+ * supplies the size and target endianess via the MemOp.
+ */
+int gdb_get_register_value(MemOp op, GByteArray *buf, uint8_t *val)
+{
+size_t bytes = memop_size(op);
+
+if (op & MO_BSWAP) {
+for ( int i = bytes ; i > 0; i--) {
+g_byte_array_append(buf, &val[i - 1], 1);
+};
+} else {
+g_byte_array_append(buf, val, bytes);
+}
+
+return bytes;
+}
+
+
  static void gdb_register_feature(CPUState *cpu, int base_reg,
   gdb_get_reg_cb get_reg, gdb_set_reg_cb 
set_reg,
   const GDBFeature *feature)


It could be preferable to set buf with the value, instead of simply 
appending the value. This way, there is no need to return the size, as 
it's contained in buffer size itself.


If we insist on returning the size, it's better to make it a parameter 
(and use a void parameter type), because at the moment, it gives the 
impression the function itself returns the value, which may be confusing.


Re: [PATCH 02/10] gdbstub: introduce target independent gdb register helper

2025-03-20 Thread Pierrick Bouvier

On 3/20/25 12:30, Pierrick Bouvier wrote:

On 3/19/25 11:22, Alex Bennée wrote:

The current helper.h functions rely on hard coded assumptions about
target endianess to use the tswap macros. We also end up double
swapping a bunch of values if the target can run in multiple endianess
modes. Avoid this by getting the target to pass the endianess and size
via a MemOp and fixing up appropriately.

Signed-off-by: Alex Bennée 
---
   include/gdbstub/registers.h | 30 ++
   gdbstub/gdbstub.c   | 22 ++
   2 files changed, 52 insertions(+)
   create mode 100644 include/gdbstub/registers.h

diff --git a/include/gdbstub/registers.h b/include/gdbstub/registers.h
new file mode 100644
index 00..4abc7a6ae7
--- /dev/null
+++ b/include/gdbstub/registers.h
@@ -0,0 +1,30 @@
+/*
+ * GDB Common Register Helpers
+ *
+ * Copyright (c) 2025 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GDB_REGISTERS_H
+#define GDB_REGISTERS_H
+
+#include "exec/memop.h"
+
+/**
+ * gdb_get_register_value() - get register value for gdb
+ * mo: size and endian MemOp
+ * buf: GByteArray to store in target order
+ * val: pointer to value in host order
+ *
+ * This replaces the previous legacy read functions with a single
+ * function to handle all sizes. Passing @mo allows the target mode to
+ * be taken into account and avoids using hard coded tswap() macros.
+ *
+ * Returns the number of bytes written to the array.
+ */
+int gdb_get_register_value(MemOp op, GByteArray *buf, uint8_t *val);
+
+#endif /* GDB_REGISTERS_H */
+
+
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 282e13e163..3d7b1028e4 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -32,6 +32,7 @@
   #include "exec/gdbstub.h"
   #include "gdbstub/commands.h"
   #include "gdbstub/syscalls.h"
+#include "gdbstub/registers.h"
   #ifdef CONFIG_USER_ONLY
   #include "accel/tcg/vcpu-state.h"
   #include "gdbstub/user.h"
@@ -45,6 +46,7 @@
   #include "system/runstate.h"
   #include "exec/replay-core.h"
   #include "exec/hwaddr.h"
+#include "exec/memop.h"
   
   #include "internals.h"
   
@@ -551,6 +553,26 @@ static int gdb_write_register(CPUState *cpu, uint8_t *mem_buf, int reg)

   return 0;
   }
   
+/*

+ * Target helper function to read value into GByteArray, target
+ * supplies the size and target endianess via the MemOp.
+ */
+int gdb_get_register_value(MemOp op, GByteArray *buf, uint8_t *val)
+{
+size_t bytes = memop_size(op);
+
+if (op & MO_BSWAP) {
+for ( int i = bytes ; i > 0; i--) {
+g_byte_array_append(buf, &val[i - 1], 1);
+};
+} else {
+g_byte_array_append(buf, val, bytes);
+}
+
+return bytes;
+}
+
+
   static void gdb_register_feature(CPUState *cpu, int base_reg,
gdb_get_reg_cb get_reg, gdb_set_reg_cb 
set_reg,
const GDBFeature *feature)


It could be preferable to set buf with the value, instead of simply
appending the value. This way, there is no need to return the size, as
it's contained in buffer size itself.

If we insist on returning the size, it's better to make it a parameter
(and use a void parameter type), because at the moment, it gives the
impression the function itself returns the value, which may be confusing.


Seems like it's the existing convention through 
gdb_set_reg_cb/gdb_get_reg_cb, so we have to follow this.


Re: [PATCH 00/10] gdbstub: conversion to runtime endianess helpers

2025-03-20 Thread Pierrick Bouvier

On 3/19/25 11:22, Alex Bennée wrote:

The aim of this work is to get rid of the endian aware helpers in
gdbstub/helpers.h which due to their use of tswap() mean target
gdbstubs need to be built multiple times. While this series doesn't
actually build each stub once it introduces a new helper -
gdb_get_register_value() which takes a MemOp which can describe the
current endian state of the system. This will be a lot easier to
dynamically feed from a helper function.

The most complex example is PPC which has a helper called
ppc_maybe_bswap_register() which was doing this.

This is still an RFC so I'm interested in feedback:

   - is the API sane
   - can we avoid lots of (uint8_t *) casting?


Even though the series has a good intent, the fact we make everything 
"generic" makes that we lose all guarantees we could get by relying on 
static typing, and that we had possibility of mistakes when passing size 
(which happened in patch 4 if I'm correct). And explicit casting comes 
as a *strong* warning about that.


By patch 7, I was really feeling it's not a win vs explicit functions 
per size.


If the goal of the series is to get rid of endian aware helpers, well, 
this can be fixed in the helpers themselves, without needing to 
introduce a "generic" size helper. Maybe we are trying to solve two 
different problems here?



   - should we have a reverse helper for setting registers

If this seems like the right approach I can have a go at more of the
frontends later.

There are a few other misc clean-ups I did on the way which might be
worth cherry picking for 10.0 but I'll leave that up to maintainers.

Alex.

Alex Bennée (10):
   include/gdbstub: fix include guard in commands.h
   gdbstub: introduce target independent gdb register helper
   target/arm: convert 32 bit gdbstub to new helper
   target/arm: convert 64 bit gdbstub to new helper
   target/ppc: expand comment on FP/VMX/VSX access functions
   target/ppc: make ppc_maybe_bswap_register static
   target/ppc: convert gdbstub to new helper (!hacky)
   gdbstub: assert earlier in handle_read_all_regs
   include/exec: fix assert in size_memop
   target/microblaze: convert gdbstub to new helper

  include/exec/memop.h|   4 +-
  include/gdbstub/commands.h  |   2 +-
  include/gdbstub/registers.h |  30 ++
  target/ppc/cpu.h|   8 +-
  gdbstub/gdbstub.c   |  24 -
  target/arm/gdbstub.c|  57 +++
  target/arm/gdbstub64.c  |  53 ++
  target/microblaze/gdbstub.c |  44 
  target/ppc/gdbstub.c| 194 
  9 files changed, 257 insertions(+), 159 deletions(-)
  create mode 100644 include/gdbstub/registers.h





Re: Best practice for issuing blocking calls in response to an event

2025-03-20 Thread Stefan Hajnoczi
On Thu, Mar 20, 2025 at 12:34 PM Miles Glenn  wrote:
>
> Hello,
>
> I am attempting to simulate a system with multiple CPU
> architectures.  To do this I am starting a unique QEMU process for each
> CPU architecture that is needed. I'm also developing some QEMU code
> that aids in transporting MMIO transactions across the process
> boundaries using sockets.

I have CCed Phil. He has been working on heterogenous target emulation
and might be interested.

>
> The design takes MMIO request messages off of a socket, services the
> request by calling address_space_ldq_be(), then sends a response
> message (containing the requested data) over the same
> socket.  Currently, this is all done inside the socket IOReadHandler
> callback function.

At a high level this is similar to the vfio-user feature where a PCI
device is emulated in a separate process. This also involves sending
messages describing QEMU's MemoryRegion accesses. See the "remote"
machine type in QEMU to look at the code.

>
> This works as long as the targeted register exists in the same QEMU
> process that received the request.  However, If the register exists in
> another QEMU process, then the call to address_space_ldq_be() results
> in another socket message being sent to that QEMU process, requesting
> the data, and then waiting (blocking) for the response message
> containing the data.  In other words, it ends up blocking inside the
> event handler and even though the QEMU process containing the target
> register was able to receive the request and send the response, the
> originator of the request is unable to receive the response until it
> eventually times out and stops blocking.  Once it times out and stops
> blocking, it does receive the response, but now it is too late.
>
> Here's a summary of the stack up to where the code blocks:
>
> IOReadHandler callback
>   calls address_space_ldq_be()
> resolves to mmio read op of a remote device
>   sends request over socket and waits (blocks) for response
>
> So, I'm looking for a way to handle the work of calling
> address_space_ldq_be(), which might block when attempting to read a
> register of a remote device, without blocking inside the IOReadHandler
> callback context.
>
> I've done a lot of searches and reading about how to do this on the web
> and in the QEMU code but it's still not really clear to me how this
> should be done in QEMU.  I've seen a lot about using coroutines to
> handle cases like this. Is that what I should be using here?

The fundamental problem is that address_space_ldq_be() is synchronous,
so there is no way to return back to the caller until the response has
been received.

vfio-user didn't solve this problem. It simply blocks until the
response is received, but it does drop the Big QEMU Lock during this
time so that other vCPU threads can run. For example, see
hw/remote/proxy.c:send_bar_access_msg() and
mpqemu_msg_send_and_await_reply().

QEMU supports nested event loops, but they come with their own set of
gotchas. The way a nested event loop might help here is to send the
request and then call aio_poll() to receive the response in another
IOReadHandler. This way other event loop processing can take place
while waiting in address_space_ldq_be().

The second problem is that this approach where QEMU processes send
requests to each other needs to be implemented carefully to avoid
deadlocks. For example, devices that do DMA could load/store memory
belonging to another device handled by another QEMU. Once there is an
A -> B -> A situation it could deadlock.

Both vfio-user and vhost-user have similar issues with their
bi-directional communication where a device emulation process can send
a message to QEMU while processing a message from QEMU. Deadlock can
be avoided if the code is structured so that QEMU is able to receive
new requests during the time when it is waiting for a response.

Stefan



Re: Raspberry Pi 3B energy consumption

2025-03-20 Thread Peter Maydell
On Thu, 20 Mar 2025 at 20:09, Peter Maydell  wrote:
> If you need WFE to work, that's certainly feasible and something it would
> be nice to see, but potentially quite a bit of work in the guts of QEMU's
> arm emulation. (Basically going to sleep on WFE is easy but then making
> sure that all the events  and situations that need to wake up a WFE is
> tedious. We implement sleep-on-WFI but not sleep-on-WFI because the set

should read "sleep-on-WFI but not sleep-on-WFE", of course. Oops...

> of WFI-wakeup events is rather smaller than the WFE-wakeup events.) It's
> been in the "we really should implement this but since the only downside
> is the host CPUs spinning, we've never got round to it" bucket for years.

-- PMM



RE: [PATCH 22/39] target/hexagon: Implement setprio, resched

2025-03-20 Thread Sid Manning


> -Original Message-
> From: ltaylorsimp...@gmail.com 
> Sent: Thursday, March 20, 2025 2:45 PM
> To: 'Brian Cain' ; qemu-devel@nongnu.org
> Cc: richard.hender...@linaro.org; phi...@linaro.org; Matheus Bernardino
> (QUIC) ; a...@rev.ng; a...@rev.ng; Marco
> Liebel (QUIC) ; alex.ben...@linaro.org; Mark
> Burton (QUIC) ; Sid Manning
> ; Brian Cain 
> Subject: RE: [PATCH 22/39] target/hexagon: Implement setprio, resched
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> > -Original Message-
> > From: Brian Cain 
> > Sent: Friday, February 28, 2025 11:28 PM
> > To: qemu-devel@nongnu.org
> > Cc: brian.c...@oss.qualcomm.com; richard.hender...@linaro.org;
> > phi...@linaro.org; quic_mathb...@quicinc.com; a...@rev.ng;
> a...@rev.ng;
> > quic_mlie...@quicinc.com; ltaylorsimp...@gmail.com;
> > alex.ben...@linaro.org; quic_mbur...@quicinc.com;
> sidn...@quicinc.com;
> > Brian Cain 
> > Subject: [PATCH 22/39] target/hexagon: Implement setprio, resched
> >
> > From: Brian Cain 
> >
> > The hardware-assisted scheduler helps manage tasks on the run queue
> > and interrupt steering.
> >
> > This instruction is defined in the Qualcomm Hexagon V71 Programmer's
> > Reference Manual -
> https://docs.qualcomm.com/bundle/publicresource/80-
> > N2040-51_REV_AB_Hexagon_V71_ProgrammerS_Reference_Manual.pdf
> > See §11.9.2 SYSTEM MONITOR.
> 
> See earlier discussion on references to documents.
> 
> >
> > Signed-off-by: Brian Cain 
> > ---
> >  target/hexagon/op_helper.c | 65
> > ++
> >  1 file changed, 65 insertions(+)
> >
> > diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> > index 0dce133d3a..d0dc4afac7 100644
> > --- a/target/hexagon/op_helper.c
> > +++ b/target/hexagon/op_helper.c
> > @@ -1465,6 +1465,57 @@ void HELPER(stop)(CPUHexagonState *env)
> >  hexagon_stop_thread(env);
> >  }
> >
> > +static inline QEMU_ALWAYS_INLINE void resched(CPUHexagonState
> *env)
> > {
> > +uint32_t schedcfg;
> > +uint32_t schedcfg_en;
> > +int int_number;
> > +CPUState *cs;
> > +uint32_t lowest_th_prio = 0; /* 0 is highest prio */
> > +uint32_t bestwait_reg;
> > +uint32_t best_prio;
> > +
> > +BQL_LOCK_GUARD();
> > +qemu_log_mask(CPU_LOG_INT, "%s: check resched\n", __func__);
> > +schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG);
> > +schedcfg_en = GET_FIELD(SCHEDCFG_EN, schedcfg);
> > +int_number = GET_FIELD(SCHEDCFG_INTNO, schedcfg);
> > +
> > +if (!schedcfg_en) {
> > +return;
> > +}
> > +
> > +CPU_FOREACH(cs) {
> > +HexagonCPU *thread = HEXAGON_CPU(cs);
> > +CPUHexagonState *thread_env = &(thread->env);
> > +uint32_t th_prio = GET_FIELD(
> > +STID_PRIO, arch_get_system_reg(thread_env, HEX_SREG_STID));
> > +if (!hexagon_thread_is_enabled(thread_env)) {
> > +continue;
> > +}
> > +
> > +lowest_th_prio = (lowest_th_prio > th_prio)
> > +? lowest_th_prio
> > +: th_prio;
> > +}
> > +
> > +bestwait_reg = arch_get_system_reg(env, HEX_SREG_BESTWAIT);
> > +best_prio = GET_FIELD(BESTWAIT_PRIO, bestwait_reg);
> > +
> > +/*
> > + * If the lowest priority thread is lower priority than the
> > + * value in the BESTWAIT register, we must raise the reschedule
> > + * interrupt on the lowest priority thread.
> > + */
> > +if (lowest_th_prio > best_prio) {
> > +qemu_log_mask(CPU_LOG_INT,
> > +"%s: raising resched int %d, cur PC 0x" TARGET_FMT_lx "\n",
> > +__func__, int_number, arch_get_thread_reg(env,
> HEX_REG_PC));
> > +SET_SYSTEM_FIELD(env, HEX_SREG_BESTWAIT, BESTWAIT_PRIO,
> > 0x1ff);
> 
> What is the significance of 0x1ff?  The field is 10 bits, so this isn't 
> setting all
> the bits.
> Should this be lowest_th_prio?
[Sid Manning] 

Hi Taylor,

The value 0x1ff is correct but it does look like BESTWAIT_PRIO is not, it 
should be 9 not 10
target/hexagon/reg_fields_def.h.inc

It looks like it was added in "PATCH 19/38 target/hexagon: Define register 
fields for system regs"
I will make a fixup to that patch and correct the value.

> 
> > +hex_raise_interrupts(env, 1 << int_number, CPU_INTERRUPT_SWI);
> > +}
> > +}
> > +
> 



Re: [PATCH 04/10] target/arm: convert 64 bit gdbstub to new helper

2025-03-20 Thread Pierrick Bouvier

On 3/19/25 11:22, Alex Bennée wrote:

For some of the helpers we need a temporary variable to copy from
although we could add some helpers to return pointers into env in
those cases if we wanted to.

Signed-off-by: Alex Bennée 
---
  target/arm/gdbstub64.c | 53 ++
  1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c
index 1a4dbec567..793332af31 100644
--- a/target/arm/gdbstub64.c
+++ b/target/arm/gdbstub64.c
@@ -20,7 +20,7 @@
  #include "qemu/log.h"
  #include "cpu.h"
  #include "internals.h"
-#include "gdbstub/helpers.h"
+#include "gdbstub/registers.h"
  #include "gdbstub/commands.h"
  #include "tcg/mte_helper.h"
  #if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX)
@@ -35,15 +35,16 @@ int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray 
*mem_buf, int n)
  
  if (n < 31) {

  /* Core integer register.  */
-return gdb_get_reg64(mem_buf, env->xregs[n]);
+return gdb_get_register_value(MO_TEUO, mem_buf, (uint8_t *) 
&env->xregs[n]);
  }
  switch (n) {
  case 31:
-return gdb_get_reg64(mem_buf, env->xregs[31]);
+return gdb_get_register_value(MO_TEUO, mem_buf, (uint8_t *) 
&env->xregs[31]);
  case 32:
-return gdb_get_reg64(mem_buf, env->pc);
+return gdb_get_register_value(MO_TEUO, mem_buf, (uint8_t *) &env->pc);


For the 3 occurrences, should it be MO_TEUQ instead?


  case 33:
-return gdb_get_reg32(mem_buf, pstate_read(env));
+uint32_t pstate = pstate_read(env);
+return gdb_get_register_value(MO_TEUL, mem_buf, (uint8_t *) &pstate);
  }
  /* Unknown register.  */
  return 0;
@@ -82,23 +83,27 @@ int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, 
int reg)
  {
  ARMCPU *cpu = ARM_CPU(cs);
  CPUARMState *env = &cpu->env;
+uint32_t fpr;
  
  switch (reg) {

  case 0 ... 31:
  {
  /* 128 bit FP register - quads are in LE order */
  uint64_t *q = aa64_vfp_qreg(env, reg);
-return gdb_get_reg128(buf, q[1], q[0]);
+return gdb_get_register_value(MO_TEUO, buf, (uint8_t *) q);
  }
  case 32:
  /* FPSR */
-return gdb_get_reg32(buf, vfp_get_fpsr(env));
+fpr = vfp_get_fpsr(env);
+break;
  case 33:
  /* FPCR */
-return gdb_get_reg32(buf, vfp_get_fpcr(env));
+fpr = vfp_get_fpcr(env);
+break;
  default:
  return 0;
  }
+return gdb_get_register_value(MO_TEUL, buf, (uint8_t *) &fpr);
  }
  
  int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg)

@@ -132,30 +137,37 @@ int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray 
*buf, int reg)
  {
  ARMCPU *cpu = ARM_CPU(cs);
  CPUARMState *env = &cpu->env;
+uint32_t fpr;
  
  switch (reg) {

  /* The first 32 registers are the zregs */
  case 0 ... 31:
  {
  int vq, len = 0;
+ARMVectorReg *zreg = &env->vfp.zregs[reg];
+
  for (vq = 0; vq < cpu->sve_max_vq; vq++) {
-len += gdb_get_reg128(buf,
-  env->vfp.zregs[reg].d[vq * 2 + 1],
-  env->vfp.zregs[reg].d[vq * 2]);
+len += gdb_get_register_value(MO_TEUQ, buf,
+  (uint8_t *) &zreg->d[vq * 2 + 1]);
+len += gdb_get_register_value(MO_TEUQ, buf,
+  (uint8_t *) &zreg->d[vq * 2]);
  }
  return len;
  }
  case 32:
-return gdb_get_reg32(buf, vfp_get_fpsr(env));
+fpr = vfp_get_fpsr(env);
+return gdb_get_register_value(MO_TEUL, buf, (uint8_t *) &fpr);
  case 33:
-return gdb_get_reg32(buf, vfp_get_fpcr(env));
+fpr = vfp_get_fpcr(env);
+return gdb_get_register_value(MO_TEUL, buf, (uint8_t *) &fpr);
  /* then 16 predicates and the ffr */
  case 34 ... 50:
  {
  int preg = reg - 34;
  int vq, len = 0;
  for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) {
-len += gdb_get_reg64(buf, env->vfp.pregs[preg].p[vq / 4]);
+len += gdb_get_register_value(MO_TEUQ, buf,
+  (uint8_t *) 
&env->vfp.pregs[preg].p[vq / 4]);
  }
  return len;
  }
@@ -165,8 +177,8 @@ int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, 
int reg)
   * We report in Vector Granules (VG) which is 64bit in a Z reg
   * while the ZCR works in Vector Quads (VQ) which is 128bit chunks.
   */
-int vq = sve_vqm1_for_el(env, arm_current_el(env)) + 1;
-return gdb_get_reg64(buf, vq * 2);
+uint64_t vq = (sve_vqm1_for_el(env, arm_current_el(env)) + 1) * 2;
+return gdb_get_register_value(MO_TEUL, buf, (uint8_t *) &vq);
  }


Should it be MO_TEUQ instead?


  default:
  /* gdbstub asked for something out our range */
@@ -

RE: [PATCH 28/38] target/hexagon: Initialize htid, modectl regs

2025-03-20 Thread Sid Manning


> -Original Message-
> From: Richard Henderson 
> Sent: Thursday, March 20, 2025 10:34 AM
> To: Sid Manning ; ltaylorsimp...@gmail.com;
> 'Philippe Mathieu-Daudé' ; 'Brian Cain'
> ; qemu-devel@nongnu.org
> Cc: Matheus Bernardino (QUIC) ;
> a...@rev.ng; a...@rev.ng; Marco Liebel (QUIC)
> ; alex.ben...@linaro.org; Mark Burton (QUIC)
> ; Brian Cain 
> Subject: Re: [PATCH 28/38] target/hexagon: Initialize htid, modectl regs
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> On 3/19/25 09:08, Sid Manning wrote:
> >
> >
> >> -Original Message-
> >> From: Richard Henderson 
> >> Sent: Thursday, March 13, 2025 2:07 PM
> >> To: ltaylorsimp...@gmail.com; 'Philippe Mathieu-Daudé'
> >> ; 'Brian Cain' ;
> >> qemu- de...@nongnu.org
> >> Cc: Matheus Bernardino (QUIC) ;
> >> a...@rev.ng; a...@rev.ng; Marco Liebel (QUIC)
> >> ; alex.ben...@linaro.org; Mark Burton
> >> (QUIC) ; Sid Manning
> ;
> >> Brian Cain 
> >> Subject: Re: [PATCH 28/38] target/hexagon: Initialize htid, modectl
> >> regs
> >>
> >> WARNING: This email originated from outside of Qualcomm. Please be
> >> wary of any links or attachments, and do not enable macros.
> >>
> >> On 3/13/25 11:47, ltaylorsimp...@gmail.com wrote:
> >>> What we are trying to model is an instance of a Hexagon that has a
> >>> number
> >> of threads and some resources that are shared.  The shared resources
> >> include the TLB and global S registers.  The initial thought was to
> >> tie the shared resources to the thread with cpu_index == 0.  If we
> >> were to model a Qualcomm SoC, there would be multiple ARM cores and
> >> multiple Hexagon instances.  Each Hexagon instance would have
> >> distinct shared resources.  So, you are correct that using cpu_index is not
> going to scale.
> >>>
> >>> What is the recommended way to model this?  I see a "nr_threads"
> >>> field in
> >> CPUCore but no clear way to find the threads.  PPC has some cores
> >> that add a "threads" field.  Should we follow this approach?
> >>
> >> I recommend that the shared resources be modeled as a separate
> >> Object, which is linked via object_property_add_link to all of the cpus 
> >> that
> use it.
> >>
> > [Sid Manning]
> > Hi Richard,
> > An example of shared resources would be the system registers.  They are
> broken down into 2 regions.  Each thread has its own copy of system
> registers 0-15 while registers 16-63 are global.  Right now CPUHexagonState
> contains a pointer, g_sreg which points back to cpu[0]'s state thus keeping
> one copy of the global registers, accesses are done with BQL held to avoid
> race conditions.
> >
> > Your suggestion is to create a new object to represent the set of global
> system registers, I tried this:
> >
> > #define TYPE_HEXAGON_G_SREG "hexagon.global_sreg"
> > OBJECT_DECLARE_SIMPLE_TYPE(HexagonGlobalSREGState,
> HEXAGON_G_SREG)
> > struct HexagonGlobalSREGState {
> >  SysBusDevice parent_obj;
> 
> SysBusDevice is more than you need -- Object is sufficient here.
[Sid Manning] 
Thanks!  Will change that to Object.

> 
> >  uint32_t regs[64];
> > };
> >
> > In our virtual machine init:
> > vms->g_sreg =
> HEXAGON_G_SREG(qdev_new(TYPE_HEXAGON_G_SREG));
> >
> > and
> >   object_property_set_link(OBJECT(cpu), "global-sreg",
> > OBJECT(vms->g_sreg), &error_abort);
> >
> > to attach the global regs to the cpu, but the above doesn't update cpu
> elements the same way calls to qdev_prop_set_uint32 will do,
> object_property_set_link doesn’t error out and returns true.
> 
> Did you add the DEFINE_PROP_LINK to match?  I'd expect something like
> 
>  DEFINE_PROP_LINK("global-sreg", HexagonCPU, g_sreg,
>   TYPE_HEXAGON_G_SREG, HexagonGlobalSREGState *),
> 
> Beyond that, I guess I'd have to see an actual patch to work out what's
> wrong.
[Sid Manning] 
Yes, PROP_LINK above is almost exactly what I added.

Below is a patch representing what I tried.  I hoped that cpu->global_sreg 
would be updated after the call to object_property_set_link but it was not, in 
the patch below I manually set it.

diff --git a/hw/hexagon/sysreg.h b/hw/hexagon/sysreg.h
new file mode 100644
index 00..d7204896cf
--- /dev/null
+++ b/hw/hexagon/sysreg.h
@@ -0,0 +1,47 @@
+/*
+ * Hexagon system reg
+ * FIXME
+ */
+
+#ifndef HW_HEXAGON_HART_H
+#define HW_HEXAGON_HART_H
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define NUM_SREGS 64
+struct HexagonGlobalSREGState {
+struct Object parent_obj;
+uint32_t regs[NUM_SREGS];
+};
+
+#define TYPE_HEXAGON_G_SREG "hexagon.global_sreg"
+OBJECT_DECLARE_SIMPLE_TYPE(HexagonGlobalSREGState, HEXAGON_G_SREG)
+
+static void hexagon_global_sreg_init(Object *obj)
+{
+HexagonGlobalSREGState *s = HEXAGON_G_SREG(obj);
+/*
+ * The first 16 registers are thread local and should not come from
+ * this structure
+ */
+for (int i = 0; i < 16; i++) {
+s->regs[i] = 0x

Re: [PATCH] contrib/plugins: Install plugins to moddir

2025-03-20 Thread Alex Bennée
Christoph Müllner  writes:

> On Mon, Mar 3, 2025 at 11:24 AM 汪鹏程  wrote:
>>
>> What about plugins under `tests/tcg/plugins/`?
>
> It feels a bit odd to install something from the tests directory.
> If certain plugins in tests/tcg/plugins are of general use (not just
> for testing) then it might be
> reasonable to move them to contrib/plugins.

They are useful in their own right - but the principle use case is to
test the plugin infrastructure.

>
>
>
>> From: "Christoph Müllner"
>> Date: Mon, Mar 3, 2025, 18:09
>> Subject: [External] [PATCH] contrib/plugins: Install plugins to moddir
>> To: , "Alex Bennée", 
>> "Alexandre Iooss", "Mahmoud 
>> Mandour", "Pierrick 
>> Bouvier"
>> Cc: "Wang Pengcheng", "Christoph 
>> Müllner"
>> Currently the built plugins can only be found in the build directory.
>> This patch lists them as installable objects, which will be copied
>> into qemu_moddir with `make install`.
>>
>> Signed-off-by: Christoph Müllner 
>> ---
>>  contrib/plugins/meson.build | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/contrib/plugins/meson.build b/contrib/plugins/meson.build
>> index 82c97ca0f5..c25a1871b7 100644
>> --- a/contrib/plugins/meson.build
>> +++ b/contrib/plugins/meson.build
>> @@ -14,11 +14,15 @@ if get_option('plugins')
>>  include_directories: '../../include/qemu',
>>  link_depends: [win32_qemu_plugin_api_lib],
>>  link_args: win32_qemu_plugin_api_link_flags,
>> -dependencies: glib)
>> +dependencies: glib,
>> +install: true,
>> +install_dir: qemu_moddir)
>>  else
>>t += shared_module(i, files(i + '.c'),
>>  include_directories: '../../include/qemu',
>> -dependencies: glib)
>> +dependencies: glib,
>> +install: true,
>> +install_dir: qemu_moddir)
>>  endif
>>endforeach
>>  endif
>> --
>> 2.47.1

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH v3] QIOChannelSocket: Flush zerocopy socket error queue on sendmsg failure due to ENOBUF

2025-03-20 Thread Manish

Hi Daniel, Peter,

Please let me know if this latest patch looks good?


On 17/03/25 7:22 am, Manish Mishra wrote:

We allocate extra metadata SKBs in case of a zerocopy send. This metadata
memory is accounted for in the OPTMEM limit. If there is any error while
sending zerocopy packets or if zerocopy is skipped, these metadata SKBs are
queued in the socket error queue. This error queue is freed when userspace
reads it.

Usually, if there are continuous failures, we merge the metadata into a single
SKB and free another one. As a result, it never exceeds the OPTMEM limit.
However, if there is any out-of-order processing or intermittent zerocopy
failures, this error chain can grow significantly, exhausting the OPTMEM limit.
As a result, all new sendmsg requests fail to allocate any new SKB, leading to
an ENOBUF error. Depending on the amount of data queued before the flush
(i.e., large live migration iterations), even large OPTMEM limits are prone to
failure.

To work around this, if we encounter an ENOBUF error with a zerocopy sendmsg,
we flush the error queue and retry once more.

V2:
   1. Removed the dirty_sync_missed_zero_copy migration stat.
   2. Made the call to qio_channel_socket_flush_internal() from
  qio_channel_socket_writev() non-blocking.

V3:
   1. Add the dirty_sync_missed_zero_copy migration stat again.

Signed-off-by: Manish Mishra 
---
  include/io/channel-socket.h |  5 +++
  io/channel-socket.c | 74 ++---
  2 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index ab15577d38..2c48b972e8 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -49,6 +49,11 @@ struct QIOChannelSocket {
  socklen_t remoteAddrLen;
  ssize_t zero_copy_queued;
  ssize_t zero_copy_sent;
+/**
+ * This flag indicates whether any new data was successfully sent with
+ * zerocopy since the last qio_channel_socket_flush() call.
+ */
+bool new_zero_copy_sent_success;
  };
  
  
diff --git a/io/channel-socket.c b/io/channel-socket.c

index 608bcf066e..604ca9890d 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -37,6 +37,12 @@
  
  #define SOCKET_MAX_FDS 16
  
+#ifdef QEMU_MSG_ZEROCOPY

+static int qio_channel_socket_flush_internal(QIOChannel *ioc,
+ bool block,
+ Error **errp);
+#endif
+
  SocketAddress *
  qio_channel_socket_get_local_address(QIOChannelSocket *ioc,
   Error **errp)
@@ -65,6 +71,7 @@ qio_channel_socket_new(void)
  sioc->fd = -1;
  sioc->zero_copy_queued = 0;
  sioc->zero_copy_sent = 0;
+sioc->new_zero_copy_sent_success = FALSE;
  
  ioc = QIO_CHANNEL(sioc);

  qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -566,6 +573,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
  size_t fdsize = sizeof(int) * nfds;
  struct cmsghdr *cmsg;
  int sflags = 0;
+bool zero_copy_flush_pending = TRUE;
  
  memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
  
@@ -612,9 +620,25 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,

  goto retry;
  case ENOBUFS:
  if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
-error_setg_errno(errp, errno,
- "Process can't lock enough memory for using 
MSG_ZEROCOPY");
-return -1;
+/**
+ * Socket error queueing may exhaust the OPTMEM limit. Try
+ * flushing the error queue once.
+ */
+if (zero_copy_flush_pending) {
+ret = qio_channel_socket_flush_internal(ioc, false, errp);
+if (ret < 0) {
+error_setg_errno(errp, errno,
+ "Zerocopy flush failed");
+return -1;
+}
+zero_copy_flush_pending = FALSE;
+goto retry;
+} else {
+error_setg_errno(errp, errno,
+ "Process can't lock enough memory for "
+ "using MSG_ZEROCOPY");
+return -1;
+}
  }
  break;
  }
@@ -725,8 +749,9 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
  
  
  #ifdef QEMU_MSG_ZEROCOPY

-static int qio_channel_socket_flush(QIOChannel *ioc,
-Error **errp)
+static int qio_channel_socket_flush_internal(QIOChannel *ioc,
+ bool block,
+ Error **errp)
  {
  QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
  struct msghdr msg = {};
@@ -734,7 +759,6 @@ static int qio_channel_soc

[PULL 01/12] ppc/spapr: Fix RTAS stopped state

2025-03-20 Thread Nicholas Piggin
This change takes the CPUPPCState 'quiesced' field added for powernv
hardware CPU core controls (used to stop and start cores), and extends
it to spapr to model the "RTAS stopped" state. This prevents the
schedulers attempting to run stopped CPUs unexpectedly, which can cause
hangs and possibly other unexpected behaviour.

The detail of the problematic situation is this:

A KVM spapr guest boots with all secondary CPUs defined to be in the
"RTAS stopped" state. In this state, the CPU is only responsive to the
start-cpu RTAS call. This behaviour is modeled in QEMU with the
start_powered_off feature, which sets ->halted on secondary CPUs at
boot. ->halted=true looks like an idle / sleep / power-save state which
typically is responsive to asynchronous interrupts, but spapr clears
wake-on-interrupt bits in the LPCR SPR. This more-or-less works.

Commit e8291ec16da8 ("target/ppc: fix timebase register reset state")
recently caused the decrementer to expire sooner at boot, causing a
decrementer exception on secondary CPUs in RTAS stopped state. This
was not a problem on TCG, but KVM limits how a guest can modify LPCR, in
particular it prevents the clearing of wake-on-interrupt bits, and so in
the course of CPU register synchronisation, the LPCR as set by spapr to
model the RTAS stopped state is overwritten with KVM's LPCR value, and
that then causes QEMU's interrupt code to notice the expired decrementer
exception, turn that into an interrupt, and set CPU_INTERRUPT_HARD.

That causes the CPU to be kicked, and the KVM vCPU thread to loop
calling kvm_cpu_exec(). kvm_cpu_exec() calls
kvm_arch_process_async_events(), which on ppc just returns ->halted.
This is still true, so it returns immediately with EXCP_HLT, and the
vCPU never goes to sleep because qemu_wait_io_event() sees
CPU_INTERRUPT_HARD is set. All this while the vCPU holds the bql.  This
causes the boot CPU to eventually lock up when it needs the bql.

So make 'quiesced' represent the "RTAS stopped" state, and have it
explicitly not respond to exceptions (interrupt conditions) rather than
rely on machine register state to model that state. This matches the
powernv quiesced state very well because it essentially turns off the
CPU core via a side-band control unit.

There are still issues with QEMU and KVM idea of LPCR diverging and that
is quite ugly and fragile that should be fixed. spapr should synchronize
its LPCR properly with KVM, and not try to use values that KVM does not
support.

Reported-by: Misbah Anjum N 
Tested-by: Misbah Anjum N 
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/pnv_core.c|  6 +-
 hw/ppc/spapr_cpu_core.c  |  6 ++
 hw/ppc/spapr_rtas.c  |  5 -
 target/ppc/cpu.h | 11 +++
 target/ppc/excp_helper.c |  4 
 5 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 99d9644ee3..a33977da18 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -248,21 +248,25 @@ static void pnv_core_power10_xscom_write(void *opaque, 
hwaddr addr,
 
 if (val & PPC_BIT(7 + 8 * i)) { /* stop */
 val &= ~PPC_BIT(7 + 8 * i);
-cpu_pause(cs);
 env->quiesced = true;
+ppc_maybe_interrupt(env);
+cpu_pause(cs);
 }
 if (val & PPC_BIT(6 + 8 * i)) { /* start */
 val &= ~PPC_BIT(6 + 8 * i);
 env->quiesced = false;
+ppc_maybe_interrupt(env);
 cpu_resume(cs);
 }
 if (val & PPC_BIT(4 + 8 * i)) { /* sreset */
 val &= ~PPC_BIT(4 + 8 * i);
 env->quiesced = false;
+ppc_maybe_interrupt(env);
 pnv_cpu_do_nmi_resume(cs);
 }
 if (val & PPC_BIT(3 + 8 * i)) { /* clear maint */
 env->quiesced = false;
+ppc_maybe_interrupt(env);
 /*
  * Hardware has very particular cases for where clear maint
  * must be used and where start must be used to resume a
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 0671d9e44b..faf9170ba6 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -37,6 +37,9 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
 
 cpu_reset(cs);
 
+env->quiesced = true; /* set "RTAS stopped" state. */
+ppc_maybe_interrupt(env);
+
 /*
  * "PowerPC Processor binding to IEEE 1275" defines the initial MSR state
  * as 32bit (MSR_SF=0) with MSR_ME=1 and MSR_FP=1 in "8.2.1. Initial
@@ -98,6 +101,9 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong 
nip,
 CPU(cpu)->halted = 0;
 /* Enable Power-saving mode Exit Cause exceptions */
 ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
+
+env->quiesced = false; /* clear "RTAS stopped" state. */
+ppc_maybe_interrupt(env);
 }
 
 /*
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
inde

[PULL 04/12] ppc/xive2: Fix logical / bitwise comparison typo

2025-03-20 Thread Nicholas Piggin
The comparison as written is always false (perhaps confusingly, because
the functions/macros are not really booleans but return 0 or the tested
bit value). Change to use logical-and.

Resolves: Coverity CID 1593721
Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 hw/intc/xive2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
index 311b42e15d..7d584dfafa 100644
--- a/hw/intc/xive2.c
+++ b/hw/intc/xive2.c
@@ -1344,7 +1344,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, 
uint8_t end_blk,
 return;
 }
 
-if (xive2_end_is_crowd(&end) & !xive2_end_is_ignore(&end)) {
+if (xive2_end_is_crowd(&end) && !xive2_end_is_ignore(&end)) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "XIVE: invalid END, 'crowd' bit requires 'ignore' 
bit\n");
 return;
-- 
2.47.1




[PULL 02/12] ppc/xive: Fix typo in crowd block level calculation

2025-03-20 Thread Nicholas Piggin
I introduced this bug when "tidying" the original patch, not Frederic.
Paper bag for me.

Fixes: 9cb7f6ebed60 ("ppc/xive2: Support group-matching when looking for 
target")
Signed-off-by: Nicholas Piggin 
---
 hw/intc/xive.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index c77df2c1f8..e86f274932 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1686,7 +1686,7 @@ static uint8_t xive_get_group_level(bool crowd, bool 
ignore,
  * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported.
  * HW will encode level 4 as the value 3.  See xive2_pgofnext().
  */
-switch (level) {
+switch (blk) {
 case 1:
 case 2:
 break;
-- 
2.47.1




[PULL 05/12] ppc/spapr: Fix possible pa_features memory overflow

2025-03-20 Thread Nicholas Piggin
Coverity reports a possible memory overflow in spapr_dt_pa_features().
This should not be a true bug since DAWR1 cap is only be true for
CPU_POWERPC_LOGICAL_3_10. Add an assertion to ensure any bug there is
caught.

Resolves: Coverity CID 1593722
Fixes: 5f361ea187ba ("ppc: spapr: Enable 2nd DAWR on Power10 pSeries machine")
Reviewed-By: Shivaprasad G Bhat 
Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/spapr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a415e51d07..9865d7147f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -296,6 +296,7 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr,
 pa_features[40 + 2] &= ~0x80; /* Radix MMU */
 }
 if (spapr_get_cap(spapr, SPAPR_CAP_DAWR1)) {
+g_assert(pa_size > 66);
 pa_features[66] |= 0x80;
 }
 
-- 
2.47.1




[PULL 12/12] target/ppc: Fix e200 duplicate SPRs

2025-03-20 Thread Nicholas Piggin
DSRR0/1 registers are in the BookE ISA not e200 specific, so
remove the duplicate e200 register definitions.

Cc: Roman Kapl 
Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2768
Fixes: 0e3bf4890906 ("ppc: add DBCR based debugging")
Signed-off-by: Nicholas Piggin 
---
 target/ppc/cpu_init.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 8b590e7f17..7decc09aec 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -2744,14 +2744,6 @@ static void init_proc_e200(CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  &spr_read_generic, &spr_write_generic,
  0x); /* TOFIX */
-spr_register(env, SPR_BOOKE_DSRR0, "DSRR0",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- 0x);
-spr_register(env, SPR_BOOKE_DSRR1, "DSRR1",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- 0x);
 
 init_tlbs_emb(env);
 init_excp_e200(env, 0xUL);
-- 
2.47.1




Re: [PATCH 1/1] hw/intc/aspeed: Fix IRQ handler mask check

2025-03-20 Thread Cédric Le Goater

Hello Steven,

On 3/21/25 03:54, Steven Lee wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Sent: Thursday, March 20, 2025 11:29 PM
To: Steven Lee ; Peter Maydell
; Troy Lee ; Jamin Lin
; Andrew Jeffery
; Joel Stanley ; open
list:ASPEED BMCs ; open list:All patches CC here

Cc: Troy Lee ; long...@lenovo.com; Yunlin Tang

Subject: Re: [PATCH 1/1] hw/intc/aspeed: Fix IRQ handler mask check

Hello Steven,

On 3/20/25 10:25, Steven Lee wrote:

Updated the IRQ handler mask check to AND with select variable.
This ensures that the interrupt service routine is correctly triggered
for the interrupts within the same irq group.

For example, both `eth0` and the debug UART are handled in `GICINT132`.
Without this fix, the debug console may hang if the `eth0` ISR is not
handled.

Signed-off-by: Steven Lee 
Change-Id: Ic3609eb72218dfd68be6057d78b8953b18828709
---


I think the issue was introduced by the initial commit :

d831c5fd8682 ("aspeed/intc: Add AST2700 support")

Is that correct ?



Yes, and the implementation is based on commit 38ba38d (hw/intc/aspeed: Add 
Support for AST2700 INTCIO Controller).

Additionally, I sent a patch series for AST27x0 multi-SoC support on March 13th. However, I forgot to append the v2 tag to the series. 


That's OK.

We are in -rc phase of the QEMU 10.0 cycle, so I am looking at fixes
first.


Should I resend it with the correct version tag?


No need. This is enough:

Fixes: d831c5fd8682 ("aspeed/intc: Add AST2700 support")



Thanks,

C.





Patchwork link:
https://patchwork.kernel.org/project/qemu-devel/list/?series=943379

Thanks,
Steven


Reviewed-by: Cédric Le Goater 

Thanks,

C.



   hw/intc/aspeed_intc.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c index
3fd417084f..f17bf43925 100644
--- a/hw/intc/aspeed_intc.c
+++ b/hw/intc/aspeed_intc.c
@@ -111,7 +111,7 @@ static void

aspeed_intc_set_irq_handler(AspeedINTCState *s,

   outpin_idx = intc_irq->outpin_idx;
   inpin_idx = intc_irq->inpin_idx;

-if (s->mask[inpin_idx] || s->regs[status_reg]) {
+if ((s->mask[inpin_idx] & select) || (s->regs[status_reg] &
+ select)) {
   /*
* a. mask is not 0 means in ISR mode
* sources interrupt routine are executing.







[PULL 03/12] pnv/xive: Fix possible undefined shift error in group size calculation

2025-03-20 Thread Nicholas Piggin
Coverity discovered a potential shift overflow in group size calculation
in the case of a guest error. Add checks and logs to ensure a issues are
caught.

Make the group and crowd error checking code more similar to one another
while here.

Resolves: Coverity CID 1593724
Fixes: 9cb7f6ebed60 ("ppc/xive2: Support group-matching when looking for 
target")
Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 hw/intc/xive.c  | 27 ---
 hw/intc/xive2.c | 19 ++-
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index e86f274932..3eb28c2265 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1662,12 +1662,20 @@ uint32_t xive_get_vpgroup_size(uint32_t nvp_index)
  * (starting with the least significant bits) in the NVP index
  * gives the size of the group.
  */
-return 1 << (ctz32(~nvp_index) + 1);
+int first_zero = cto32(nvp_index);
+if (first_zero >= 31) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x",
+   nvp_index);
+return 0;
+}
+
+return 1U << (first_zero + 1);
 }
 
 static uint8_t xive_get_group_level(bool crowd, bool ignore,
 uint32_t nvp_blk, uint32_t nvp_index)
 {
+int first_zero;
 uint8_t level;
 
 if (!ignore) {
@@ -1675,12 +1683,25 @@ static uint8_t xive_get_group_level(bool crowd, bool 
ignore,
 return 0;
 }
 
-level = (ctz32(~nvp_index) + 1) & 0b;
+first_zero = cto32(nvp_index);
+if (first_zero >= 31) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x",
+   nvp_index);
+return 0;
+}
+
+level = (first_zero + 1) & 0b;
 if (crowd) {
 uint32_t blk;
 
 /* crowd level is bit position of first 0 from the right in nvp_blk */
-blk = ctz32(~nvp_blk) + 1;
+first_zero = cto32(nvp_blk);
+if (first_zero >= 31) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd block 0x%08x",
+   nvp_blk);
+return 0;
+}
+blk = first_zero + 1;
 
 /*
  * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported.
diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
index f8ef615487..311b42e15d 100644
--- a/hw/intc/xive2.c
+++ b/hw/intc/xive2.c
@@ -1153,13 +1153,15 @@ static bool xive2_vp_match_mask(uint32_t cam1, uint32_t 
cam2,
 
 static uint8_t xive2_get_vp_block_mask(uint32_t nvt_blk, bool crowd)
 {
-uint8_t size, block_mask = 0b;
+uint8_t block_mask = 0b;
 
 /* 3 supported crowd sizes: 2, 4, 16 */
 if (crowd) {
-size = xive_get_vpgroup_size(nvt_blk);
-if (size == 8) {
-qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd size of 8n");
+uint32_t size = xive_get_vpgroup_size(nvt_blk);
+
+if (size != 2 && size != 4 && size != 16) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd size of %d",
+   size);
 return block_mask;
 }
 block_mask &= ~(size - 1);
@@ -1172,7 +1174,14 @@ static uint32_t xive2_get_vp_index_mask(uint32_t 
nvt_index, bool cam_ignore)
 uint32_t index_mask = 0xFF; /* 24 bits */
 
 if (cam_ignore) {
-index_mask &= ~(xive_get_vpgroup_size(nvt_index) - 1);
+uint32_t size = xive_get_vpgroup_size(nvt_index);
+
+if (size < 2) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group size of %d",
+   size);
+return index_mask;
+}
+index_mask &= ~(size - 1);
 }
 return index_mask;
 }
-- 
2.47.1




Re: [PATCH for-10.1 v9 0/9] virtio-net: add support for SR-IOV emulation

2025-03-20 Thread Yui Washizu



I tested the following features with this patch series, and there were 
not  issues:

- Creation and deletion of VFs
- Communication with an external machine through VFs

Thank you.


Yui


On 2025/03/14 15:14, Akihiko Odaki wrote:

Based-on:<20250104-reuse-v18-0-c349eafd8...@daynix.com>
("[PATCH v18 00/14] hw/pci: SR-IOV related fixes and improvements")

Introduction


This series is based on the RFC series submitted by Yui Washizu[1].
See also [2] for the context.

This series enables SR-IOV emulation for virtio-net. It is useful
to test SR-IOV support on the guest, or to expose several vDPA devices
in a VM. vDPA devices can also provide L2 switching feature for
offloading though it is out of scope to allow the guest to configure
such a feature.

The PF side code resides in virtio-pci. The VF side code resides in
the PCI common infrastructure, but it is restricted to work only for
virtio-net-pci because of lack of validation.

User Interface
--

A user can configure a SR-IOV capable virtio-net device by adding
virtio-net-pci functions to a bus. Below is a command line example:
   -netdev user,id=n -netdev user,id=o
   -netdev user,id=p -netdev user,id=q
   -device pcie-root-port,id=b
   -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f

The VFs specify the paired PF with "sriov-pf" property. The PF must be
added after all VFs. It is user's responsibility to ensure that VFs have
function numbers larger than one of the PF, and the function numbers
have a consistent stride.

Keeping VF instances


A problem with SR-IOV emulation is that it needs to hotplug the VFs as
the guest requests. Previously, this behavior was implemented by
realizing and unrealizing VFs at runtime. However, this strategy does
not work well for the proposed virtio-net emulation; in this proposal,
device options passed in the command line must be maintained as VFs
are hotplugged, but they are consumed when the machine starts and not
available after that, which makes realizing VFs at runtime impossible.

As an strategy alternative to runtime realization/unrealization, this
series proposes to reuse the code to power down PCI Express devices.
When a PCI Express device is powered down, it will be hidden from the
guest but will be kept realized. This effectively implements the
behavior we need for the SR-IOV emulation.

Summary
---

Patch 1 disables ROM BAR, which virtio-net-pci enables by default, for
VFs.
Patch 2 makes zero stride valid for 1 VF configuration.
Patch 3 and 4 adds validations.
Patch 5 adds user-created SR-IOV VF infrastructure.
Patch 6 makes virtio-pci work as SR-IOV PF for user-created VFs.
Patch 7 allows user to create SR-IOV VFs with virtio-net-pci.

[1]https://patchew.org/QEMU/1689731808-3009-1-git-send-email-yui.wash...@gmail.com/
[2]https://lore.kernel.org/all/5d46f455-f530-4e5e-9ae7-13a2297d4...@daynix.com/

Co-developed-by: Yui Washizu
Signed-off-by: Akihiko Odaki
---
Changes in v9:
- Rebased.
- Link to 
v8:https://lore.kernel.org/r/20250104-sriov-v8-0-56144cfdc...@daynix.com

Changes in v8:
- Rebased.
- Link to 
v7:https://lore.kernel.org/r/20240813-sriov-v7-0-8515e3774...@daynix.com

Changes in v7:
- Removed #include , which is no longer needed.
- Rebased.
- Link to 
v6:https://lore.kernel.org/r/20240802-sriov-v6-0-0c8ff49c4...@daynix.com

Changes in v6:
- Added ARI extended capability.
- Rebased.
- Link to 
v5:https://lore.kernel.org/r/20240715-sriov-v5-0-3f5539093...@daynix.com

Changes in v5:
- Dropped the RFC tag.
- Fixed device unrealization.
- Rebased.
- Link to 
v4:https://lore.kernel.org/r/20240428-sriov-v4-0-ac8ac6212...@daynix.com

Changes in v4:
- Added patch "hw/pci: Fix SR-IOV VF number calculation" to fix division
   by zero reported by Yui Washizu.
- Rebased.
- Link to 
v3:https://lore.kernel.org/r/20240305-sriov-v3-0-abdb75770...@daynix.com

Changes in v3:
- Rebased.
- Link to 
v2:https://lore.kernel.org/r/20231210-sriov-v2-0-b959e8a6d...@daynix.com

Changes in v2:
- Changed to keep VF instances.
- Link to 
v1:https://lore.kernel.org/r/20231202-sriov-v1-0-32b3570f7...@daynix.com

---
Akihiko Odaki (9):
   hw/pci: Do not add ROM BAR for SR-IOV VF
   hw/pci: Fix SR-IOV VF number calculation
   pcie_sriov: Ensure PF and VF are mutually exclusive
   pcie_sriov: Check PCI Express for SR-IOV PF
   pcie_sriov: Allow user to create SR-IOV device
   virtio-pci: Implement SR-IOV PF
   virtio-net: Implement SR-IOV VF
   docs: Document composable SR-IOV device
   pcie_sriov: Make a PCI device with user-created VF ARI-capable

  MAINTAINERS|   1 +
  docs/system/index.rst  |   1 +
  docs/system/sriov.rst  |  37 ++
  include/hw/pci/pci_device.h|   6 +-
  include/hw/pci/pcie_sriov.h| 

[PULL 08/12] ppc/amigaone: Check blk_pwrite return value

2025-03-20 Thread Nicholas Piggin
From: BALATON Zoltan 

Coverity reported that return value of blk_pwrite() maybe should not
be ignored. We can't do much if this happens other than report an
error but let's do that to silence this report.

Resolves: Coverity CID 1593725
Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
Message-ID: <20250314200140.2dbe74e6...@zero.eik.bme.hu>
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/amigaone.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/amigaone.c b/hw/ppc/amigaone.c
index 483512125f..5d787c3059 100644
--- a/hw/ppc/amigaone.c
+++ b/hw/ppc/amigaone.c
@@ -108,8 +108,8 @@ static void nvram_write(void *opaque, hwaddr addr, uint64_t 
val,
 uint8_t *p = memory_region_get_ram_ptr(&s->mr);
 
 p[addr] = val;
-if (s->blk) {
-blk_pwrite(s->blk, addr, 1, &val, 0);
+if (s->blk && blk_pwrite(s->blk, addr, 1, &val, 0) < 0) {
+error_report("%s: could not write %s", __func__, blk_name(s->blk));
 }
 }
 
@@ -151,15 +151,17 @@ static void nvram_realize(DeviceState *dev, Error **errp)
 *c = cpu_to_be32(CRC32_DEFAULT_ENV);
 /* Also copies terminating \0 as env is terminated by \0\0 */
 memcpy(p + 4, default_env, sizeof(default_env));
-if (s->blk) {
-blk_pwrite(s->blk, 0, sizeof(crc) + sizeof(default_env), p, 0);
+if (s->blk &&
+blk_pwrite(s->blk, 0, sizeof(crc) + sizeof(default_env), p, 0) < 0
+   ) {
+error_report("%s: could not write %s", __func__, blk_name(s->blk));
 }
 return;
 }
 if (*c == 0) {
 *c = cpu_to_be32(crc32(0, p + 4, NVRAM_SIZE - 4));
-if (s->blk) {
-blk_pwrite(s->blk, 0, 4, p, 0);
+if (s->blk && blk_pwrite(s->blk, 0, 4, p, 0) < 0) {
+error_report("%s: could not write %s", __func__, blk_name(s->blk));
 }
 }
 if (be32_to_cpu(*c) != crc) {
-- 
2.47.1




[PATCH v2] rust: assertions: add static_assert

2025-03-20 Thread Paolo Bonzini
Add a new assertion that is similar to "const { assert!(...) }" but can be used
outside functions and with older versions of Rust.  A similar macro is found in
Linux, whereas the "static_assertions" crate has a const_assert macro that
produces worse error messages.

Suggested-by: Peter Maydell 
Supersedes: <20250320113356.799412-1-pbonz...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 rust/qemu-api/src/assertions.rs | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/rust/qemu-api/src/assertions.rs b/rust/qemu-api/src/assertions.rs
index 104dec39774..bba38cfda11 100644
--- a/rust/qemu-api/src/assertions.rs
+++ b/rust/qemu-api/src/assertions.rs
@@ -120,3 +120,25 @@ macro_rules! assert_match {
 );
 };
 }
+
+/// Assert at compile time that an expression is true.  This is similar
+/// to `const { assert!(...); }` but it works outside functions, as well as
+/// on versions of Rust before 1.79.
+///
+/// # Examples
+///
+/// ```
+/// # use qemu_api::static_assert;
+/// static_assert!("abc".len() == 3);
+/// ```
+///
+/// ```compile_fail
+/// # use qemu_api::static_assert;
+/// static_assert!("abc".len() == 2); // does not compile
+/// ```
+#[macro_export]
+macro_rules! static_assert {
+($x:expr) => {
+const _: () = assert!($x);
+};
+}
-- 
2.48.1




Re: [PATCH for-10.1 30/32] vfio: Rename VFIO dirty tracking services

2025-03-20 Thread Avihai Horon



On 20/03/2025 13:56, Joao Martins wrote:

External email: Use caution opening links or attachments


On 20/03/2025 11:45, Avihai Horon wrote:

On 20/03/2025 13:18, Joao Martins wrote:

External email: Use caution opening links or attachments


On 20/03/2025 11:13, Avihai Horon wrote:

On 19/03/2025 14:21, Joao Martins wrote:

External email: Use caution opening links or attachments


On 18/03/2025 09:54, Cédric Le Goater wrote:

Rename these routines :

 vfio_devices_all_device_dirty_tracking_started ->
vfio_dirty_tracking_devices_is_started_all
 vfio_devices_all_dirty_tracking_started->
vfio_dirty_tracking_devices_is_started
 vfio_devices_all_device_dirty_tracking ->
vfio_dirty_tracking_devices_is_supported
 vfio_devices_dma_logging_start ->
vfio_dirty_tracking_devices_dma_logging_start
 vfio_devices_dma_logging_stop  ->
vfio_dirty_tracking_devices_dma_logging_stop
 vfio_devices_query_dirty_bitmap->
vfio_dirty_tracking_devices_query_dirty_bitmap
 vfio_get_dirty_bitmap  ->
vfio_dirty_tracking_query_dirty_bitmap

to better reflect the namespace they belong to.

Signed-off-by: Cédric Le Goater 

The change itself is fine.

But on the other hand, it looks relatively long names, no? I am bit at two
minds
(as I generally prefer shorter code), but I can't find any alternatives if you
really wanna have one namespaces associated with the subsystem:file as a C
namespace.

Every once and a while me and Avihai use the acronym DPT (Dirty Page Tracking)
when talking about this stuff, but it seems a detour from the code style to
abbreviate namespaces into acronyms.

Having said that:

   Reviewed-by: Joao Martins 

P.S. We could also remove 'devices' as the prefix for VF dirty tracking after
namespace, and thus drop 'dma logging'. That should put 'start/stop' a little
shorter.

This file is not related only to dirty tracking, but to memory in general.
Maybe a better naming would be memory.{c,h}?
Then we can have vfio_memory_* or vfio_mem_* prefix and rename to the below:>
vfio_devices_all_device_dirty_tracking_started -> vfio_mem_device_dpt_is_started
vfio_devices_all_dirty_tracking_started-> vfio_mem_dpt_is_started
vfio_devices_all_device_dirty_tracking ->
vfio_mem_device_dpt_is_supported
vfio_devices_dma_logging_start -> vfio_mem_device_dpt_start
vfio_devices_dma_logging_stop  -> vfio_mem_device_dpt_stop
vfio_devices_query_dirty_bitmap-> vfio_mem_device_dpt_query
vfio_get_dirty_bitmap  -> vfio_mem_dpt_query

dpt can be changed to dirty_tracking if that's clearer and not too long.
In patch #31 we can rename to vfio_mem_{register,unregister} or
vfio_mem_listener_{register,unregister}.
More internal functions can be gradually renamed and added the vfio_mem_*
prefix.

Will that work?


I would associate to memory if we were talking about Host windows, DMA mapping
and etc. I believe that's more fundamentally related to memory handling of VFIO
to justify said prefix.

Here the code Cedric moved is really about dirty page tracking, or tracking
changes made by VFs to memory. Calling it memory.c would be a bit of a misnomer
   IMHO :(

Hmm, yes, the majority of the code is related to dirty tracking, but maybe we
can view dirty tracking as a sub-field of memory.
Dirty tracking doesn't seem the perfect fit IMHO, as this file also
contains vfio_dirty_tracking_register and .region_add/.region_del which are not
entirely related to dirty tracking.


Ah yes, it's a small portion but still region_{add,del} is indeed about DMA
mapping and not at all related to dirty tracking.

It's almost as if we should be moving ::region_add/region_del alongside
vfio_dirty_tracking_{un,}register into a memory.c file and leave this one as
dirty_tracking.c / dpt.c


Yes, this could also work.
Need to consider if it's worth the additional split churn. I have no 
strong opinion.




Which reminds me that perhaps vfio_dirty_tracking_register() and the name might
be misleading and should instead me vfio_memory_register() /
vfio_memory_unregister().


Indeed.




Thanks.


---
hw/vfio/dirty-tracking.h |  6 +++---
hw/vfio/container.c  |  6 +++---
hw/vfio/dirty-tracking.c | 44 
hw/vfio/trace-events |  2 +-
4 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/hw/vfio/dirty-tracking.h b/hw/vfio/dirty-tracking.h
index
322af30b0d5370600719594d4aed4c407f7d2295..db9494202a780108ce78b642950bfed78ba3f253
 100644
--- a/hw/vfio/dirty-tracking.h
+++ b/hw/vfio/dirty-tracking.h
@@ -11,9 +11,9 @@

extern const MemoryListener vfio_memory_listener;

-bool vfio_devices_all_dirty_tracking_started(const VFIOContainerBase
*bcontainer);
-bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase
*bcontainer);
-int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint6

[PATCH] target/ppc: Fix e200 duplicate SPRs

2025-03-20 Thread Nicholas Piggin
DSRR0/1 registers are in the BookE ISA not e200 specific, so
remove the duplicate e200 register definitions.

Cc: Author: Roman Kapl 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2768
Fixes: 0e3bf4890906 ("ppc: add DBCR based debugging")
Signed-off-by: Nicholas Piggin 
---
 target/ppc/cpu_init.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 8b590e7f17c..7decc09aec8 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -2744,14 +2744,6 @@ static void init_proc_e200(CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  &spr_read_generic, &spr_write_generic,
  0x); /* TOFIX */
-spr_register(env, SPR_BOOKE_DSRR0, "DSRR0",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- 0x);
-spr_register(env, SPR_BOOKE_DSRR1, "DSRR1",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- 0x);
 
 init_tlbs_emb(env);
 init_excp_e200(env, 0xUL);
-- 
2.47.1




Re: [PATCH for-10.1 31/32] vfio: Introduce vfio_dirty_tracking_un/register() routines

2025-03-20 Thread Joao Martins
On 19/03/2025 13:24, Joao Martins wrote:
> On 18/03/2025 09:54, Cédric Le Goater wrote:
>> This hides the MemoryListener implementation and makes the code common
>> to both IOMMU backends, legacy and IOMMUFD.
>>
>> Signed-off-by: Cédric Le Goater 
> 
> Reviewed-by: Joao Martins 
> 

After discussing with Avihai, maybe the routine should be representative on what
it does and not so much on the namespace i.e.

vfio_dirty_tracking_register -> vfio_memory_register
vfio_dirty_tracking_unregister -> vfio_memory_unregister

Given that these have nothing to do with dirty tracking.

In terms of memory listeners the only function relevant in this area is:

vfio_dirty_tracking_init
vfio_dirty_tracking_update

Which have different purpose where we parse the memory ranges to construct 32/64
bit ranges for VF trackers.

>> ---
>>  hw/vfio/dirty-tracking.h |  4 ++--
>>  hw/vfio/container.c  | 11 +++
>>  hw/vfio/dirty-tracking.c | 21 -
>>  hw/vfio/iommufd.c|  9 ++---
>>  4 files changed, 27 insertions(+), 18 deletions(-)
>>
>> diff --git a/hw/vfio/dirty-tracking.h b/hw/vfio/dirty-tracking.h
>> index 
>> db9494202a780108ce78b642950bfed78ba3f253..6d717f0e918e47e341114c82ffc2cf520fc7b079
>>  100644
>> --- a/hw/vfio/dirty-tracking.h
>> +++ b/hw/vfio/dirty-tracking.h
>> @@ -9,11 +9,11 @@
>>  #ifndef HW_VFIO_DIRTY_TRACKING_H
>>  #define HW_VFIO_DIRTY_TRACKING_H
>>  
>> -extern const MemoryListener vfio_memory_listener;
>> -
>>  bool vfio_dirty_tracking_devices_is_started(const VFIOContainerBase 
>> *bcontainer);
>>  bool vfio_dirty_tracking_devices_is_supported(const VFIOContainerBase 
>> *bcontainer);
>>  int vfio_dirty_tracking_query_dirty_bitmap(const VFIOContainerBase 
>> *bcontainer, uint64_t iova,
>>uint64_t size, ram_addr_t ram_addr, Error **errp);
>> +bool vfio_dirty_tracking_register(VFIOContainerBase *bcontainer, Error 
>> **errp);
>> +void vfio_dirty_tracking_unregister(VFIOContainerBase *bcontainer);
>>  
>>  #endif /* HW_VFIO_DIRTY_TRACKING_H */
>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>> index 
>> 7b3ec798a77052b8cb0b47d3dceaca1428cb50bd..1fcca75caba19353ad3063ae97b20c15f61564e9
>>  100644
>> --- a/hw/vfio/container.c
>> +++ b/hw/vfio/container.c
>> @@ -616,12 +616,7 @@ static bool vfio_container_connect(VFIOGroup *group, 
>> AddressSpace *as,
>>  group->container = container;
>>  QLIST_INSERT_HEAD(&container->group_list, group, container_next);
>>  
>> -bcontainer->listener = vfio_memory_listener;
>> -memory_listener_register(&bcontainer->listener, bcontainer->space->as);
>> -
>> -if (bcontainer->error) {
>> -error_propagate_prepend(errp, bcontainer->error,
>> -"memory listener initialization failed: ");
>> +if (!vfio_dirty_tracking_register(bcontainer, errp)) {
>>  goto listener_release_exit;
>>  }
>>  
>> @@ -631,7 +626,7 @@ static bool vfio_container_connect(VFIOGroup *group, 
>> AddressSpace *as,
>>  listener_release_exit:
>>  QLIST_REMOVE(group, container_next);
>>  vfio_group_del_kvm_device(group);
>> -memory_listener_unregister(&bcontainer->listener);
>> +vfio_dirty_tracking_unregister(bcontainer);
>>  if (vioc->release) {
>>  vioc->release(bcontainer);
>>  }
>> @@ -669,7 +664,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
>>   * group.
>>   */
>>  if (QLIST_EMPTY(&container->group_list)) {
>> -memory_listener_unregister(&bcontainer->listener);
>> +vfio_dirty_tracking_unregister(bcontainer);
>>  if (vioc->release) {
>>  vioc->release(bcontainer);
>>  }
>> diff --git a/hw/vfio/dirty-tracking.c b/hw/vfio/dirty-tracking.c
>> index 
>> 8e47ccbb9aea748e57271508ddcd10e394abf16c..d7827f7b64adf3e2b41fafd59aab71e0b28c1567
>>  100644
>> --- a/hw/vfio/dirty-tracking.c
>> +++ b/hw/vfio/dirty-tracking.c
>> @@ -1267,7 +1267,7 @@ static void vfio_listener_log_sync(MemoryListener 
>> *listener,
>>  }
>>  }
>>  
>> -const MemoryListener vfio_memory_listener = {
>> +static const MemoryListener vfio_memory_listener = {
>>  .name = "vfio",
>>  .region_add = vfio_listener_region_add,
>>  .region_del = vfio_listener_region_del,
>> @@ -1275,3 +1275,22 @@ const MemoryListener vfio_memory_listener = {
>>  .log_global_stop = vfio_listener_log_global_stop,
>>  .log_sync = vfio_listener_log_sync,
>>  };
>> +
>> +bool vfio_dirty_tracking_register(VFIOContainerBase *bcontainer, Error 
>> **errp)
>> +{
>> +bcontainer->listener = vfio_memory_listener;
>> +memory_listener_register(&bcontainer->listener, bcontainer->space->as);
>> +
>> +if (bcontainer->error) {
>> +error_propagate_prepend(errp, bcontainer->error,
>> +"memory listener initialization failed: ");
>> +return false;
>> +}
>> +
>> +return true;
>> +}
>> +
>> +void vfio_dirty_tracking_unregister(VFIOContainerBase *b

Re: [PATCH 3/3] rust: pl011: Check size of state struct at compile time

2025-03-20 Thread Zhao Liu
> -use std::{ffi::CStr, ptr::addr_of_mut};
> +use std::{ffi::CStr, mem, ptr::addr_of_mut};

maybe mem::size_of (since there're 2 use cases :-))? 

>  
>  use qemu_api::{
> +bindings,
>  chardev::{CharBackend, Chardev, Event},
> +static_assert,

This one looks like it breaks the alphabetical ordering (this nit can be
checked and fixed by "cargo +nightly fmt" under rust directory, which is
like checkpatch.pl).

>  impl_vmstate_forward,
>  irq::{IRQState, InterruptSource},
>  memory::{hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder},
> @@ -124,6 +126,12 @@ pub struct PL011State {
>  pub migrate_clock: bool,
>  }
>  
> +// Some C users of this device embed its state struct into their own
> +// structs, so the size of the Rust version must not be any larger
> +// than the size of the C one. If this assert triggers you need to
> +// expand the padding_for_rust[] array in the C PL011State struct.
> +static_assert!(mem::size_of::() <= 
> mem::size_of::());
> +

maybe use qemu_api::bindings::PL011State directly? Because bindings
contains native C structures/functions and their use should not be
encouraged, I think it's better to 'hide' bindings (not list it at the
beginning of the file).

Otherwise,

Reviewed-by: Zhao Liu 




Re: [PATCH 1/1] hw/intc/aspeed: Fix IRQ handler mask check

2025-03-20 Thread Cédric Le Goater

Hello Steven,

On 3/20/25 10:25, Steven Lee wrote:

Updated the IRQ handler mask check to AND with select variable.
This ensures that the interrupt service routine is correctly triggered
for the interrupts within the same irq group.

For example, both `eth0` and the debug UART are handled in `GICINT132`.
Without this fix, the debug console may hang if the `eth0` ISR is not
handled.

Signed-off-by: Steven Lee 
Change-Id: Ic3609eb72218dfd68be6057d78b8953b18828709
---


I think the issue was introduced by the initial commit :

  d831c5fd8682 ("aspeed/intc: Add AST2700 support")

Is that correct ?

Reviewed-by: Cédric Le Goater 

Thanks,

C.



  hw/intc/aspeed_intc.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c
index 3fd417084f..f17bf43925 100644
--- a/hw/intc/aspeed_intc.c
+++ b/hw/intc/aspeed_intc.c
@@ -111,7 +111,7 @@ static void aspeed_intc_set_irq_handler(AspeedINTCState *s,
  outpin_idx = intc_irq->outpin_idx;
  inpin_idx = intc_irq->inpin_idx;
  
-if (s->mask[inpin_idx] || s->regs[status_reg]) {

+if ((s->mask[inpin_idx] & select) || (s->regs[status_reg] & select)) {
  /*
   * a. mask is not 0 means in ISR mode
   * sources interrupt routine are executing.





Re: [PATCH 28/38] target/hexagon: Initialize htid, modectl regs

2025-03-20 Thread Richard Henderson

On 3/19/25 09:08, Sid Manning wrote:




-Original Message-
From: Richard Henderson 
Sent: Thursday, March 13, 2025 2:07 PM
To: ltaylorsimp...@gmail.com; 'Philippe Mathieu-Daudé'
; 'Brian Cain' ; qemu-
de...@nongnu.org
Cc: Matheus Bernardino (QUIC) ;
a...@rev.ng; a...@rev.ng; Marco Liebel (QUIC)
; alex.ben...@linaro.org; Mark Burton (QUIC)
; Sid Manning ; Brian
Cain 
Subject: Re: [PATCH 28/38] target/hexagon: Initialize htid, modectl regs

WARNING: This email originated from outside of Qualcomm. Please be wary
of any links or attachments, and do not enable macros.

On 3/13/25 11:47, ltaylorsimp...@gmail.com wrote:

What we are trying to model is an instance of a Hexagon that has a number

of threads and some resources that are shared.  The shared resources
include the TLB and global S registers.  The initial thought was to tie the
shared resources to the thread with cpu_index == 0.  If we were to model a
Qualcomm SoC, there would be multiple ARM cores and multiple Hexagon
instances.  Each Hexagon instance would have distinct shared resources.  So,
you are correct that using cpu_index is not going to scale.


What is the recommended way to model this?  I see a "nr_threads" field in

CPUCore but no clear way to find the threads.  PPC has some cores that add a
"threads" field.  Should we follow this approach?

I recommend that the shared resources be modeled as a separate Object,
which is linked via object_property_add_link to all of the cpus that use it.


[Sid Manning]
Hi Richard,
An example of shared resources would be the system registers.  They are broken 
down into 2 regions.  Each thread has its own copy of system registers 0-15 
while registers 16-63 are global.  Right now CPUHexagonState contains a 
pointer, g_sreg which points back to cpu[0]'s state thus keeping one copy of 
the global registers, accesses are done with BQL held to avoid race conditions.

Your suggestion is to create a new object to represent the set of global system 
registers, I tried this:

#define TYPE_HEXAGON_G_SREG "hexagon.global_sreg"
OBJECT_DECLARE_SIMPLE_TYPE(HexagonGlobalSREGState, HEXAGON_G_SREG)
struct HexagonGlobalSREGState {
 SysBusDevice parent_obj;


SysBusDevice is more than you need -- Object is sufficient here.


 uint32_t regs[64];
};

In our virtual machine init:
vms->g_sreg = HEXAGON_G_SREG(qdev_new(TYPE_HEXAGON_G_SREG));

and
  object_property_set_link(OBJECT(cpu), "global-sreg", OBJECT(vms->g_sreg), 
&error_abort);

to attach the global regs to the cpu, but the above doesn't update cpu elements the same way calls to qdev_prop_set_uint32 will do, object_property_set_link doesn’t error out and returns true. 


Did you add the DEFINE_PROP_LINK to match?  I'd expect something like

DEFINE_PROP_LINK("global-sreg", HexagonCPU, g_sreg,
 TYPE_HEXAGON_G_SREG, HexagonGlobalSREGState *),

Beyond that, I guess I'd have to see an actual patch to work out what's wrong.


r~



Re: [PATCH v2 06/42] include/exec: Split out cpu-mmu-index.h

2025-03-20 Thread Pierrick Bouvier

On 3/20/25 07:58, Richard Henderson wrote:

On 3/19/25 10:16, Pierrick Bouvier wrote:

If I understand correctly, this is simply an (arbitrary) choice related to each 
target
architecture implemented in QEMU, and it does not match any property of a 
*real* cpu. Is
that correct?


Correct.


In other words, it could have been implemented in a way that MMU_USER_IDX is 
the same for
all arch, but it hasn't been done this way. Is that correct?

I'm not looking for modifying anything related to this, just want to make sure 
I get it
right.


With minor effort, perhaps.

Take loongarch64 for example.  The system register field CRMD.PVL contains the 
priv level,
where 0 is most priv and 3 is least.  Simply extracting the field is the easiest
implementation, so MMU_USER_IDX == 3.

If there were some requirement that MMU_USER_IDX be 0, then obviously we could 
rearrange,
but so far there is not.



Thanks Richard, that's all clear for me now.



r~





Re: [PATCH] 9pfs: fix 'total_open_fd' decrementation

2025-03-20 Thread Christian Schoenebeck
On Wednesday, March 19, 2025 7:52:51 PM CET Greg Kurz wrote:
> On Wed, 19 Mar 2025 13:14:27 +0100
> Christian Schoenebeck  wrote:
> 
> > On Wednesday, March 19, 2025 11:08:58 AM CET Christian Schoenebeck wrote:
> > > According to 'man 2 close' errors returned by close() should only be used
> > > for either diagnostic purposes or for catching data loss due to a previous
> > > write error, as an error result of close() usually indicates a deferred
> > > error of a previous write operation.
> > > 
> > > Therefore not decrementing 'total_open_fd' on a close() error is wrong
> > > and would yield in a higher open file descriptor count than actually the
> > > case, leading to 9p server reclaiming open file descriptors too soon.
> > > 
> > > Based-on: <20250312152933.383967-7-gr...@kaod.org>
> > > Signed-off-by: Christian Schoenebeck 
> > > ---
> > >  hw/9pfs/9p.c | 14 --
> > >  hw/9pfs/codir.c  |  3 ++-
> > >  hw/9pfs/cofile.c |  3 ++-
> > >  3 files changed, 12 insertions(+), 8 deletions(-)
[...]
> > > diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> > > index 2068a4779d..f1fd97c8a7 100644
> > > --- a/hw/9pfs/codir.c
> > > +++ b/hw/9pfs/codir.c
> > > @@ -353,7 +353,8 @@ int coroutine_fn v9fs_co_closedir(V9fsPDU *pdu, 
> > > V9fsFidOpenState *fs)
> > >  err = -errno;
> > >  }
> > >  });
> > > -if (!err) {
> > > +/* 'man 2 close' suggests to ignore close() errors except of EBADF */
> > > +if (!err || errno != EBADF) {
> > >  total_open_fd--;
> > >  }
> > >  return err;
> > 
> > Or, as EBADF is somewhat unexpected here (assuming v9fs_co_closedir() was
> > called by checking for a valid file handle), maybe it would make sense to 
> > log
> > this?
> > 
> 
> Getting EBADF could be the result of some unrelated code that closed
> the fd from another thread or the 9p code using some stale fid structure
> or some other serious bug. I'd personally g_assert().

Wouldn't that be too harsh? Killing QEMU should be last resort if continuing
to run resulted in a security threat or undefined behaviour. I'm not sure that
would apply here.

> > if (unlikely(err && errno == EBADF)) {
> > error_report("v9fs_co_closedir() failed with EBADF");  
> > } else {
> > total_open_fd--;
> > }
> > 
> > In the sense, if EBADF happens here, it's an indication for a bug in 9p
> > server.





Re: [PATCH v3 01/14] arm/cpu: Add sysreg definitions in cpu-sysregs.h

2025-03-20 Thread Sebastian Ott

On Tue, 11 Mar 2025, Cornelia Huck wrote:

+++ b/target/arm/cpu.h

[...]

+/* REG is ID_XXX */
+#define FIELD_DP64_IDREG(ISAR, REG, FIELD, VALUE)   \
+({  \
+ARMISARegisters *i_ = (ISAR);   \
+uint64_t regval = i_->idregs[REG ## _EL1_IDX];  \


This won't work for regs that don't fit this naming scheme. Up to this
point in the series that's only CTR_EL0, later with the auto generation it
will be more. But I guess this can be extended when/if there's an actual
need..

Sebastian




Re: [PATCH v3 00/14] arm: rework id register storage

2025-03-20 Thread Sebastian Ott

On Tue, 11 Mar 2025, Cornelia Huck wrote:

Yet another update of the id register series, less changes this time
around.

Changed from v2:
- changed generation of the various register defines via the "DEF"
 magic suggested by Richard
- some kvm-only code moved to kvm.c; some code potentially useful to
 non-kvm code stayed out of there (the cpu model code will make use
 of it, and that one should be extendable outside of kvm -- a
 revised version of those patches is still in the works, but I'll be
 off for a few days and rather wanted to get this one out first)

Also available at
https://gitlab.com/cohuck/qemu/-/commits/arm-rework-idreg-storage-v3



Changed from v1:
- Noticed that we missed the hvf code. Converted, compiled, but not tested
 as I'm lacking an environment for testing.
- Hopefully incorporated most of the suggested changes -- if I missed
 something, it was unintentional unless mentioned below.
 - fixed repeated inclusion of definitions
 - hopefully made macros more robust
 - removed distinction between reading 32/64 values, which was mostly
   adding churn for little value
 - postponed generating property definitions to the cpu model patches,
   where they are actually used
 - juggled hunks and moved them to the right patches
 - fixed some typos
- rebased to a more recent code base

NOT changed from v1:
- definitions are still generated from the Linux sysregs file
 - I still think updating the generated files on demand (so that we can
   double check the result) is the right thing to do
 - I'm open to changing the source of the definitions from the sysregs
   file to the JSON definitions published by Arm; however, I first wanted
   to get the code using it right -- we can switch out the code generating
   the file to use a different source easily later on, and I'd also like
   to steal parts of the script from Linux once integrated (which I think
   hasn't happened yet?)



[Note: I've kept the cc list from the last round of cpu model patches;
so if you're confused as to why you're cc:ed here, take it as a
heads-up that a new cpu model series will come along soon]

This patch series contains patches extracted from the larger cpu model
series (RFC v2 last posted at
https://lore.kernel.org/qemu-devel/20241206112213.88394-1-coh...@redhat.com/)
and aims at providing a base upon which we can continue with building
support for cpu models, but which is hopefully already an improvement
on its own.

Main changes from the patches in that series include:
- post-pone the changes to handle KVM writable ID registers for cpu models
 (I have a series including that on top of this one)
- change how we store the list of ID registers, and access them
 basically, use an enum for indexing, and an enum doing encodings in a
 pattern similar to cpregs
- move some hunks to different patches
- update the scripts to generate the register descriptions, and run
 them against a recent Linux sysregs file

What I've kept:
- generating the register descriptions from the Linux sysregs file
 I think that file is still our best bet to generate the descriptions
 easily, and updating the definitions is a manual step that can be checked
 for unintended changes
- most of the hard work that Eric had been doing; all new bugs in there
 are my own :)




Cornelia Huck (2):
 arm/kvm: add accessors for storing host features into idregs
 arm/cpu: switch to a generated cpu-sysregs.h.inc

Eric Auger (12):
 arm/cpu: Add sysreg definitions in cpu-sysregs.h
 arm/cpu: Store aa64isar0/aa64zfr0 into the idregs arrays
 arm/cpu: Store aa64isar1/2 into the idregs array
 arm/cpu: Store aa64pfr0/1 into the idregs array
 arm/cpu: Store aa64mmfr0-3 into the idregs array
 arm/cpu: Store aa64dfr0/1 into the idregs array
 arm/cpu: Store aa64smfr0 into the idregs array
 arm/cpu: Store id_isar0-7 into the idregs array
 arm/cpu: Store id_pfr0/1/2 into the idregs array
 arm/cpu: Store id_dfr0/1 into the idregs array
 arm/cpu: Store id_mmfr0-5 into the idregs array
 arm/cpu: Add sysreg generation scripts

hw/intc/armv7m_nvic.c |  27 +-
scripts/gen-cpu-sysregs-header.awk|  39 +++
scripts/update-aarch64-sysreg-code.sh |  25 ++
target/arm/cpu-features.h | 317 +-
target/arm/cpu-sysregs.h  |  41 +++
target/arm/cpu-sysregs.h.inc  | 170 ++
target/arm/cpu.c  | 111 +++
target/arm/cpu.h  |  80 +++--
target/arm/cpu64.c| 128 +++
target/arm/helper.c   |  68 ++--
target/arm/hvf/hvf.c  |  36 +-
target/arm/internals.h|   6 +-
target/arm/kvm.c  | 129 
target/arm/ptw.c  |   6 +-
target/arm/tcg/cpu-v7m.c  | 174 +-
target/arm/tcg/cpu32.c| 320 +-
target/arm/tcg/cpu64.c| 460 +-
17 files changed, 1218 insertions(+), 919 deletions(-)
create mode 100755 script

Re: [PATCH 1/2] migration: Add some documentation for multifd

2025-03-20 Thread Prasad Pandit
On Tue, 11 Mar 2025 at 00:59, Fabiano Rosas  wrote:
> Peter Xu  writes:
> > To me, this is a fairly important question to ask.  Fundamentally, the very
> > initial question is why do we need periodic flush and sync at all.  It's
> > because we want to make sure new version of pages to land later than old
> > versions.
...
> > Then v1 and v2 of the page P are ordered.
> > If without the message on the main channel:
> > Then I don't see what protects reorder of arrival of messages like:
...
> That's all fine. As long as the recv part doesn't see them out of
> order. I'll try to write some code to confirm so I don't waste too much
> of your time.

* Relying on this receive order seems like a passive solution. On one
side we are saying there is no defined 'requirement' on the network or
compute capacity/quality for migration. ie. compute and network can be
as bad as possible, yet migration shall always work reliably.

* When receiving different versions of pages, couldn't multifd_recv
check the latest version present in guest RAM and accept the incoming
version only if it is fresher than the already present one? ie. if v1
arrives later than v2 on the receive side, the receive side
could/should discard v1 because v2 is already received.

Thank you.
---
  - Prasad




Re: [PATCH] 9pfs: fix 'total_open_fd' decrementation

2025-03-20 Thread Greg Kurz
On Thu, 20 Mar 2025 10:48:11 +0100
Christian Schoenebeck  wrote:

> On Wednesday, March 19, 2025 7:52:51 PM CET Greg Kurz wrote:
> > On Wed, 19 Mar 2025 13:14:27 +0100
> > Christian Schoenebeck  wrote:
> > 
> > > On Wednesday, March 19, 2025 11:08:58 AM CET Christian Schoenebeck wrote:
> > > > According to 'man 2 close' errors returned by close() should only be 
> > > > used
> > > > for either diagnostic purposes or for catching data loss due to a 
> > > > previous
> > > > write error, as an error result of close() usually indicates a deferred
> > > > error of a previous write operation.
> > > > 
> > > > Therefore not decrementing 'total_open_fd' on a close() error is wrong
> > > > and would yield in a higher open file descriptor count than actually the
> > > > case, leading to 9p server reclaiming open file descriptors too soon.
> > > > 
> > > > Based-on: <20250312152933.383967-7-gr...@kaod.org>
> > > > Signed-off-by: Christian Schoenebeck 
> > > > ---
> > > >  hw/9pfs/9p.c | 14 --
> > > >  hw/9pfs/codir.c  |  3 ++-
> > > >  hw/9pfs/cofile.c |  3 ++-
> > > >  3 files changed, 12 insertions(+), 8 deletions(-)
> [...]
> > > > diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> > > > index 2068a4779d..f1fd97c8a7 100644
> > > > --- a/hw/9pfs/codir.c
> > > > +++ b/hw/9pfs/codir.c
> > > > @@ -353,7 +353,8 @@ int coroutine_fn v9fs_co_closedir(V9fsPDU *pdu, 
> > > > V9fsFidOpenState *fs)
> > > >  err = -errno;
> > > >  }
> > > >  });
> > > > -if (!err) {
> > > > +/* 'man 2 close' suggests to ignore close() errors except of EBADF 
> > > > */
> > > > +if (!err || errno != EBADF) {
> > > >  total_open_fd--;
> > > >  }
> > > >  return err;
> > > 
> > > Or, as EBADF is somewhat unexpected here (assuming v9fs_co_closedir() was
> > > called by checking for a valid file handle), maybe it would make sense to 
> > > log
> > > this?
> > > 
> > 
> > Getting EBADF could be the result of some unrelated code that closed
> > the fd from another thread or the 9p code using some stale fid structure
> > or some other serious bug. I'd personally g_assert().
> 
> Wouldn't that be too harsh? Killing QEMU should be last resort if continuing
> to run resulted in a security threat or undefined behaviour. I'm not sure that
> would apply here.
> 

Getting EBADF on a file descriptor this code is supposed to own already
smells like undefined behavior IMHO and, hopefully, such an assert should
never trigger, but I understand your concern and it's up to you to decide :-)

> > > if (unlikely(err && errno == EBADF)) {
> > > error_report("v9fs_co_closedir() failed with EBADF");  
> > > } else {
> > > total_open_fd--;
> > > }
> > > 
> > > In the sense, if EBADF happens here, it's an indication for a bug in 9p
> > > server.
> 
> 



-- 
Greg



Re: [PATCH for-10.1 24/32] vfio: Introduce new files for dirty tracking definitions and declarations

2025-03-20 Thread Joao Martins
On 20/03/2025 09:52, Duan, Zhenzhong wrote:
>> -Original Message-
>> From: Cédric Le Goater 
>> Subject: [PATCH for-10.1 24/32] vfio: Introduce new files for dirty tracking
>> definitions and declarations
>>
>> File "common.c" has been emptied of most of its definitions by the
>> previous changes and the only definitions left are related to dirty
>> tracking. Rename it to "dirty-tracking.c" and introduce its associated
>> "dirty-tracking.h" header file for the declarations.
>>
>> Cleanup a little the includes while at it.
>>
>> Signed-off-by: Cédric Le Goater 
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/dirty-tracking.c
>> @@ -20,14 +20,10 @@
>>
>> #include "qemu/osdep.h"
>> #include 
>> -#ifdef CONFIG_KVM
>> -#include 
>> -#endif
> 
> It looks this change unrelated to this patch?
> 
>> #include 
>>
>> #include "hw/vfio/vfio-common.h"
>> #include "hw/vfio/pci.h"
>> -#include "exec/address-spaces.h"
> 
> Same here.
> 

It's written in the commit message:

"Cleanup a little the includes while at it."



Re: [PATCH 1/2] migration: Add some documentation for multifd

2025-03-20 Thread Prasad Pandit
Hello Fabiano,

* First big thank you for starting/writing this document. It is a
great resource.

On Fri, 7 Mar 2025 at 19:13, Fabiano Rosas  wrote:
> +++ b/docs/devel/migration/multifd.rst
> @@ -0,0 +1,254 @@
> +Multifd
> +Multifd is the name given for the migration capability that enables
> +data transfer using multiple threads. Multifd supports all the
> +transport types currently in use with migration (inet, unix, vsock,
> +fd, file).

* Multifd is Multiple File Descriptors, right? Ie. Does it work with
one thread but multiple file descriptors? OR one thread per file
descriptor is always the case? I have not used/tried 'multifd +
file://' migration, but I imagined there one thread might be able to
read/write to multiple file descriptors at a time.

> +Usage
> +-
> +
> +On both source and destination, enable the ``multifd`` capability:
> +
> +``migrate_set_capability multifd on``
> +
> +Define a number of channels to use (default is 2, but 8 usually
> +provides best performance).
> +
> +``migrate_set_parameter multifd-channels 8``
> +

* I get that this is a QEMU documentation, but for users/reader's
convenience it'll help to point to libvirt:virsh migrate usage here ->
https://www.libvirt.org/manpages/virsh.html#migrate , just as an
alternative. Because doing migration via QMP commands is not as
straightforward, I wonder who might do that and why.


> +Restrictions
> +
> +
> +For migration to a file, support is conditional on the presence of the
> +mapped-ram capability, see `mapped-ram`.
> +
> +Snapshots are currently not supported.

* Maybe: Sanpshot using multiple threads (multifd) is not supported.

> +`postcopy` migration is currently not supported.

* Maybe - 'postcopy' migration using multiple threads (multifd) is not
supported. ie. 'postcopy' uses a single thread to transfer migration
data.

* Reason for these suggestions: as a writer it is easy to think
everything written in this page is to be taken with multifd context,
but readers may not do that, they may take sentences in isolation.
(just sharing thoughts)

> +Multifd consists of:
> +
> +- A client that produces the data on the migration source side and
> +  consumes it on the destination. Currently the main client code is
> +  ram.c, which selects the RAM pages for migration;

* So multifd mechanism can be used to transfer non-ram data as well? I
thought it's only used for RAM migration. Are device/gpu states etc
bits also transferred via multifd threads?

> +- A packet which is the final result of all the data aggregation
> +  and/or transformation. The packet contains: a *header* with magic and
> +  version numbers and flags that inform of special processing needed
> +  on the destination; a *payload-specific header* with metadata referent
> +  to the packet's data portion, e.g. page counts; and a variable-size
> +  *data portion* which contains the actual opaque payload data.

* It'll help to define the exact packet format here. Like they do in RFCs.

Thank you for writing this.
---
  - Prasad




Re: [PATCH] load_aout: replace bswap_needed with big_endian

2025-03-20 Thread Philippe Mathieu-Daudé

On 20/3/25 13:43, Paolo Bonzini wrote:

Targets know whether they are big-endian more than they know if
the endianness is different from the host: the former is mostly
a constant, at least in machine creation code, while the latter
has to be computed with TARGET_BIG_ENDIAN != HOST_BIG_ENDIAN or
something like that.

load_aout, however, takes a "bswap_needed" argument.  Replace
it with a "big_endian" argument; even though all users are
big-endian, it is cheap enough to keep the optional swapping
functionality even for little-endian boards.

Signed-off-by: Paolo Bonzini 
---
  include/hw/loader.h   | 2 +-
  hw/core/loader.c  | 4 ++--
  hw/ppc/mac_newworld.c | 7 +--
  hw/ppc/mac_oldworld.c | 7 +--
  hw/sparc/sun4m.c  | 9 +
  hw/sparc64/sun4u.c| 9 +
  6 files changed, 7 insertions(+), 31 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH for-10.1 30/32] vfio: Rename VFIO dirty tracking services

2025-03-20 Thread Joao Martins
On 20/03/2025 11:13, Avihai Horon wrote:
> 
> On 19/03/2025 14:21, Joao Martins wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 18/03/2025 09:54, Cédric Le Goater wrote:
>>> Rename these routines :
>>>
>>>    vfio_devices_all_device_dirty_tracking_started ->
>>> vfio_dirty_tracking_devices_is_started_all
>>>    vfio_devices_all_dirty_tracking_started    ->
>>> vfio_dirty_tracking_devices_is_started
>>>    vfio_devices_all_device_dirty_tracking ->
>>> vfio_dirty_tracking_devices_is_supported
>>>    vfio_devices_dma_logging_start ->
>>> vfio_dirty_tracking_devices_dma_logging_start
>>>    vfio_devices_dma_logging_stop  ->
>>> vfio_dirty_tracking_devices_dma_logging_stop
>>>    vfio_devices_query_dirty_bitmap    ->
>>> vfio_dirty_tracking_devices_query_dirty_bitmap
>>>    vfio_get_dirty_bitmap  ->
>>> vfio_dirty_tracking_query_dirty_bitmap
>>>
>>> to better reflect the namespace they belong to.
>>>
>>> Signed-off-by: Cédric Le Goater 
>> The change itself is fine.
>>
>> But on the other hand, it looks relatively long names, no? I am bit at two 
>> minds
>> (as I generally prefer shorter code), but I can't find any alternatives if 
>> you
>> really wanna have one namespaces associated with the subsystem:file as a C
>> namespace.
>>
>> Every once and a while me and Avihai use the acronym DPT (Dirty Page 
>> Tracking)
>> when talking about this stuff, but it seems a detour from the code style to
>> abbreviate namespaces into acronyms.
>>
>> Having said that:
>>
>>  Reviewed-by: Joao Martins 
>>
>> P.S. We could also remove 'devices' as the prefix for VF dirty tracking after
>> namespace, and thus drop 'dma logging'. That should put 'start/stop' a little
>> shorter.
> 
> This file is not related only to dirty tracking, but to memory in general.
> Maybe a better naming would be memory.{c,h}?
> Then we can have vfio_memory_* or vfio_mem_* prefix and rename to the below:>
> vfio_devices_all_device_dirty_tracking_started -> 
> vfio_mem_device_dpt_is_started
> vfio_devices_all_dirty_tracking_started    -> vfio_mem_dpt_is_started
> vfio_devices_all_device_dirty_tracking -> 
> vfio_mem_device_dpt_is_supported
> vfio_devices_dma_logging_start -> vfio_mem_device_dpt_start
> vfio_devices_dma_logging_stop  -> vfio_mem_device_dpt_stop
> vfio_devices_query_dirty_bitmap    -> vfio_mem_device_dpt_query
> vfio_get_dirty_bitmap  -> vfio_mem_dpt_query
> 
> dpt can be changed to dirty_tracking if that's clearer and not too long.
> In patch #31 we can rename to vfio_mem_{register,unregister} or
> vfio_mem_listener_{register,unregister}.
> More internal functions can be gradually renamed and added the vfio_mem_* 
> prefix.
> 
> Will that work?
> 

I would associate to memory if we were talking about Host windows, DMA mapping
and etc. I believe that's more fundamentally related to memory handling of VFIO
to justify said prefix.

Here the code Cedric moved is really about dirty page tracking, or tracking
changes made by VFs to memory. Calling it memory.c would be a bit of a misnomer
 IMHO :(

> Thanks.
> 
>>> ---
>>>   hw/vfio/dirty-tracking.h |  6 +++---
>>>   hw/vfio/container.c  |  6 +++---
>>>   hw/vfio/dirty-tracking.c | 44 
>>>   hw/vfio/trace-events |  2 +-
>>>   4 files changed, 29 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/hw/vfio/dirty-tracking.h b/hw/vfio/dirty-tracking.h
>>> index
>>> 322af30b0d5370600719594d4aed4c407f7d2295..db9494202a780108ce78b642950bfed78ba3f253
>>>  100644
>>> --- a/hw/vfio/dirty-tracking.h
>>> +++ b/hw/vfio/dirty-tracking.h
>>> @@ -11,9 +11,9 @@
>>>
>>>   extern const MemoryListener vfio_memory_listener;
>>>
>>> -bool vfio_devices_all_dirty_tracking_started(const VFIOContainerBase
>>> *bcontainer);
>>> -bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase
>>> *bcontainer);
>>> -int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t 
>>> iova,
>>> +bool vfio_dirty_tracking_devices_is_started(const VFIOContainerBase
>>> *bcontainer);
>>> +bool vfio_dirty_tracking_devices_is_supported(const VFIOContainerBase
>>> *bcontainer);
>>> +int vfio_dirty_tracking_query_dirty_bitmap(const VFIOContainerBase
>>> *bcontainer, uint64_t iova,
>>>     uint64_t size, ram_addr_t ram_addr, Error 
>>> **errp);
>>>
>>>   #endif /* HW_VFIO_DIRTY_TRACKING_H */
>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>> index
>>> 40d6c1fecbf9c37c22b8c19f8e9e8b6c5c381249..7b3ec798a77052b8cb0b47d3dceaca1428cb50bd
>>>  100644
>>> --- a/hw/vfio/container.c
>>> +++ b/hw/vfio/container.c
>>> @@ -138,8 +138,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase
>>> *bcontainer,
>>>   int ret;
>>>   Error *local_err = NULL;
>>>
>>> -    if (iotlb && vfio_devices_all_dirty_tracking_started(bcontainer)) {
>>> -   

[PATCH v2] 9pfs: fix 'total_open_fd' decrementation

2025-03-20 Thread Christian Schoenebeck
According to 'man 2 close' errors returned by close() should only be used
for either diagnostic purposes or for catching data loss due to a previous
write error, as an error result of close() usually indicates a deferred
error of a previous write operation.

Therefore not decrementing 'total_open_fd' on a close() error is wrong
and would yield in a higher open file descriptor count than actually the
case, leading to 9p server reclaiming open file descriptors too soon.

Based-on: <20250312152933.383967-7-gr...@kaod.org>
Signed-off-by: Christian Schoenebeck 
---
 V2: log a warning message on unexpected close() -> EBADF case

 hw/9pfs/9p.c | 10 +-
 hw/9pfs/codir.c  |  7 ++-
 hw/9pfs/cofile.c |  7 ++-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index b22df3aa2b..8b001b9112 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -510,7 +510,15 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
 err = (f->fid_type == P9_FID_DIR) ?
 s->ops->closedir(&s->ctx, &f->fs_reclaim) :
 s->ops->close(&s->ctx, &f->fs_reclaim);
-if (!err) {
+
+/* 'man 2 close' suggests to ignore close() errors except of EBADF 
*/
+if (unlikely(err && errno == EBADF)) {
+/*
+ * unexpected case as FIDs were picked above by having a valid
+ * file descriptor
+ */
+error_report("9pfs: v9fs_reclaim_fd() WARNING: close() failed 
with EBADF");
+} else {
 /* total_open_fd must only be mutated on main thread */
 nclosed++;
 }
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 2068a4779d..bce7dd96e9 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -20,6 +20,7 @@
 #include "fsdev/qemu-fsdev.h"
 #include "qemu/thread.h"
 #include "qemu/main-loop.h"
+#include "qemu/error-report.h"
 #include "coth.h"
 #include "9p-xattr.h"
 #include "9p-util.h"
@@ -353,7 +354,11 @@ int coroutine_fn v9fs_co_closedir(V9fsPDU *pdu, 
V9fsFidOpenState *fs)
 err = -errno;
 }
 });
-if (!err) {
+/* 'man 2 close' suggests to ignore close() errors except of EBADF */
+if (unlikely(err && errno == EBADF)) {
+/* unexpected case as we should have checked for a valid file handle */
+error_report("9pfs: WARNING: v9fs_co_closedir() failed with EBADF");
+} else {
 total_open_fd--;
 }
 return err;
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index 71174c3e4a..6e775c8e41 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -20,6 +20,7 @@
 #include "fsdev/qemu-fsdev.h"
 #include "qemu/thread.h"
 #include "qemu/main-loop.h"
+#include "qemu/error-report.h"
 #include "coth.h"
 
 int coroutine_fn v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
@@ -197,7 +198,11 @@ int coroutine_fn v9fs_co_close(V9fsPDU *pdu, 
V9fsFidOpenState *fs)
 err = -errno;
 }
 });
-if (!err) {
+/* 'man 2 close' suggests to ignore close() errors except of EBADF */
+if (unlikely(err && errno == EBADF)) {
+/* unexpected case as we should have checked for a valid file handle */
+error_report("9pfs: WARNING: v9fs_co_close() failed with EBADF");
+} else {
 total_open_fd--;
 }
 return err;
-- 
2.39.5




Re: [PATCH v2 00/30] single-binary: start make hw/arm/ common

2025-03-20 Thread Pierrick Bouvier

On 3/20/25 15:29, Pierrick Bouvier wrote:

This series focuses on removing compilation units duplication in hw/arm. We
start with this architecture because it should not be too hard to transform it,
and should give us some good hints on the difficulties we'll meet later.

We first start by making changes in global headers to be able to not rely on
specific target defines. In particular, we completely remove cpu-all.h.
We then focus on removing those defines from target/arm/cpu.h.

 From there, we modify build system to create a new hw common library (per base
architecture, "arm" in this case), instead of compiling the same files for every
target.

Finally, we can declare hw/arm/boot.c, and most of the boards as common as a
first step for this part.

- Based-on: 20250317183417.285700-1-pierrick.bouv...@linaro.org
("[PATCH v6 00/18] make system memory API available for common code")
https://lore.kernel.org/qemu-devel/20250317183417.285700-1-pierrick.bouv...@linaro.org/
- Based-on: 20250318213209.2579218-1-richard.hender...@linaro.org
("[PATCH v2 00/42] accel/tcg, codebase: Build once patches")
https://lore.kernel.org/qemu-devel/20250318213209.2579218-1-richard.hender...@linaro.org

v2:
- rebase on top of Richard series
- add target include in hw_common lib
- hw_common_lib uses -DCOMPILE_SYSTEM_VS_USER introduced by Richard series
- remove cpu-all header
- remove BSWAP_NEEDED define
- new tlb-flags header
- Cleanup i386 KVM_HAVE_MCE_INJECTION definition + move KVM_HAVE_MCE_INJECTION
- remove comment about cs_base in target/arm/cpu.h
- updated commit message about registers visibility between aarch32/aarch64
- tried remove ifdefs in target/arm/helper.c but this resulted in more a ugly
   result. So just comment calls for now, as we'll clean this file later.
- make most of the boards in hw/arm common

Pierrick Bouvier (30):
   exec/cpu-all: remove BSWAP_NEEDED
   exec/cpu-all: extract tlb flags defines to exec/tlb-flags.h
   exec/cpu-all: move cpu_copy to linux-user/qemu.h
   include/exec/cpu-all: move compile time check for CPUArchState to
 cpu-target.c
   exec/cpu-all: remove system/memory include
   exec/cpu-all: remove exec/page-protection include
   exec/cpu-all: remove tswap include
   exec/cpu-all: remove exec/cpu-interrupt include
   exec/cpu-all: remove exec/cpu-defs include
   exec/cpu-all: remove exec/target_page include
   exec/cpu-all: remove hw/core/cpu.h include
   accel/tcg: fix missing includes for TCG_GUEST_DEFAULT_MO
   accel/tcg: fix missing includes for TARGET_HAS_PRECISE_SMC
   exec/cpu-all: remove cpu include
   exec/cpu-all: transfer exec/cpu-common include to cpu.h headers
   exec/cpu-all: remove this header
   exec/target_page: runtime defintion for TARGET_PAGE_BITS_MIN
   accel/kvm: move KVM_HAVE_MCE_INJECTION define to kvm-all.c
   exec/poison: KVM_HAVE_MCE_INJECTION can now be poisoned
   target/arm/cpu: always define kvm related registers
   target/arm/cpu: flags2 is always uint64_t
   target/arm/cpu: define same set of registers for aarch32 and aarch64
   target/arm/cpu: remove inline stubs for aarch32 emulation
   meson: add common hw files
   hw/arm/boot: make compilation unit hw common
   hw/arm/armv7m: prepare compilation unit to be common
   hw/arm/digic_boards: prepare compilation unit to be common
   hw/arm/xlnx-zynqmp: prepare compilation unit to be common
   hw/arm/xlnx-versal: prepare compilation unit to be common
   hw/arm: make most of the compilation units common

  meson.build |  37 +++-
  accel/tcg/internal-target.h |   1 +
  accel/tcg/tb-internal.h |   1 -
  hw/s390x/ipl.h  |   2 +
  include/exec/cpu_ldst.h |   1 +
  include/exec/exec-all.h |   1 +
  include/exec/poison.h   |   4 +
  include/exec/target_page.h  |   3 +
  include/exec/{cpu-all.h => tlb-flags.h} |  38 +---
  include/hw/core/cpu.h   |   2 +-
  include/qemu/bswap.h|   2 +-
  include/system/kvm.h|   2 -
  linux-user/qemu.h   |   3 +
  linux-user/sparc/target_syscall.h   |   2 +
  linux-user/syscall_defs.h   |   2 +-
  target/alpha/cpu.h  |   4 +-
  target/arm/cpu.h|  40 ++--
  target/arm/internals.h  |   1 +
  target/avr/cpu.h|   4 +-
  target/hexagon/cpu.h|   3 +-
  target/hppa/cpu.h   |   5 +-
  target/i386/cpu.h   |   5 +-
  target/i386/hvf/vmx.h   |   1 +
  target/loongarch/cpu.h  |   4 +-
  target/m68k/cpu.h   |   4 +-
  target/microblaze/cpu.h |   4 +-
  target/mips/cpu.h   |   4 +-
  target/openrisc/cpu.h   |   4 +-
  target/ppc/cpu.h|   4 +-
  target/ppc/mmu-hash32.h |

[PATCH v2 29/30] hw/arm/xlnx-versal: prepare compilation unit to be common

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 hw/arm/xlnx-versal.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index 278545a3f7b..f0b383b29ee 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -17,9 +17,7 @@
 #include "hw/sysbus.h"
 #include "net/net.h"
 #include "system/system.h"
-#include "system/kvm.h"
 #include "hw/arm/boot.h"
-#include "kvm_arm.h"
 #include "hw/misc/unimp.h"
 #include "hw/arm/xlnx-versal.h"
 #include "qemu/log.h"
-- 
2.39.5




Re: [PATCH] load_aout: replace bswap_needed with big_endian

2025-03-20 Thread Pierrick Bouvier

On 3/20/25 05:43, Paolo Bonzini wrote:

Targets know whether they are big-endian more than they know if
the endianness is different from the host: the former is mostly
a constant, at least in machine creation code, while the latter
has to be computed with TARGET_BIG_ENDIAN != HOST_BIG_ENDIAN or
something like that.

load_aout, however, takes a "bswap_needed" argument.  Replace
it with a "big_endian" argument; even though all users are
big-endian, it is cheap enough to keep the optional swapping
functionality even for little-endian boards.

Signed-off-by: Paolo Bonzini 
---
  include/hw/loader.h   | 2 +-
  hw/core/loader.c  | 4 ++--
  hw/ppc/mac_newworld.c | 7 +--
  hw/ppc/mac_oldworld.c | 7 +--
  hw/sparc/sun4m.c  | 9 +
  hw/sparc64/sun4u.c| 9 +
  6 files changed, 7 insertions(+), 31 deletions(-)

diff --git a/include/hw/loader.h b/include/hw/loader.h
index 784a93d6c17..d280dc33e96 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -190,7 +190,7 @@ ssize_t load_elf(const char *filename,
  void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp);
  
  ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,

-  int bswap_needed, hwaddr target_page_size);
+  bool big_endian, hwaddr target_page_size);
  
  #define LOAD_UIMAGE_LOADADDR_INVALID (-1)
  
diff --git a/hw/core/loader.c b/hw/core/loader.c

index ce6ff1b52e3..2e35f0aa905 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -226,7 +226,7 @@ static void bswap_ahdr(struct exec *e)
  
  
  ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,

-  int bswap_needed, hwaddr target_page_size)
+  bool big_endian, hwaddr target_page_size)
  {
  int fd;
  ssize_t size, ret;
@@ -241,7 +241,7 @@ ssize_t load_aout(const char *filename, hwaddr addr, int 
max_sz,
  if (size < 0)
  goto fail;
  
-if (bswap_needed) {

+if (big_endian != HOST_BIG_ENDIAN) {
  bswap_ahdr(&e);
  }
  
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c

index cb3dc3ab482..2d5309d6f55 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -197,11 +197,6 @@ static void ppc_core99_init(MachineState *machine)
  }
  
  if (machine->kernel_filename) {

-int bswap_needed = 0;
-
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#endif
  kernel_base = KERNEL_LOAD_ADDR;
  kernel_size = load_elf(machine->kernel_filename, NULL,
 translate_kernel_address, NULL, NULL, NULL,
@@ -209,7 +204,7 @@ static void ppc_core99_init(MachineState *machine)
  if (kernel_size < 0) {
  kernel_size = load_aout(machine->kernel_filename, kernel_base,
  machine->ram_size - kernel_base,
-bswap_needed, TARGET_PAGE_SIZE);
+true, TARGET_PAGE_SIZE);
  }
  if (kernel_size < 0) {
  kernel_size = load_image_targphys(machine->kernel_filename,
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 0dbcea035c3..b5814690f5a 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -153,11 +153,6 @@ static void ppc_heathrow_init(MachineState *machine)
  }
  
  if (machine->kernel_filename) {

-int bswap_needed = 0;
-
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#endif
  kernel_base = KERNEL_LOAD_ADDR;
  kernel_size = load_elf(machine->kernel_filename, NULL,
 translate_kernel_address, NULL, NULL, NULL,
@@ -165,7 +160,7 @@ static void ppc_heathrow_init(MachineState *machine)
  if (kernel_size < 0) {
  kernel_size = load_aout(machine->kernel_filename, kernel_base,
  machine->ram_size - kernel_base,
-bswap_needed, TARGET_PAGE_SIZE);
+true, TARGET_PAGE_SIZE);
  }
  if (kernel_size < 0) {
  kernel_size = load_image_targphys(machine->kernel_filename,
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index a48d3622c5a..5aaafb40dac 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -233,20 +233,13 @@ static unsigned long sun4m_load_kernel(const char 
*kernel_filename,
  
  kernel_size = 0;

  if (linux_boot) {
-int bswap_needed;
-
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#else
-bswap_needed = 0;
-#endif
  kernel_size = load_elf(kernel_filename, NULL,
 translate_kernel_address, NULL,
 NULL, NULL, NULL, NULL,
 ELFDATA2MSB, EM_SPARC, 0, 0);
  if (kernel_size < 0)
  kernel_size = load_aout(kernel_filename, KERNEL_LOAD_ADDR,
-RAM_size - KERNEL_LOAD_ADDR, bswap_needed,
+

[PATCH v2 19/30] exec/poison: KVM_HAVE_MCE_INJECTION can now be poisoned

2025-03-20 Thread Pierrick Bouvier
We prevent common code to use this define by mistake.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
---
 include/exec/poison.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/exec/poison.h b/include/exec/poison.h
index c72f56df921..d8495b1d358 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -74,4 +74,6 @@
 #pragma GCC poison CONFIG_SOFTMMU
 #endif
 
+#pragma GCC poison KVM_HAVE_MCE_INJECTION
+
 #endif
-- 
2.39.5




[PATCH v2 30/30] hw/arm: make most of the compilation units common

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 hw/arm/meson.build | 112 ++---
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 9e8c96059eb..09b1cfe5b57 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -2,43 +2,43 @@ arm_ss = ss.source_set()
 arm_common_ss = ss.source_set()
 arm_ss.add(when: 'CONFIG_ARM_VIRT', if_true: files('virt.c'))
 arm_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
-arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic_boards.c'))
-arm_ss.add(when: 'CONFIG_EMCRAFT_SF2', if_true: files('msf2-som.c'))
-arm_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
-arm_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
-arm_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
-arm_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
-arm_ss.add(when: 'CONFIG_MUSICPAL', if_true: files('musicpal.c'))
-arm_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
-arm_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: 
files('olimex-stm32-h405.c'))
-arm_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 
'npcm7xx_boards.c'))
-arm_ss.add(when: 'CONFIG_NPCM8XX', if_true: files('npcm8xx.c', 
'npcm8xx_boards.c'))
-arm_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c'))
+arm_common_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic_boards.c'))
+arm_common_ss.add(when: 'CONFIG_EMCRAFT_SF2', if_true: files('msf2-som.c'))
+arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
+arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
+arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
+arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
+arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, 
files('musicpal.c')])
+arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: 
files('netduinoplus2.c'))
+arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: 
files('olimex-stm32-h405.c'))
+arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 
'npcm7xx_boards.c'))
+arm_common_ss.add(when: 'CONFIG_NPCM8XX', if_true: files('npcm8xx.c', 
'npcm8xx_boards.c'))
+arm_common_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c'))
 arm_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa-ref.c'))
-arm_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c'))
-arm_ss.add(when: 'CONFIG_STM32VLDISCOVERY', if_true: 
files('stm32vldiscovery.c'))
-arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
-arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
+arm_common_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c'))
+arm_common_ss.add(when: 'CONFIG_STM32VLDISCOVERY', if_true: 
files('stm32vldiscovery.c'))
+arm_common_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
+arm_common_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
 
-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
-arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c'))
-arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
-arm_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c'))
-arm_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 
'cubieboard.c'))
-arm_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 
'orangepi.c'))
-arm_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: files('allwinner-r40.c', 
'bananapi_m2u.c'))
+arm_common_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
+arm_common_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c'))
+arm_common_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
+arm_common_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c'))
+arm_common_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: 
files('allwinner-a10.c', 'cubieboard.c'))
+arm_common_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: 
files('allwinner-h3.c', 'orangepi.c'))
+arm_common_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: 
files('allwinner-r40.c', 'bananapi_m2u.c'))
 arm_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2836.c', 'raspi.c'))
-arm_ss.add(when: ['CONFIG_RASPI', 'TARGET_AARCH64'], if_true: 
files('bcm2838.c', 'raspi4b.c'))
-arm_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: files('stm32f100_soc.c'))
-arm_ss.add(when: 'CONFIG_STM32F205_SOC', if_true: files('stm32f205_soc.c'))
-arm_ss.add(when: 'CONFIG_STM32F405_SOC', if_true: files('stm32f405_soc.c'))
-arm_ss.add(when: 'CONFIG_B_L475E_IOT01A', if_true: files('b-l475e-iot01a.c'))
-arm_ss.add(when: 'CONFIG_STM32L4X5_SOC', if_true: files('stm32l4x5_soc.c'))
-arm_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp.c', 
'xlnx-zcu102.c'))
-arm_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal.c', 
'xlnx-versal-virt.c'))
-arm_ss.add(when: 'CONFIG_FSL_IMX25', if_true: files('fsl-imx25.c', 
'imx25_pdk.c'))
-arm_ss.add(when: 'C

[PATCH v2 05/30] exec/cpu-all: remove system/memory include

2025-03-20 Thread Pierrick Bouvier
We include this header where needed. When includes set already have
ifdef CONFIG_USER_ONLY, we add it here, else, we don't condition the
include.

Signed-off-by: Pierrick Bouvier 
---
 hw/s390x/ipl.h   | 1 +
 include/exec/cpu-all.h   | 3 ---
 target/arm/internals.h   | 1 +
 target/hppa/cpu.h| 1 +
 target/i386/hvf/vmx.h| 1 +
 target/ppc/mmu-hash32.h  | 2 ++
 hw/ppc/spapr_ovec.c  | 1 +
 target/alpha/helper.c| 1 +
 target/arm/hvf/hvf.c | 1 +
 target/avr/helper.c  | 1 +
 target/i386/arch_memory_mapping.c| 1 +
 target/i386/helper.c | 1 +
 target/i386/tcg/system/misc_helper.c | 1 +
 target/i386/tcg/system/tcg-cpu.c | 1 +
 target/m68k/helper.c | 1 +
 target/ppc/excp_helper.c | 1 +
 target/ppc/mmu-book3s-v3.c   | 1 +
 target/ppc/mmu-hash64.c  | 1 +
 target/ppc/mmu-radix64.c | 1 +
 target/riscv/cpu_helper.c| 1 +
 target/sparc/ldst_helper.c   | 1 +
 target/sparc/mmu_helper.c| 1 +
 target/xtensa/mmu_helper.c   | 1 +
 target/xtensa/op_helper.c| 1 +
 24 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index c6ecb3433cc..6557ac3be5b 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -15,6 +15,7 @@
 
 #include "cpu.h"
 #include "system/address-spaces.h"
+#include "system/memory.h"
 #include "hw/qdev-core.h"
 #include "hw/s390x/ipl/qipl.h"
 #include "qom/object.h"
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index b1067259e6b..eb029b65552 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -24,9 +24,6 @@
 #include "exec/cpu-interrupt.h"
 #include "exec/tswap.h"
 #include "hw/core/cpu.h"
-#ifndef CONFIG_USER_ONLY
-#include "system/memory.h"
-#endif
 
 /* page related stuff */
 #include "exec/cpu-defs.h"
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 28585c07555..895d60218e3 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -28,6 +28,7 @@
 #include "exec/breakpoint.h"
 #include "hw/registerfields.h"
 #include "tcg/tcg-gvec-desc.h"
+#include "system/memory.h"
 #include "syndrome.h"
 #include "cpu-features.h"
 
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 7be4a1d3800..bb997d07516 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -22,6 +22,7 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "system/memory.h"
 #include "qemu/cpu-float.h"
 #include "qemu/interval-tree.h"
 #include "hw/registerfields.h"
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 87a478f7fde..3ddf7982ff3 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -34,6 +34,7 @@
 #include "system/hvf_int.h"
 
 #include "system/address-spaces.h"
+#include "system/memory.h"
 
 static inline uint64_t rreg(hv_vcpuid_t vcpu, hv_x86_reg_t reg)
 {
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 2838de031c7..04c23ea75ed 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -3,6 +3,8 @@
 
 #ifndef CONFIG_USER_ONLY
 
+#include "system/memory.h"
+
 bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
   hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
   bool guest_visible);
diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
index 6d6eaf67cba..75ab4fe2623 100644
--- a/hw/ppc/spapr_ovec.c
+++ b/hw/ppc/spapr_ovec.c
@@ -16,6 +16,7 @@
 #include "migration/vmstate.h"
 #include "qemu/bitmap.h"
 #include "system/address-spaces.h"
+#include "system/memory.h"
 #include "qemu/error-report.h"
 #include "trace.h"
 #include 
diff --git a/target/alpha/helper.c b/target/alpha/helper.c
index 57cefcba144..f6261a3a53c 100644
--- a/target/alpha/helper.c
+++ b/target/alpha/helper.c
@@ -25,6 +25,7 @@
 #include "fpu/softfloat-types.h"
 #include "exec/helper-proto.h"
 #include "qemu/qemu-print.h"
+#include "system/memory.h"
 
 
 #define CONVERT_BIT(X, SRC, DST) \
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 93a3f9b53d4..34ca36fab55 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -23,6 +23,7 @@
 #include 
 
 #include "system/address-spaces.h"
+#include "system/memory.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
 #include "qemu/main-loop.h"
diff --git a/target/avr/helper.c b/target/avr/helper.c
index 6b90fa82c3d..64781bbf826 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -27,6 +27,7 @@
 #include "exec/page-protection.h"
 #include "exec/cpu_ldst.h"
 #include "system/address-spaces.h"
+#include "system/memory.h"
 #include "exec/helper-proto.h"
 #include "qemu/plugin.h"
 
diff --git a/target/i386/arch_memory_mapping.c 
b/target/i386/arch_memory_mapping.c
index ced199862dd..a2398c21732 100644
--- a/target/i386/arch_memory_mapping.c
+++ b/target/i386/arch_memory_mapping.c
@@ -14,6 +

[PATCH v6 1/6] target/loongarch: Fix error handling of KVM feature checks

2025-03-20 Thread Bibo Mao
For some paravirt KVM features, if user forces to enable it however
KVM does not support, qemu should fail to run and exit immediately,
rather than continue to run. Here set error message and return directly
in function kvm_arch_init_vcpu().

Fixes: 6edd2a9bec90 (target/loongarch/kvm: Implement LoongArch PMU extension)
Fixes: 936c3f4d7916 (target/loongarch: Use auto method with LSX feature)
Fixes: 5e360dabedb1 (target/loongarch: Use auto method with LASX feature)
Fixes: 620d9bd0022e (target/loongarch: Add paravirt ipi feature detection)
Signed-off-by: Bibo Mao 
Reviewed-by: Markus Armbruster 
Reviewed-by: Philippe Mathieu-Daudé 
---
 target/loongarch/kvm/kvm.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index 28735c80be..7f63e7c8fe 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -1081,7 +1081,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
 int ret;
 Error *local_err = NULL;
 
-ret = 0;
 qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs);
 
 if (!kvm_get_one_reg(cs, KVM_REG_LOONGARCH_DEBUG_INST, &val)) {
@@ -1091,29 +1090,34 @@ int kvm_arch_init_vcpu(CPUState *cs)
 ret = kvm_cpu_check_lsx(cs, &local_err);
 if (ret < 0) {
 error_report_err(local_err);
+return ret;
 }
 
 ret = kvm_cpu_check_lasx(cs, &local_err);
 if (ret < 0) {
 error_report_err(local_err);
+return ret;
 }
 
 ret = kvm_cpu_check_lbt(cs, &local_err);
 if (ret < 0) {
 error_report_err(local_err);
+return ret;
 }
 
 ret = kvm_cpu_check_pmu(cs, &local_err);
 if (ret < 0) {
 error_report_err(local_err);
+return ret;
 }
 
 ret = kvm_cpu_check_pv_features(cs, &local_err);
 if (ret < 0) {
 error_report_err(local_err);
+return ret;
 }
 
-return ret;
+return 0;
 }
 
 static bool loongarch_get_lbt(Object *obj, Error **errp)
-- 
2.39.3




[PATCH v6 4/6] hw/loongarch/virt: Eliminate error_propagate()

2025-03-20 Thread Bibo Mao
When there is an error, it is put into a local variable and then
propagated to somewhere else. Instead the error can be set right
away, error propagation can be removed.

Signed-off-by: Bibo Mao 
Reviewed-by: Markus Armbruster 
---
 hw/loongarch/virt.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 503362a69e..562e44e112 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -859,30 +859,29 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev,
 LoongArchCPU *cpu = LOONGARCH_CPU(dev);
 CPUState *cs = CPU(dev);
 CPUArchId *cpu_slot;
-Error *err = NULL;
 LoongArchCPUTopo topo;
 int arch_id;
 
 if (lvms->acpi_ged) {
 if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) {
-error_setg(&err,
+error_setg(errp,
"Invalid thread-id %u specified, must be in range 1:%u",
cpu->thread_id, ms->smp.threads - 1);
-goto out;
+return;
 }
 
 if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) {
-error_setg(&err,
+error_setg(errp,
"Invalid core-id %u specified, must be in range 1:%u",
cpu->core_id, ms->smp.cores - 1);
-goto out;
+return;
 }
 
 if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) {
-error_setg(&err,
+error_setg(errp,
"Invalid socket-id %u specified, must be in range 1:%u",
cpu->socket_id, ms->smp.sockets - 1);
-goto out;
+return;
 }
 
 topo.socket_id = cpu->socket_id;
@@ -891,11 +890,11 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev,
 arch_id =  virt_get_arch_id_from_topo(ms, &topo);
 cpu_slot = virt_find_cpu_slot(ms, arch_id);
 if (CPU(cpu_slot->cpu)) {
-error_setg(&err,
+error_setg(errp,
"cpu(id%d=%d:%d:%d) with arch-id %" PRIu64 " exists",
cs->cpu_index, cpu->socket_id, cpu->core_id,
cpu->thread_id, cpu_slot->arch_id);
-goto out;
+return;
 }
 } else {
 /* For cold-add cpu, find empty cpu slot */
@@ -911,33 +910,24 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev,
 cpu->env.address_space_iocsr = &lvms->as_iocsr;
 cpu->phy_id = cpu_slot->arch_id;
 cs->cpu_index = cpu_slot - ms->possible_cpus->cpus;
-numa_cpu_pre_plug(cpu_slot, dev, &err);
-out:
-if (err) {
-error_propagate(errp, err);
-}
+numa_cpu_pre_plug(cpu_slot, dev, errp);
 }
 
 static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
 DeviceState *dev, Error **errp)
 {
 LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
-Error *err = NULL;
 LoongArchCPU *cpu = LOONGARCH_CPU(dev);
 CPUState *cs = CPU(dev);
 
 if (cs->cpu_index == 0) {
-error_setg(&err, "hot-unplug of boot cpu(id%d=%d:%d:%d) not supported",
+error_setg(errp, "hot-unplug of boot cpu(id%d=%d:%d:%d) not supported",
cs->cpu_index, cpu->socket_id,
cpu->core_id, cpu->thread_id);
-error_propagate(errp, err);
 return;
 }
 
-hotplug_handler_unplug_request(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
-if (err) {
-error_propagate(errp, err);
-}
+hotplug_handler_unplug_request(HOTPLUG_HANDLER(lvms->acpi_ged), dev, errp);
 }
 
 static void virt_cpu_unplug(HotplugHandler *hotplug_dev,
-- 
2.39.3




[PATCH v6 6/6] target/loongarch: Clean up virt_cpu_irq_init() error handling

2025-03-20 Thread Bibo Mao
The Error ** argument must be NULL, &error_abort, &error_fatal, or a
pointer to a variable containing NULL. Passing an argument of the
latter kind twice without clearing it in between is wrong: if the
first call sets an error, it no longer points to NULL for the second
call.

virt_cpu_irq_init() is wrong that way: it passes &err to
hotplug_handler_plug() twice.  If both calls failed, this could trip
error_setv()'s assertion.  Moreover, if just one fails, the Error
object leaks. Fortunately, these calls can't actually fail.

Messed up in commit 50ebc3fc47f7 (hw/intc/loongarch_ipi: Notify ipi
object when cpu is plugged) and commit 087a23a87c57
(hw/intc/loongarch_extioi: Use cpu plug notification).

Clean this up by passing &error_abort instead.

Signed-off-by: Bibo Mao 
Acked-by: Markus Armbruster 
---
 hw/loongarch/virt.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 562e44e112..d7ede30af3 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -327,7 +327,6 @@ static void virt_cpu_irq_init(LoongArchVirtMachineState 
*lvms)
 MachineClass *mc = MACHINE_GET_CLASS(ms);
 const CPUArchIdList *possible_cpus;
 CPUState *cs;
-Error *err = NULL;
 
 /* cpu nodes */
 possible_cpus = mc->possible_cpu_arch_ids(ms);
@@ -337,8 +336,10 @@ static void virt_cpu_irq_init(LoongArchVirtMachineState 
*lvms)
 continue;
 }
 
-hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), DEVICE(cs), &err);
-hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), DEVICE(cs), &err);
+hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), DEVICE(cs),
+ &error_abort);
+hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), DEVICE(cs),
+ &error_abort);
 }
 }
 
-- 
2.39.3




[PATCH v6 2/6] hw/loongarch/virt: Fix error handling in cpu plug

2025-03-20 Thread Bibo Mao
In function virt_cpu_plug(), it will send cpu plug message to interrupt
controller extioi and ipi irqchip. If there is problem in this function,
system should continue to run and keep state the same before cpu is
added.

Object cpuslot::cpu is set at last only when there is no any error.
If there is, send cpu unplug message to extioi and ipi irqchip, and then
return immediately.

Fixes: ab9935d2991e (hw/loongarch/virt: Implement cpu plug interface)
Signed-off-by: Bibo Mao 
---
 hw/loongarch/virt.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index a5840ff968..8563967c8b 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -981,8 +981,6 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
 Error *err = NULL;
 
-cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
-cpu_slot->cpu = CPU(dev);
 if (lvms->ipi) {
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
 if (err) {
@@ -995,6 +993,10 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
 if (err) {
 error_propagate(errp, err);
+if (lvms->ipi) {
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev,
+   &error_abort);
+}
 return;
 }
 }
@@ -1003,9 +1005,21 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
 if (err) {
 error_propagate(errp, err);
+if (lvms->ipi) {
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev,
+   &error_abort);
+}
+
+if (lvms->extioi) {
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi), dev,
+   &error_abort);
+}
+return;
 }
 }
 
+cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
+cpu_slot->cpu = CPU(dev);
 return;
 }
 
-- 
2.39.3




Re: [PATCH v2] target/loongarch: fix bad shift in check_ps()

2025-03-20 Thread bibo mao

On 2025/3/21 上午9:13, Song Gao wrote:

  In expression 1ULL << tlb_ps, left shifting by more than 63 bits has 
undefined behavior.
The shift amount, tlb_ps, is as much as 64. check "tlb_ps >=64" to fix.

Resolves: Coverity CID 1593475

Fixes: d882c284a3 ("target/loongarch: check tlb_ps")
Suggested-by: Peter Maydell 
Signed-off-by: Song Gao 
---
v2:  define parameter tlb_ps as uint type

  target/loongarch/internals.h  |  2 +-
  target/loongarch/tcg/csr_helper.c |  2 +-
  target/loongarch/tcg/tlb_helper.c | 10 +-
  3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 1cd959a766..9fdc3059d8 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -43,7 +43,7 @@ enum {
  TLBRET_PE = 7,
  };
  
-bool check_ps(CPULoongArchState *ent, int ps);

+bool check_ps(CPULoongArchState *ent, uint8_t ps);
  
  extern const VMStateDescription vmstate_loongarch_cpu;
  
diff --git a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c

index 379c71e741..6a7a65c860 100644
--- a/target/loongarch/tcg/csr_helper.c
+++ b/target/loongarch/tcg/csr_helper.c
@@ -115,7 +115,7 @@ target_ulong helper_csrwr_ticlr(CPULoongArchState *env, 
target_ulong val)
  
  target_ulong helper_csrwr_pwcl(CPULoongArchState *env, target_ulong val)

  {
-int shift, ptbase;
+uint8_t shift, ptbase;
  int64_t old_v = env->CSR_PWCL;
  
  /*

diff --git a/target/loongarch/tcg/tlb_helper.c 
b/target/loongarch/tcg/tlb_helper.c
index 646dbf59de..bd8081e886 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -19,12 +19,12 @@
  #include "exec/log.h"
  #include "cpu-csr.h"
  
-bool check_ps(CPULoongArchState *env, int tlb_ps)

+bool check_ps(CPULoongArchState *env, uint8_t tlb_ps)
  {
- if (tlb_ps > 64) {
- return false;
- }
- return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
+if (tlb_ps >= 64) {
+return false;
+}
+return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
  }
  
  void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,




Reviewed-by: Bibo Mao 




[RFC 2/3] vdagent: Set up mouse and clipboard after live migration

2025-03-20 Thread yong . huang
From: Hyman Huang 

The struct VDAgentChardev's caps, last_serial, and cbpending
fields need to be migrated in order to allow live migration
for vdagent. And the clipboard and mouse should be configured
to correspond with the previously negotiated caps on the
destination.

Signed-off-by: Hyman Huang 
---
 ui/vdagent.c | 68 
 1 file changed, 68 insertions(+)

diff --git a/ui/vdagent.c b/ui/vdagent.c
index 7a1cf674d0..4635e8fa56 100644
--- a/ui/vdagent.c
+++ b/ui/vdagent.c
@@ -7,6 +7,7 @@
 #include "qemu/units.h"
 #include "hw/qdev-core.h"
 #include "migration/blocker.h"
+#include "migration/vmstate.h"
 #include "ui/clipboard.h"
 #include "ui/console.h"
 #include "ui/input.h"
@@ -912,6 +913,72 @@ static void vdagent_chr_parse(QemuOpts *opts, 
ChardevBackend *backend,
 cfg->clipboard = qemu_opt_get_bool(opts, "clipboard", 
VDAGENT_CLIPBOARD_DEFAULT);
 }
 
+static void vdagent_release_clipboard_all_types(VDAgentChardev *vd,
+QemuClipboardSelection s)
+{
+uint32_t type;
+
+for (type = 0; type < QEMU_CLIPBOARD_TYPE__COUNT; type++) {
+if (vd->cbpending[s] & (1 << type)) {
+vd->cbpending[s] &= ~(1 << type);
+g_autofree VDAgentMessage *msg =
+g_malloc0(sizeof(VDAgentMessage) + sizeof(uint32_t));
+
+uint8_t *selection = msg->data;
+*selection = s;
+msg->size += sizeof(uint32_t);
+msg->type = VD_AGENT_CLIPBOARD_RELEASE;
+
+vdagent_send_msg(vd, msg);
+}
+}
+}
+
+static int vdagent_post_load(void *opaque, int version_id)
+{
+VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(opaque);
+QemuClipboardSelection s = QEMU_CLIPBOARD_SELECTION_CLIPBOARD;
+
+if (vd->caps) {
+if (have_mouse(vd)) {
+vd->mouse_hs =
+qemu_input_handler_register(&vd->mouse_dev,
+&vdagent_mouse_handler);
+if (vd->mouse_hs) {
+qemu_input_handler_activate(vd->mouse_hs);
+}
+}
+
+if (have_clipboard(vd)) {
+vdagent_register_to_qemu_clipboard(vd);
+if (have_selection(vd)) {
+for (; s < QEMU_CLIPBOARD_SELECTION__COUNT; s++) {
+vdagent_release_clipboard_all_types(vd, s);
+}
+} else {
+vdagent_release_clipboard_all_types(vd, s);
+}
+}
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_vdagent = {
+.name = "vdagent",
+.version_id = 1,
+.minimum_version_id = 1,
+.post_load = vdagent_post_load,
+.fields = (VMStateField[]){
+VMSTATE_UINT32(caps, VDAgentChardev),
+VMSTATE_UINT32_ARRAY(last_serial, VDAgentChardev,
+QEMU_CLIPBOARD_SELECTION__COUNT),
+VMSTATE_UINT32_ARRAY(cbpending, VDAgentChardev,
+QEMU_CLIPBOARD_SELECTION__COUNT),
+VMSTATE_END_OF_LIST()
+}
+};
+
 /* -- */
 
 static void vdagent_chr_class_init(ObjectClass *oc, void *data)
@@ -930,6 +997,7 @@ static void vdagent_chr_init(Object *obj)
 VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(obj);
 
 buffer_init(&vd->outbuf, "vdagent-outbuf");
+vmstate_register(NULL, 0, &vmstate_vdagent, vd);
 error_setg(&vd->migration_blocker,
"The vdagent chardev doesn't yet support migration");
 }
-- 
2.27.0




[RFC 0/3] Support live migration for qemu-vdagent chardev

2025-03-20 Thread yong . huang
From: Hyman Huang 

Our goal is to migrate VMs that are configured with qemu-vdagent-typed
chardev while allowing the agent to continue working without having
to restart the service in guest.

Let's justify which fields should be taken into account for struct
VDAgentChardev.

struct VDAgentChardev {
Chardev parent;

/* config */
bool mouse;
bool clipboard;

/* guest vdagent */
uint32_t caps;
VDIChunkHeader chunk;
uint32_t chunksize;
uint8_t *msgbuf;
uint32_t msgsize;
uint8_t *xbuf;
uint32_t xoff, xsize;
Buffer outbuf;

/* mouse */
DeviceState mouse_dev;
uint32_t mouse_x;
uint32_t mouse_y;
uint32_t mouse_btn;
uint32_t mouse_display;
QemuInputHandlerState *mouse_hs;

/* clipboard */
QemuClipboardPeer cbpeer;
uint32_t last_serial[QEMU_CLIPBOARD_SELECTION__COUNT];
uint32_t cbpending[QEMU_CLIPBOARD_SELECTION__COUNT];
};

parent:
No dynamic information is generated. skip migrating.

mouse, clipboard:
The mouse and clipboard should be set up identically on both sides.
Skip migrating.

caps:
Store the negotiated caps between the client and the guest.
Should migrate.

chunk, ... outbuf:
The spice agent protocol's message transportation between the client
and the guest is implemented using all of these fields, however the
message loss can be tolerated by guests because the issue may occur
in the real world as well.
Could skip migrating.

mouse_dev, ... mouse_hs:
The mouse state can be reset after a live migration since the agent
working inside the guest does not heavily depend on them.
Could skip migrating

cbpeer:
Since the cbpeer would lose the data it references to if the qemu
clipboard data was not migrated, this field can also be initialized
after live migration.
Could skip migrating

last_serial, cbpending:
It is necessary for the agent to function after live migration.
Should migrate.

For the last_serial, saving & loading its value to make ensure the
client receives the most recent clipboard data from the guest after
live migration.

For the cbpending, saving & loading its value aims to inform the
guest that the clipboard has been released and is now empty in
case that the guest acts strangely while supposing that the
requested data can be properly retrieved.

To summarize, all we need to do is migrate the caps, last_serial
and cbpendings fields of the struct VDAgentChardev,

Please review, thanks

Yong

Hyman Huang (3):
  vdagent: Wrap vdagent_register_to_qemu_clipboard function
  vdagent: Set up mouse and clipboard after live migration
  vdagent: Drop blocker to support migration

 ui/trace-events |   1 +
 ui/vdagent.c| 102 +++-
 2 files changed, 85 insertions(+), 18 deletions(-)

-- 
2.27.0




Re: [PATCH v2 26/42] semihosting: Move user-only implementation out-of-line

2025-03-20 Thread Richard Henderson

On 3/19/25 00:16, Philippe Mathieu-Daudé wrote:

On 18/3/25 22:31, Richard Henderson wrote:

Avoid testing CONFIG_USER_ONLY in semihost.h.
The only function that's required is semihosting_enabled.

Signed-off-by: Richard Henderson 
---
  include/semihosting/semihost.h | 29 ++---
  semihosting/user.c | 15 +++
  semihosting/meson.build    |  2 ++
  3 files changed, 19 insertions(+), 27 deletions(-)
  create mode 100644 semihosting/user.c




diff --git a/semihosting/user.c b/semihosting/user.c
new file mode 100644
index 00..9473729beb
--- /dev/null
+++ b/semihosting/user.c
@@ -0,0 +1,15 @@
+/*
+ * Semihosting for user emulation
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "semihosting/semihost.h"
+
+bool semihosting_enabled(bool is_user)
+{


While moving this code, we could also add:

    assert(is_user);


I have added this as a separate patch.


r~



[PULL 3/3] target/loongarch: fix bad shift in check_ps()

2025-03-20 Thread Bibo Mao
From: Song Gao 

In expression 1ULL << tlb_ps, left shifting by more than 63 bits has
undefined behavior. The shift amount, tlb_ps, is as much as 64. check
"tlb_ps >=64" to fix.

Resolves: Coverity CID 1593475

Fixes: d882c284a3 ("target/loongarch: check tlb_ps")
Suggested-by: Peter Maydell 
Signed-off-by: Song Gao 
Reviewed-by: Bibo Mao 
Signed-off-by: Bibo Mao 
---
 target/loongarch/internals.h  |  2 +-
 target/loongarch/tcg/csr_helper.c |  2 +-
 target/loongarch/tcg/tlb_helper.c | 10 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 1cd959a766..9fdc3059d8 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -43,7 +43,7 @@ enum {
 TLBRET_PE = 7,
 };
 
-bool check_ps(CPULoongArchState *ent, int ps);
+bool check_ps(CPULoongArchState *ent, uint8_t ps);
 
 extern const VMStateDescription vmstate_loongarch_cpu;
 
diff --git a/target/loongarch/tcg/csr_helper.c 
b/target/loongarch/tcg/csr_helper.c
index 379c71e741..6a7a65c860 100644
--- a/target/loongarch/tcg/csr_helper.c
+++ b/target/loongarch/tcg/csr_helper.c
@@ -115,7 +115,7 @@ target_ulong helper_csrwr_ticlr(CPULoongArchState *env, 
target_ulong val)
 
 target_ulong helper_csrwr_pwcl(CPULoongArchState *env, target_ulong val)
 {
-int shift, ptbase;
+uint8_t shift, ptbase;
 int64_t old_v = env->CSR_PWCL;
 
 /*
diff --git a/target/loongarch/tcg/tlb_helper.c 
b/target/loongarch/tcg/tlb_helper.c
index 646dbf59de..bd8081e886 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -19,12 +19,12 @@
 #include "exec/log.h"
 #include "cpu-csr.h"
 
-bool check_ps(CPULoongArchState *env, int tlb_ps)
+bool check_ps(CPULoongArchState *env, uint8_t tlb_ps)
 {
- if (tlb_ps > 64) {
- return false;
- }
- return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
+if (tlb_ps >= 64) {
+return false;
+}
+return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
 }
 
 void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,
-- 
2.43.5




[PATCH v6 0/6] target/loongarch: Fix some issues reported from coccinelle

2025-03-20 Thread Bibo Mao
This patch set solves errors reported by coccinelle tool with commands:
  spatch --sp-file scripts/coccinelle/*.cocci --dir target/loongarch/
  spatch --sp-file scripts/coccinelle/*.cocci --dir hw/loongarch/

The main problem is that qemu should fail to run when feature is forced
to enabled however KVM does not support it, rather than report error and
continue to run.

Also there is fixup for cpu plug and unplug. If there is error when cpu
is plug/unplug at runtime,  system should restore to previous state and
continue to run.

---
  v5 ... v6:
1. If there is nested error report when restore from error in function
   virt_cpu_plug(), set output Error object with &error_abort rather
   than NULL, since it is almost impossible now.
2. If there is nested error report when restore from error in function
   virt_cpu_unplug(), set output Error object with &error_abort rather
   than NULL, since it is almost impossible now.

  v4 ... v5:
1. Split patch2 in v4 into three small patches, two are fixup for error
   handing when cpu plug/unplug fails so that system can continue to
   run, one is to remove error_propagate() and refresh title.
2. Refresh changelog in last patch and remove fixes information
   since it is impossible to happen.

  v3 ... v4:
1. Add missed this cleanup with error and remove some local error
   object.
2. Replace local error object with error_abort object in
   virt_cpu_irq_init(), since its return value is not checked.

  v2 ... v3:
1. Add missing modification replacing error_propagate() + error_setg()
  with error_setg().
2. Some enhancement about error handling, handling error
   symmetrically in many places

  v1 ... v2:
1. Add fixes tag and change title with fix prefix in patch 1.
2. Replace error_propagate() with error_setg(), and return directly
   for any error.
---
Bibo Mao (6):
  target/loongarch: Fix error handling of KVM feature checks
  hw/loongarch/virt: Fix error handling in cpu plug
  hw/loongarch/virt: Fix error handling in cpu unplug
  hw/loongarch/virt: Eliminate error_propagate()
  target/loongarch: Remove unnecessary temporary variable assignment
  target/loongarch: Clean up virt_cpu_irq_init() error handling

 hw/loongarch/virt.c   | 63 ++-
 target/loongarch/kvm/kvm.c|  8 +++-
 target/loongarch/tcg/tlb_helper.c |  5 +--
 3 files changed, 45 insertions(+), 31 deletions(-)


base-commit: 1dae461a913f9da88df05de6e2020d3134356f2e
-- 
2.39.3




[PATCH v2 26/30] hw/arm/armv7m: prepare compilation unit to be common

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 hw/arm/armv7m.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 98a69846119..c367c2dcb99 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -139,8 +139,9 @@ static MemTxResult v7m_sysreg_ns_write(void *opaque, hwaddr 
addr,
 if (attrs.secure) {
 /* S accesses to the alias act like NS accesses to the real region */
 attrs.secure = 0;
+MemOp end = target_words_bigendian() ? MO_BE : MO_LE;
 return memory_region_dispatch_write(mr, addr, value,
-size_memop(size) | MO_TE, attrs);
+size_memop(size) | end, attrs);
 } else {
 /* NS attrs are RAZ/WI for privileged, and BusFault for user */
 if (attrs.user) {
@@ -159,8 +160,9 @@ static MemTxResult v7m_sysreg_ns_read(void *opaque, hwaddr 
addr,
 if (attrs.secure) {
 /* S accesses to the alias act like NS accesses to the real region */
 attrs.secure = 0;
+MemOp end = target_words_bigendian() ? MO_BE : MO_LE;
 return memory_region_dispatch_read(mr, addr, data,
-   size_memop(size) | MO_TE, attrs);
+   size_memop(size) | end, attrs);
 } else {
 /* NS attrs are RAZ/WI for privileged, and BusFault for user */
 if (attrs.user) {
@@ -186,8 +188,9 @@ static MemTxResult v7m_systick_write(void *opaque, hwaddr 
addr,
 
 /* Direct the access to the correct systick */
 mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0);
+MemOp end = target_words_bigendian() ? MO_BE : MO_LE;
 return memory_region_dispatch_write(mr, addr, value,
-size_memop(size) | MO_TE, attrs);
+size_memop(size) | end, attrs);
 }
 
 static MemTxResult v7m_systick_read(void *opaque, hwaddr addr,
@@ -199,7 +202,8 @@ static MemTxResult v7m_systick_read(void *opaque, hwaddr 
addr,
 
 /* Direct the access to the correct systick */
 mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0);
-return memory_region_dispatch_read(mr, addr, data, size_memop(size) | 
MO_TE,
+MemOp end = target_words_bigendian() ? MO_BE : MO_LE;
+return memory_region_dispatch_read(mr, addr, data, size_memop(size) | end,
attrs);
 }
 
-- 
2.39.5




[PATCH v2 27/30] hw/arm/digic_boards: prepare compilation unit to be common

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 hw/arm/digic_boards.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/digic_boards.c b/hw/arm/digic_boards.c
index 2492fafeb85..466b8b84c0e 100644
--- a/hw/arm/digic_boards.c
+++ b/hw/arm/digic_boards.c
@@ -80,7 +80,7 @@ static void digic4_board_init(MachineState *machine, 
DigicBoard *board)
 static void digic_load_rom(DigicState *s, hwaddr addr,
hwaddr max_size, const char *filename)
 {
-target_long rom_size;
+ssize_t rom_size;
 
 if (qtest_enabled()) {
 /* qtest runs no code so don't attempt a ROM load which
-- 
2.39.5




[PATCH v2 01/30] exec/cpu-all: remove BSWAP_NEEDED

2025-03-20 Thread Pierrick Bouvier
This identifier is poisoned, so it can't be used from common code
anyway. We replace all occurrences with its definition directly.

Signed-off-by: Pierrick Bouvier 
---
 include/exec/cpu-all.h| 12 
 linux-user/syscall_defs.h |  2 +-
 bsd-user/elfload.c|  6 +++---
 hw/ppc/mac_newworld.c |  4 +---
 hw/ppc/mac_oldworld.c |  4 +---
 hw/sparc/sun4m.c  |  6 +-
 hw/sparc64/sun4u.c|  6 +-
 linux-user/elfload.c  |  8 
 8 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 981a08e3bb3..013fcc9412a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -28,18 +28,6 @@
 #include "system/memory.h"
 #endif
 
-/* some important defines:
- *
- * HOST_BIG_ENDIAN : whether the host cpu is big endian and
- * otherwise little endian.
- *
- * TARGET_BIG_ENDIAN : same for the target cpu
- */
-
-#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
-#define BSWAP_NEEDED
-#endif
-
 /* page related stuff */
 #include "exec/cpu-defs.h"
 #include "exec/target_page.h"
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 86d773add75..5d227599924 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -462,7 +462,7 @@ typedef struct {
 abi_ulong sig[TARGET_NSIG_WORDS];
 } target_sigset_t;
 
-#ifdef BSWAP_NEEDED
+#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
 static inline void tswap_sigset(target_sigset_t *d, const target_sigset_t *s)
 {
 int i;
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 833fa3bd057..3bca0cc9ede 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -44,7 +44,7 @@ static inline void memcpy_fromfs(void *to, const void *from, 
unsigned long n)
 memcpy(to, from, n);
 }
 
-#ifdef BSWAP_NEEDED
+#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
 static void bswap_ehdr(struct elfhdr *ehdr)
 {
 bswap16s(&ehdr->e_type);/* Object file type */
@@ -111,7 +111,7 @@ static void bswap_note(struct elf_note *en)
 bswap32s(&en->n_type);
 }
 
-#else /* ! BSWAP_NEEDED */
+#else
 
 static void bswap_ehdr(struct elfhdr *ehdr) { }
 static void bswap_phdr(struct elf_phdr *phdr, int phnum) { }
@@ -119,7 +119,7 @@ static void bswap_shdr(struct elf_shdr *shdr, int shnum) { }
 static void bswap_sym(struct elf_sym *sym) { }
 static void bswap_note(struct elf_note *en) { }
 
-#endif /* ! BSWAP_NEEDED */
+#endif /* HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN */
 
 #include "elfcore.c"
 
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index cb3dc3ab482..624c2731a65 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -199,9 +199,7 @@ static void ppc_core99_init(MachineState *machine)
 if (machine->kernel_filename) {
 int bswap_needed = 0;
 
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#endif
+bswap_needed = HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN;
 kernel_base = KERNEL_LOAD_ADDR;
 kernel_size = load_elf(machine->kernel_filename, NULL,
translate_kernel_address, NULL, NULL, NULL,
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 0dbcea035c3..439953fc29e 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -155,9 +155,7 @@ static void ppc_heathrow_init(MachineState *machine)
 if (machine->kernel_filename) {
 int bswap_needed = 0;
 
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#endif
+bswap_needed = HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN;
 kernel_base = KERNEL_LOAD_ADDR;
 kernel_size = load_elf(machine->kernel_filename, NULL,
translate_kernel_address, NULL, NULL, NULL,
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index a48d3622c5a..d27a9b693a5 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -235,11 +235,7 @@ static unsigned long sun4m_load_kernel(const char 
*kernel_filename,
 if (linux_boot) {
 int bswap_needed;
 
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#else
-bswap_needed = 0;
-#endif
+bswap_needed = HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN;
 kernel_size = load_elf(kernel_filename, NULL,
translate_kernel_address, NULL,
NULL, NULL, NULL, NULL,
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 8ab5cf0461f..c7bccf584e6 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -170,11 +170,7 @@ static uint64_t sun4u_load_kernel(const char 
*kernel_filename,
 if (linux_boot) {
 int bswap_needed;
 
-#ifdef BSWAP_NEEDED
-bswap_needed = 1;
-#else
-bswap_needed = 0;
-#endif
+bswap_needed = HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN;
 kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, kernel_entry,
kernel_addr, &kernel_top, NULL,
ELFDATA2MSB, EM_SPARCV9, 0, 0);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f54054dce3d..99811af

[PATCH v2 02/30] exec/cpu-all: extract tlb flags defines to exec/tlb-flags.h

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 include/exec/cpu-all.h   | 63 
 include/exec/tlb-flags.h | 87 
 accel/tcg/cputlb.c   |  1 +
 accel/tcg/user-exec.c|  1 +
 semihosting/uaccess.c|  1 +
 target/arm/ptw.c |  1 +
 target/arm/tcg/helper-a64.c  |  1 +
 target/arm/tcg/mte_helper.c  |  1 +
 target/arm/tcg/sve_helper.c  |  1 +
 target/i386/tcg/system/excp_helper.c |  1 +
 target/riscv/op_helper.c |  1 +
 target/riscv/vector_helper.c |  1 +
 target/s390x/tcg/mem_helper.c|  1 +
 target/sparc/mmu_helper.c|  1 +
 14 files changed, 99 insertions(+), 63 deletions(-)
 create mode 100644 include/exec/tlb-flags.h

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 013fcc9412a..d2895fb55b1 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -36,69 +36,6 @@ CPUArchState *cpu_copy(CPUArchState *env);
 
 #include "cpu.h"
 
-#ifdef CONFIG_USER_ONLY
-
-/*
- * Allow some level of source compatibility with softmmu.  We do not
- * support any of the more exotic features, so only invalid pages may
- * be signaled by probe_access_flags().
- */
-#define TLB_INVALID_MASK(1 << (TARGET_PAGE_BITS_MIN - 1))
-#define TLB_MMIO(1 << (TARGET_PAGE_BITS_MIN - 2))
-#define TLB_WATCHPOINT  0
-
-#else
-
-/*
- * Flags stored in the low bits of the TLB virtual address.
- * These are defined so that fast path ram access is all zeros.
- * The flags all must be between TARGET_PAGE_BITS and
- * maximum address alignment bit.
- *
- * Use TARGET_PAGE_BITS_MIN so that these bits are constant
- * when TARGET_PAGE_BITS_VARY is in effect.
- *
- * The count, if not the placement of these bits is known
- * to tcg/tcg-op-ldst.c, check_max_alignment().
- */
-/* Zero if TLB entry is valid.  */
-#define TLB_INVALID_MASK(1 << (TARGET_PAGE_BITS_MIN - 1))
-/* Set if TLB entry references a clean RAM page.  The iotlb entry will
-   contain the page physical address.  */
-#define TLB_NOTDIRTY(1 << (TARGET_PAGE_BITS_MIN - 2))
-/* Set if TLB entry is an IO callback.  */
-#define TLB_MMIO(1 << (TARGET_PAGE_BITS_MIN - 3))
-/* Set if TLB entry writes ignored.  */
-#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 4))
-/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
-#define TLB_FORCE_SLOW  (1 << (TARGET_PAGE_BITS_MIN - 5))
-
-/*
- * Use this mask to check interception with an alignment mask
- * in a TCG backend.
- */
-#define TLB_FLAGS_MASK \
-(TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
-| TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
-
-/*
- * Flags stored in CPUTLBEntryFull.slow_flags[x].
- * TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x].
- */
-/* Set if TLB entry requires byte swap.  */
-#define TLB_BSWAP(1 << 0)
-/* Set if TLB entry contains a watchpoint.  */
-#define TLB_WATCHPOINT   (1 << 1)
-/* Set if TLB entry requires aligned accesses.  */
-#define TLB_CHECK_ALIGNED(1 << 2)
-
-#define TLB_SLOW_FLAGS_MASK  (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED)
-
-/* The two sets of flags must not overlap. */
-QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
-
-#endif /* !CONFIG_USER_ONLY */
-
 /* Validate correct placement of CPUArchState. */
 QEMU_BUILD_BUG_ON(offsetof(ArchCPU, parent_obj) != 0);
 QEMU_BUILD_BUG_ON(offsetof(ArchCPU, env) != sizeof(CPUState));
diff --git a/include/exec/tlb-flags.h b/include/exec/tlb-flags.h
new file mode 100644
index 000..c371ae77602
--- /dev/null
+++ b/include/exec/tlb-flags.h
@@ -0,0 +1,87 @@
+/*
+ * TLB flags definition
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+#ifndef TLB_FLAGS_H
+#define TLB_FLAGS_H
+
+#include "exec/cpu-defs.h"
+
+#ifdef CONFIG_USER_ONLY
+
+/*
+ * Allow some level of source compatibility with softmmu.  We do not
+ * support any of the more exotic features, so only invalid pages may
+ * be signaled by probe_access_flags().
+ */
+#define TLB_INVALID_MASK(1 << (TARGET_PAGE_BITS_MIN - 1))
+#define TLB_MMIO(1 << (TARGET_PAGE_BITS_MIN - 2))
+#define TLB_WATCHPOINT  0
+
+#else
+
+/*
+ * Flags stored in the low bits of the TLB virtual address.
+ * These are defined so that

Re: [RFC PATCH v2 03/20] hw/arm/smmuv3-accel: Add initial infrastructure for smmuv3-accel device

2025-03-20 Thread Donald Dutile




On 3/19/25 2:09 PM, Eric Auger wrote:

Hi Nicolin,


On 3/19/25 6:14 PM, Nicolin Chen wrote:

On Wed, Mar 19, 2025 at 05:45:51PM +0100, Eric Auger wrote:



On 3/17/25 8:10 PM, Nicolin Chen wrote:

On Mon, Mar 17, 2025 at 07:07:52PM +0100, Eric Auger wrote:

On 3/17/25 6:54 PM, Nicolin Chen wrote:

On Wed, Mar 12, 2025 at 04:15:10PM +0100, Eric Auger wrote:

On 3/11/25 3:10 PM, Shameer Kolothum wrote:

Based on SMMUv3 as a parent device, add a user-creatable smmuv3-accel
device. In order to support vfio-pci dev assignment with a Guest

guest

SMMUv3, the physical SMMUv3 has to be configured in nested(S1+s2)

nested (s1+s2)

mode, with Guest owning the S1 page tables. Subsequent patches will

the guest

add support for smmuv3-accel to provide this.

Can't this -accel smmu also works with emulated devices? Do we want an
exclusive usage?

Is there any benefit from emulated devices working in the HW-
accelerated nested translation mode?

Not really but do we have any justification for using different device
name in accel mode? I am not even sure that accel option is really
needed. Ideally the qemu device should be able to detect it is
protecting a VFIO device, in which case it shall check whether nested is
supported by host SMMU and then automatically turn accel mode?

I gave the example of the vfio device which has different class
implementration depending on the iommufd option being set or not.

Do you mean that we should just create a regular smmuv3 device and
let a VFIO device to turn on this smmuv3's accel mode depending on
its LEGACY/IOMMUFD class?

no this is not what I meant. I gave an example where depending on an
option passed to thye VFIO device you choose one class implement or the
other.

Option means something like this:
-device smmuv3,accel=on
instead of
-device "smmuv3-accel"
?

Yea, I think that's good.

Yeah actually that's a big debate for not much. From an implementation
pov that shall not change much. The only doubt I have is if we need to
conditionnaly expose the MSI RESV regions it is easier to do if we
detect we have a smmuv3-accel. what the option allows is the auto mode.



Another question: how does an emulated device work with a vSMMUv3?

I don't get your question. vSMMUv3 currently only works with emulated
devices. Did you mean accelerated SMMUv3?

Yea. If "accel=on", how does an emulated device work with that?


I could imagine that all the accel steps would be bypassed since
!sdev->idev. Yet, the emulated iotlb should cache its translation
so we will need to flush the iotlb, which will increase complexity
as the TLBI command dispatching function will need to be aware what
ASID is for emulated device and what is for vfio device..

I don't get the issue. For emulated device you go through the usual
translate path which indeed caches configs and translations. In case the
guest invalidates something, you know the SID and you find the entries
in the cache that are tagged by this SID.

In case you have an accelerated device (indeed if sdev->idev) you don't
exercise that path. On invalidation you detect the SID matches a VFIO
devoce, propagate the invalidations to the host instead. on the
invalidation you should be able to detect pretty easily if you need to
flush the emulated caches or propagate the invalidations. Do I miss some
extra problematic?

I do not say we should support emulated devices and VFIO devices in the
same guest iommu group. But I don't see why we couldn't easily plug the
accelerated logic in the current logical for emulation/vhost and do not
require a different qemu device.

Hmm, feels like I fundamentally misunderstood your point.
  a) We implement the device model with the same piece of code but
 only provide an option "accel=on/off" to switch mode. And both
 passthrough devices and emulated devices can attach to the same
 "accel=on" device.

I think we all agree we don't want that use case in general. However
effectively I was questioning why it couldn't work maybe at the expense
of some perf degration.

  b) We implement the device model with the same piece of code but
 only provide an option "accel=on/off" to switch mode. Then, an
 passthrough device can attach to an "accel=on" device, but an
 emulated device can only attach to an "accel=off" SMMU device.

I was thinking that you want case (a). But actually you were just
talking about case (b)? I think (b) is totally fine.

We certainly can't do case (a): not all TLBI commands gives an "SID"
field (so would have to broadcast, i.e. underlying SMMU HW would run
commands that were supposed for emulated devices only); in case of
vCMDQ, commands for emulated devices would be issued to real HW and

I am still confused about that. For instance if the guest sends an
NH_ASID, NH_VA invalidation and it happens both the emulated device and
VFIO-device share the same cd.asid (same guest iommu domain, which
practically should not happen) why shouldn't we propagate the

it can't ...

Re: [PATCH] host/include/loongarch64: Fix inline assembly compatibility with Clang

2025-03-20 Thread bibo mao

applied to loongarch-next.

Regards
Bibo Mao

On 2025/3/14 上午11:31, Yao Zi wrote:

Clang on LoongArch only accepts fp register names in the dollar-prefixed
form, while GCC allows omitting the dollar. Change registers in ASM
clobbers to the dollar-prefixed form to make user emulators buildable
with Clang on loongarch64. No functional change invovled.

Signed-off-by: Yao Zi 
---
  host/include/loongarch64/host/atomic128-ldst.h.inc| 4 ++--
  host/include/loongarch64/host/bufferiszero.c.inc  | 6 --
  host/include/loongarch64/host/load-extract-al16-al8.h.inc | 2 +-
  3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/host/include/loongarch64/host/atomic128-ldst.h.inc 
b/host/include/loongarch64/host/atomic128-ldst.h.inc
index 9a4a8f8b9e..754d2143f0 100644
--- a/host/include/loongarch64/host/atomic128-ldst.h.inc
+++ b/host/include/loongarch64/host/atomic128-ldst.h.inc
@@ -28,7 +28,7 @@ static inline Int128 atomic16_read_ro(const Int128 *ptr)
  asm("vld $vr0, %2, 0\n\t"
  "vpickve2gr.d %0, $vr0, 0\n\t"
  "vpickve2gr.d %1, $vr0, 1"
-   : "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+: "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "$f0");
  
  return int128_make128(l, h);

  }
@@ -46,7 +46,7 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
  asm("vinsgr2vr.d $vr0, %1, 0\n\t"
  "vinsgr2vr.d $vr0, %2, 1\n\t"
  "vst $vr0, %3, 0"
-   : "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+: "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "$f0");
  }
  
  #endif /* LOONGARCH_ATOMIC128_LDST_H */

diff --git a/host/include/loongarch64/host/bufferiszero.c.inc 
b/host/include/loongarch64/host/bufferiszero.c.inc
index 69891eac80..bb2598fdc3 100644
--- a/host/include/loongarch64/host/bufferiszero.c.inc
+++ b/host/include/loongarch64/host/bufferiszero.c.inc
@@ -61,7 +61,8 @@ static bool buffer_is_zero_lsx(const void *buf, size_t len)
  "2:"
  : "=&r"(ret), "+r"(p)
  : "r"(buf), "r"(e), "r"(l)
-: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8",
+  "$fcc0");
  
  return ret;

  }
@@ -119,7 +120,8 @@ static bool buffer_is_zero_lasx(const void *buf, size_t len)
  "3:"
  : "=&r"(ret), "+r"(p)
  : "r"(buf), "r"(e), "r"(l)
-: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8",
+  "$fcc0");
  
  return ret;

  }
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h.inc 
b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
index d1fb59d8af..9528521e7d 100644
--- a/host/include/loongarch64/host/load-extract-al16-al8.h.inc
+++ b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
@@ -31,7 +31,7 @@ static inline uint64_t load_atom_extract_al16_or_al8(void 
*pv, int s)
  asm("vld $vr0, %2, 0\n\t"
  "vpickve2gr.d %0, $vr0, 0\n\t"
  "vpickve2gr.d %1, $vr0, 1"
-   : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+: "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "$f0");
  
  return (l >> shr) | (h << (-shr & 63));

  }






Re: [RFC 0/3] Support live migration for qemu-vdagent chardev

2025-03-20 Thread Marc-André Lureau
Hi

On Fri, Mar 21, 2025 at 7:40 AM  wrote:
>
> From: Hyman Huang 
>
> Our goal is to migrate VMs that are configured with qemu-vdagent-typed
> chardev while allowing the agent to continue working without having
> to restart the service in guest.
>

I sent a more complete series last week: "[PATCH for-10.1 00/10]
Support vdagent migration"
https://patchew.org/QEMU/20250311155932.1472092-1-marcandre.lur...@redhat.com/



> Let's justify which fields should be taken into account for struct
> VDAgentChardev.
>
> struct VDAgentChardev {
> Chardev parent;
>
> /* config */
> bool mouse;
> bool clipboard;
>
> /* guest vdagent */
> uint32_t caps;
> VDIChunkHeader chunk;
> uint32_t chunksize;
> uint8_t *msgbuf;
> uint32_t msgsize;
> uint8_t *xbuf;
> uint32_t xoff, xsize;
> Buffer outbuf;
>
> /* mouse */
> DeviceState mouse_dev;
> uint32_t mouse_x;
> uint32_t mouse_y;
> uint32_t mouse_btn;
> uint32_t mouse_display;
> QemuInputHandlerState *mouse_hs;
>
> /* clipboard */
> QemuClipboardPeer cbpeer;
> uint32_t last_serial[QEMU_CLIPBOARD_SELECTION__COUNT];
> uint32_t cbpending[QEMU_CLIPBOARD_SELECTION__COUNT];
> };
>
> parent:
> No dynamic information is generated. skip migrating.
>
> mouse, clipboard:
> The mouse and clipboard should be set up identically on both sides.
> Skip migrating.
>
> caps:
> Store the negotiated caps between the client and the guest.
> Should migrate.
>
> chunk, ... outbuf:
> The spice agent protocol's message transportation between the client
> and the guest is implemented using all of these fields, however the
> message loss can be tolerated by guests because the issue may occur
> in the real world as well.
> Could skip migrating.

It's part of the host/guest state, data will be lost and it's likely
the communication will break if it's not migrated.

>
> mouse_dev, ... mouse_hs:
> The mouse state can be reset after a live migration since the agent
> working inside the guest does not heavily depend on them.
> Could skip migrating

same

> cbpeer:
> Since the cbpeer would lose the data it references to if the qemu
> clipboard data was not migrated, this field can also be initialized
> after live migration.
> Could skip migrating
>

We should migrate the clipboard content too, to avoid having to
request it again, or have a noticeable effect.

> last_serial, cbpending:
> It is necessary for the agent to function after live migration.
> Should migrate.
>
> For the last_serial, saving & loading its value to make ensure the
> client receives the most recent clipboard data from the guest after
> live migration.
>
> For the cbpending, saving & loading its value aims to inform the
> guest that the clipboard has been released and is now empty in
> case that the guest acts strangely while supposing that the
> requested data can be properly retrieved.
>
> To summarize, all we need to do is migrate the caps, last_serial
> and cbpendings fields of the struct VDAgentChardev,
>
> Please review, thanks
>
> Yong
>
> Hyman Huang (3):
>   vdagent: Wrap vdagent_register_to_qemu_clipboard function
>   vdagent: Set up mouse and clipboard after live migration
>   vdagent: Drop blocker to support migration
>
>  ui/trace-events |   1 +
>  ui/vdagent.c| 102 +++-
>  2 files changed, 85 insertions(+), 18 deletions(-)
>
> --
> 2.27.0
>




Re: [PATCH v3 3/3] target/riscv/kvm: add missing KVM CSRs

2025-03-20 Thread Daniel Henrique Barboza




On 3/20/25 11:41 AM, Andrew Jones wrote:

On Thu, Mar 20, 2025 at 07:25:07AM -0700, Andrea Bolognani wrote:

On Mon, Mar 03, 2025 at 01:46:53PM +1000, Alistair Francis wrote:

On Mon, Feb 24, 2025 at 10:32 PM Daniel Henrique Barboza 
 wrote:

We're missing scounteren and senvcfg CSRs, both already present in the
KVM UAPI.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 


Acked-by: Alistair Francis 


This patch seems to have broken KVM acceleration for me:

   $ ./build/qemu-system-riscv64 -display none -M virt,accel=kvm -cpu host
   qemu-system-riscv64: Failed to put registers after init: No such
file or directory

Reverting it makes QEMU work again.

My host is a SiFive HiFive Premier P550 board running Fedora 41. Note
that, since the upstreaming effort for this SoC has just recently
started, I'm using the 6.6-based vendor kernel.


Ancient :-)



Perhaps the KVM UAPI additions mentioned in the commit message are
more recent than that, and we need to make QEMU's use of them
conditional rather than unconditional?


scounteren has been around since the dawn of riscv kvm, but senvcfg has
only been there since 6.7 (just missed your ancient cut-off).

The true fix for this is to start using get-reg-list, which should
hopefully work with the 6.6 kernel too since get-reg-list support has
been around since 6.6.


That's the plan. We need to make the same treatment we're already doing with
the extensions with these CSRs.



A quick fix for this is to just drop senvcfg for now since nobody
noticed it was missing before (well, I noticed it was missing, but by
inspection, not test).


We can do that in case the proper fix turns out to be more complex than
we've anticipated and we'll miss the cut for the release. I think we have
time to do it properly, but let's see.

Thanks,

Daniel



Thanks,
drew





Re: [PATCH v6 3/6] hw/loongarch/virt: Fix error handling in cpu unplug

2025-03-20 Thread Markus Armbruster
Bibo Mao  writes:

> In function virt_cpu_unplug(), it will send cpu unplug message to
> interrupt controller extioi and ipi irqchip. If there is problem in
> this function, system should continue to run and keep state the same
> before cpu is removed.
>
> If error happends in cpu unplug stage, send cpu plug message to extioi
> and ipi irqchip to restore to previous stage, and then return immediately.
>
> Fixes: 2cd6857f6f5b (hw/loongarch/virt: Implement cpu unplug interface)
> Signed-off-by: Bibo Mao 
> ---
>  hw/loongarch/virt.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
> index 8563967c8b..503362a69e 100644
> --- a/hw/loongarch/virt.c
> +++ b/hw/loongarch/virt.c
> @@ -958,6 +958,8 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev,
>  hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
>  if (err) {
>  error_propagate(errp, err);
> +hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev,
> + &error_abort);
>  return;
>  }
>  
> @@ -965,6 +967,10 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev,
>  hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
>  if (err) {
>  error_propagate(errp, err);
> +hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev,
> + &error_abort);
> +hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev,
> + &error_abort);
>  return;
>  }

virt_cpu_unplug() calls hotplug_handler_unplug() three times to notify
ipi, extioi, and acpi_get.  If any notification fails, virt_cpu_unplug()
calls hotplug_handler_plug() to "un-notify" the preceeding ones, if any.
This must not fail.

virt_cpu_plug() does it the other way round (see previous patch).

So, hotplug_handler_plug() must not fail in virt_cpu_unplug(), yet we
check for it to fail in virt_cpu_plug().

Can it really fail in virt_cpu_plug()?

If yes, why can't it fail in virt_cpu_unplug()?

Same questions for hotplug_handler_unplug().




[PATCH v2 21/30] target/arm/cpu: flags2 is always uint64_t

2025-03-20 Thread Pierrick Bouvier
Do not rely on target dependent type, but use a fixed type instead.
Since the original type is unsigned, it should be safe to extend its
size without any side effect.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
---
 target/arm/cpu.h| 10 --
 target/arm/tcg/hflags.c |  4 ++--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index ab7412772bc..cc975175c61 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -194,7 +194,7 @@ typedef struct ARMPACKey {
 /* See the commentary above the TBFLAG field definitions.  */
 typedef struct CPUARMTBFlags {
 uint32_t flags;
-target_ulong flags2;
+uint64_t flags2;
 } CPUARMTBFlags;
 
 typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
@@ -2968,11 +2968,9 @@ uint64_t arm_sctlr(CPUARMState *env, int el);
  * We collect these two parts in CPUARMTBFlags where they are named
  * flags and flags2 respectively.
  *
- * The flags that are shared between all execution modes, TBFLAG_ANY,
- * are stored in flags.  The flags that are specific to a given mode
- * are stores in flags2.  Since cs_base is sized on the configured
- * address size, flags2 always has 64-bits for A64, and a minimum of
- * 32-bits for A32 and M32.
+ * The flags that are shared between all execution modes, TBFLAG_ANY, are 
stored
+ * in flags. The flags that are specific to a given mode are stored in flags2.
+ * flags2 always has 64-bits, even though only 32-bits are used for A32 and 
M32.
  *
  * The bits for 32-bit A-profile and M-profile partially overlap:
  *
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 8d79b8b7ae1..e51d9f7b159 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -506,8 +506,8 @@ void assert_hflags_rebuild_correctly(CPUARMState *env)
 
 if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) {
 fprintf(stderr, "TCG hflags mismatch "
-"(current:(0x%08x,0x" TARGET_FMT_lx ")"
-" rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n",
+"(current:(0x%08x,0x%016" PRIx64 ")"
+" rebuilt:(0x%08x,0x%016" PRIx64 ")\n",
 c.flags, c.flags2, r.flags, r.flags2);
 abort();
 }
-- 
2.39.5




[PATCH v2 07/30] exec/cpu-all: remove tswap include

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 include/exec/cpu-all.h   | 1 -
 target/ppc/mmu-hash64.h  | 2 ++
 target/i386/tcg/system/excp_helper.c | 1 +
 target/i386/xsave_helper.c   | 1 +
 target/riscv/vector_helper.c | 1 +
 5 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 4a2cac1252d..1539574a22a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -21,7 +21,6 @@
 
 #include "exec/cpu-common.h"
 #include "exec/cpu-interrupt.h"
-#include "exec/tswap.h"
 #include "hw/core/cpu.h"
 
 /* page related stuff */
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index ae8d4b37aed..b8fb12a9705 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -1,6 +1,8 @@
 #ifndef MMU_HASH64_H
 #define MMU_HASH64_H
 
+#include "exec/tswap.h"
+
 #ifndef CONFIG_USER_ONLY
 
 #ifdef TARGET_PPC64
diff --git a/target/i386/tcg/system/excp_helper.c 
b/target/i386/tcg/system/excp_helper.c
index b0b74df72fd..4badd739432 100644
--- a/target/i386/tcg/system/excp_helper.c
+++ b/target/i386/tcg/system/excp_helper.c
@@ -23,6 +23,7 @@
 #include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/tlb-flags.h"
+#include "exec/tswap.h"
 #include "tcg/helper-tcg.h"
 
 typedef struct TranslateParams {
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
index 996e9f3bfef..24ab7be8e9a 100644
--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
@@ -5,6 +5,7 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
+#include "exec/tswap.h"
 
 void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
 {
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ff05390baef..ff8b2b395f5 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -26,6 +26,7 @@
 #include "exec/page-protection.h"
 #include "exec/helper-proto.h"
 #include "exec/tlb-flags.h"
+#include "exec/tswap.h"
 #include "fpu/softfloat.h"
 #include "tcg/tcg-gvec-desc.h"
 #include "internals.h"
-- 
2.39.5




[PATCH v2 11/30] exec/cpu-all: remove hw/core/cpu.h include

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 include/exec/cpu-all.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index d4705210370..d4d05d82315 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -20,7 +20,6 @@
 #define CPU_ALL_H
 
 #include "exec/cpu-common.h"
-#include "hw/core/cpu.h"
 
 #include "cpu.h"
 
-- 
2.39.5




[PATCH v2 25/30] hw/arm/boot: make compilation unit hw common

2025-03-20 Thread Pierrick Bouvier
Now we eliminated poisoned identifiers from headers, this file can now
be compiled once for all arm targets.

Signed-off-by: Pierrick Bouvier 
---
 hw/arm/boot.c  | 1 +
 hw/arm/meson.build | 5 -
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index d3811b896fd..f94b940bc31 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -14,6 +14,7 @@
 #include 
 #include "hw/arm/boot.h"
 #include "hw/arm/linux-boot-if.h"
+#include "cpu.h"
 #include "exec/target_page.h"
 #include "system/kvm.h"
 #include "system/tcg.h"
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index ac473ce7cda..9e8c96059eb 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -1,5 +1,5 @@
 arm_ss = ss.source_set()
-arm_ss.add(files('boot.c'))
+arm_common_ss = ss.source_set()
 arm_ss.add(when: 'CONFIG_ARM_VIRT', if_true: files('virt.c'))
 arm_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
 arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic_boards.c'))
@@ -75,4 +75,7 @@ system_ss.add(when: 'CONFIG_SX1', if_true: 
files('omap_sx1.c'))
 system_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
 system_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
 
+arm_common_ss.add(fdt, files('boot.c'))
+
 hw_arch += {'arm': arm_ss}
+hw_common_arch += {'arm': arm_common_ss}
-- 
2.39.5




[PATCH v2 20/30] target/arm/cpu: always define kvm related registers

2025-03-20 Thread Pierrick Bouvier
This does not hurt, even if they are not used.

Signed-off-by: Pierrick Bouvier 
---
 target/arm/cpu.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index a8a1a8faf6b..ab7412772bc 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -971,7 +971,6 @@ struct ArchCPU {
  */
 uint32_t kvm_target;
 
-#ifdef CONFIG_KVM
 /* KVM init features for this CPU */
 uint32_t kvm_init_features[7];
 
@@ -984,7 +983,6 @@ struct ArchCPU {
 
 /* KVM steal time */
 OnOffAuto kvm_steal_time;
-#endif /* CONFIG_KVM */
 
 /* Uniprocessor system with MP extensions */
 bool mp_is_up;
-- 
2.39.5




[PATCH v2 03/30] exec/cpu-all: move cpu_copy to linux-user/qemu.h

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 include/exec/cpu-all.h | 2 --
 linux-user/qemu.h  | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index d2895fb55b1..74017a5ce7c 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -32,8 +32,6 @@
 #include "exec/cpu-defs.h"
 #include "exec/target_page.h"
 
-CPUArchState *cpu_copy(CPUArchState *env);
-
 #include "cpu.h"
 
 /* Validate correct placement of CPUArchState. */
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 5f007501518..948de8431a5 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -362,4 +362,7 @@ void *lock_user_string(abi_ulong guest_addr);
 #define unlock_user_struct(host_ptr, guest_addr, copy) \
 unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0)
 
+/* Clone cpu state */
+CPUArchState *cpu_copy(CPUArchState *env);
+
 #endif /* QEMU_H */
-- 
2.39.5




Re: [PATCH v2 12/42] accel/tcg: Use cpu_ld*_code_mmu in translator.c

2025-03-20 Thread Richard Henderson

On 3/18/25 17:23, Pierrick Bouvier wrote:

  uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
  {
-    uint64_t raw, tgt;
+    uint64_t val;
-    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
-    tgt = tswap64(raw);
-    } else {
-    tgt = cpu_ldq_code(env, pc);
-    raw = tswap64(tgt);
-    record_save(db, pc, &raw, sizeof(raw));
+    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+    MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
+    val = cpu_ldq_code_mmu(env, pc, oi, 0);
+    record_save(db, pc, &val, sizeof(val));
  }
-    return tgt;
+    return tswap64(val);
  }
  void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)


If I understand correctly, cpu_ldq_code_mmu performs the tswap call we used to before. 


Incorrect: cpu_ldq_code_mmu has no tswap.

It has a conditional bswap, if MO_BSWAP is set, but that's not true for the MO_UQ used 
here.  Therefore both the direct load in translator_ld and the cpu_ld*_code_mmu function 
call both produce host-endian values.


Therefore the tswap at the end correctly swaps host to target-endianness.


r~



RE: [PATCH 23/39] target/hexagon: Add sysemu_ops, cpu_get_phys_page_debug()

2025-03-20 Thread ltaylorsimpson



> -Original Message-
> From: Brian Cain 
> Sent: Friday, February 28, 2025 11:28 PM
> To: qemu-devel@nongnu.org
> Cc: brian.c...@oss.qualcomm.com; richard.hender...@linaro.org;
> phi...@linaro.org; quic_mathb...@quicinc.com; a...@rev.ng; a...@rev.ng;
> quic_mlie...@quicinc.com; ltaylorsimp...@gmail.com;
> alex.ben...@linaro.org; quic_mbur...@quicinc.com;
> sidn...@quicinc.com; Brian Cain 
> Subject: [PATCH 23/39] target/hexagon: Add sysemu_ops,
> cpu_get_phys_page_debug()
> 
> From: Brian Cain 
> 
> Signed-off-by: Brian Cain 

Reviewed-by: Taylor Simpson 





[PATCH v2 10/30] exec/cpu-all: remove exec/target_page include

2025-03-20 Thread Pierrick Bouvier
Signed-off-by: Pierrick Bouvier 
---
 hw/s390x/ipl.h  | 1 +
 include/exec/cpu-all.h  | 3 ---
 include/exec/exec-all.h | 1 +
 include/exec/tlb-flags.h| 1 +
 linux-user/sparc/target_syscall.h   | 2 ++
 hw/alpha/dp264.c| 1 +
 hw/arm/boot.c   | 1 +
 hw/arm/smmuv3.c | 1 +
 hw/hppa/machine.c   | 1 +
 hw/i386/multiboot.c | 1 +
 hw/i386/pc.c| 1 +
 hw/i386/pc_sysfw_ovmf.c | 1 +
 hw/i386/vapic.c | 1 +
 hw/loongarch/virt.c | 1 +
 hw/m68k/q800.c  | 1 +
 hw/m68k/virt.c  | 1 +
 hw/openrisc/boot.c  | 1 +
 hw/pci-host/astro.c | 1 +
 hw/ppc/e500.c   | 1 +
 hw/ppc/mac_newworld.c   | 1 +
 hw/ppc/mac_oldworld.c   | 1 +
 hw/ppc/ppc_booke.c  | 1 +
 hw/ppc/prep.c   | 1 +
 hw/ppc/spapr_hcall.c| 1 +
 hw/riscv/riscv-iommu-pci.c  | 1 +
 hw/riscv/riscv-iommu.c  | 1 +
 hw/s390x/s390-pci-bus.c | 1 +
 hw/s390x/s390-pci-inst.c| 1 +
 hw/s390x/s390-skeys.c   | 1 +
 hw/sparc/sun4m.c| 1 +
 hw/sparc64/sun4u.c  | 1 +
 monitor/hmp-cmds-target.c   | 1 +
 target/alpha/helper.c   | 1 +
 target/arm/gdbstub64.c  | 1 +
 target/arm/tcg/tlb-insns.c  | 1 +
 target/avr/helper.c | 1 +
 target/hexagon/translate.c  | 1 +
 target/i386/helper.c| 1 +
 target/i386/hvf/hvf.c   | 1 +
 target/i386/kvm/hyperv.c| 1 +
 target/i386/kvm/kvm.c   | 1 +
 target/i386/kvm/xen-emu.c   | 1 +
 target/i386/sev.c   | 1 +
 target/loongarch/cpu_helper.c   | 1 +
 target/loongarch/tcg/translate.c| 1 +
 target/microblaze/helper.c  | 1 +
 target/microblaze/mmu.c | 1 +
 target/mips/tcg/system/cp0_helper.c | 1 +
 target/mips/tcg/translate.c | 1 +
 target/openrisc/mmu.c   | 1 +
 target/riscv/pmp.c  | 1 +
 target/rx/cpu.c | 1 +
 target/s390x/helper.c   | 1 +
 target/s390x/ioinst.c   | 1 +
 target/tricore/helper.c | 1 +
 target/xtensa/helper.c  | 1 +
 target/xtensa/xtensa-semi.c | 1 +
 57 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index 6557ac3be5b..cb55101f062 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -14,6 +14,7 @@
 #define HW_S390_IPL_H
 
 #include "cpu.h"
+#include "exec/target_page.h"
 #include "system/address-spaces.h"
 #include "system/memory.h"
 #include "hw/qdev-core.h"
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index db44c0d3016..d4705210370 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -22,9 +22,6 @@
 #include "exec/cpu-common.h"
 #include "hw/core/cpu.h"
 
-/* page related stuff */
-#include "exec/target_page.h"
-
 #include "cpu.h"
 
 #endif /* CPU_ALL_H */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 19b0eda44a7..c00683f74b0 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -24,6 +24,7 @@
 #include "exec/cpu_ldst.h"
 #endif
 #include "exec/mmu-access-type.h"
+#include "exec/target_page.h"
 #include "exec/translation-block.h"
 
 #if defined(CONFIG_TCG)
diff --git a/include/exec/tlb-flags.h b/include/exec/tlb-flags.h
index c371ae77602..2273144f421 100644
--- a/include/exec/tlb-flags.h
+++ b/include/exec/tlb-flags.h
@@ -20,6 +20,7 @@
 #define TLB_FLAGS_H
 
 #include "exec/cpu-defs.h"
+#include "exec/target_page.h"
 
 #ifdef CONFIG_USER_ONLY
 
diff --git a/linux-user/sparc/target_syscall.h 
b/linux-user/sparc/target_syscall.h
index e4211653574..c22ede1ddd2 100644
--- a/linux-user/sparc/target_syscall.h
+++ b/linux-user/sparc/target_syscall.h
@@ -1,6 +1,8 @@
 #ifndef SPARC_TARGET_SYSCALL_H
 #define SPARC_TARGET_SYSCALL_H
 
+#include "exec/target_page.h"
+
 #if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
 struct target_pt_regs {
 abi_ulong u_regs[16];
diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c
index 570ea9edf24..c1e24a4ffe8 100644
--- a/hw/alpha/dp264.c
+++ b/hw/alpha/dp264.c
@@ -15,6 +15,7 @@
 #include "hw/rtc/mc146818rtc.h"
 #include "hw/ide/pci.h"
 #include "hw/isa/superio.h"
+#include "exec/target_page.h"
 #include "net/net.h"
 #include "qemu/cutils.h"
 #include "qemu/datadir.h"
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index e296b62fa12..d3811b896fd 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -14,6 +14,7 @@
 #include 
 #include "hw/arm/boot.h"
 #include "hw/arm/linux-boot-if.h"
+#include "exec/target_page.h"
 #include "system/kvm.h"
 #include "system/tcg.h"
 #include "system/system.h"
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 704469abf19..62d0b3933ca 100644

[PATCH v2 22/30] target/arm/cpu: define same set of registers for aarch32 and aarch64

2025-03-20 Thread Pierrick Bouvier
To eliminate TARGET_AARCH64, we need to make various definitions common
between 32 and 64 bit Arm targets.
Added registers are used only by aarch64 code, and the only impact is on
the size of CPUARMState, and added zarray
(ARMVectorReg zarray[ARM_MAX_VQ * 16]) member (+64KB)

It could be eventually possible to allocate this array only for aarch64
emulation, but I'm not sure it's worth the hassle to save a few KB per
vcpu. Running qemu-system takes already several hundreds of MB of
(resident) memory, and qemu-user takes dozens of MB of (resident) memory
anyway.

As part of this, we define ARM_MAX_VQ once for aarch32 and aarch64,
which will affect zregs field for aarch32.
This field is used for MVE and SVE implementations. MVE implementation
is clipping index value to 0 or 1 for zregs[*].d[],
so we should not touch the rest of data in this case anyway.

This change is safe regarding migration, because aarch64 registers still
have the same size, and for aarch32, only zregs is modified.
Migration code explicitly specify a size of 2 for env.vfp.zregs[0].d,
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2). So extending
the storage size has no impact.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
---
 target/arm/cpu.h | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index cc975175c61..b1c3e463267 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -169,17 +169,12 @@ typedef struct ARMGenericTimer {
  * Align the data for use with TCG host vector operations.
  */
 
-#ifdef TARGET_AARCH64
-# define ARM_MAX_VQ16
-#else
-# define ARM_MAX_VQ1
-#endif
+#define ARM_MAX_VQ16
 
 typedef struct ARMVectorReg {
 uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
 } ARMVectorReg;
 
-#ifdef TARGET_AARCH64
 /* In AArch32 mode, predicate registers do not exist at all.  */
 typedef struct ARMPredicateReg {
 uint64_t p[DIV_ROUND_UP(2 * ARM_MAX_VQ, 8)] QEMU_ALIGNED(16);
@@ -189,7 +184,6 @@ typedef struct ARMPredicateReg {
 typedef struct ARMPACKey {
 uint64_t lo, hi;
 } ARMPACKey;
-#endif
 
 /* See the commentary above the TBFLAG field definitions.  */
 typedef struct CPUARMTBFlags {
@@ -660,13 +654,11 @@ typedef struct CPUArchState {
 struct {
 ARMVectorReg zregs[32];
 
-#ifdef TARGET_AARCH64
 /* Store FFR as pregs[16] to make it easier to treat as any other.  */
 #define FFR_PRED_NUM 16
 ARMPredicateReg pregs[17];
 /* Scratch space for aa64 sve predicate temporary.  */
 ARMPredicateReg preg_tmp;
-#endif
 
 /* We store these fpcsr fields separately for convenience.  */
 uint32_t qc[4] QEMU_ALIGNED(16);
@@ -711,7 +703,6 @@ typedef struct CPUArchState {
 uint32_t cregs[16];
 } iwmmxt;
 
-#ifdef TARGET_AARCH64
 struct {
 ARMPACKey apia;
 ARMPACKey apib;
@@ -743,7 +734,6 @@ typedef struct CPUArchState {
  * to keep the offsets into the rest of the structure smaller.
  */
 ARMVectorReg zarray[ARM_MAX_VQ * 16];
-#endif
 
 struct CPUBreakpoint *cpu_breakpoint[16];
 struct CPUWatchpoint *cpu_watchpoint[16];
-- 
2.39.5




Re: VDPA MAC address problem

2025-03-20 Thread Konstantin Shkolnyy

On 3/20/2025 20:22, Jason Wang wrote:

On Fri, Mar 21, 2025 at 12:45 AM Konstantin Shkolnyy  wrote:


On 3/19/2025 19:58, Jason Wang wrote:

On Thu, Mar 20, 2025 at 12:34 AM Konstantin Shkolnyy  wrote:

Upon reading this forum, I see that VHOST_VDPA_SET_CONFIG is
“deprecated”, and so VIRTIO_NET_CTRL_MAC_ADDR_SET must be the right
method, but it’s apparently called too late.


VHOST_VDPA_SET_CONFIG requires the vDPA parent support which is not
necessarily there.


The mlx5 driver doesn't do anything for VHOST_VDPA_SET_CONFIG. Intel's
driver, however, apparently stores the configuration. So, it appears,
Intel will avoid the problem... Perhaps mlx5 could do the same so that
QEMU can set the address before it starts the VM (QEMU doesn't have to
later let the VM change the config).


The problem is that config space is not allowed to be written, that is
why mlx5 doesn't implement the write method.


Conceptually, setting the address
by QEMU cmdline doesn't look different from setting it by "vdpa dev add".


It's kind of different.

E.g the device may not even have VIRTIO_NET_F_MAC feature etc.


I'm not sure I understand... This is the MAC address returned by 
VHOST_VDPA_GET_CONFIG. If mlx5 allows it to be set by "vdpa dev add", 
shouldn't it also allow it to be set by VHOST_VDPA_SET_CONFIG called by 
virtio_net_device_realize(), before the VM exists.


When VM starts running, it queries this MAC address and QEMU fetches it 
by VHOST_VDPA_GET_CONFIG. But, because VHOST_VDPA_SET_CONFIG hasn't 
stored it, it fetches a wrong stale address.





Re: [PATCH for-10.1 v9 0/9] virtio-net: add support for SR-IOV emulation

2025-03-20 Thread Yui Washizu



On 2025/03/21 14:18, Akihiko Odaki wrote:

On 2025/03/21 13:34, Yui Washizu wrote:


I tested the following features with this patch series, and there 
were not  issues:

- Creation and deletion of VFs
- Communication with an external machine through VFs

Thank you.


Can you reply with:

Tested-by: Your Name 



OK, Thank you.

I resend it along with the content.


I tested the following features with this patch series, and there were 
not  issues:

- Creation and deletion of VFs
- Communication with an external machine through VFs


Tested-by: Yui Washizu 




This tag means:
- you have tested changes you found significant,
- you found no problems with them, and
- you are fine to record that in Git commit logs

Pasha Tatashin (who told me they tried the patches), it would be nice 
if you do the same.


Regards,
Akihiko Odaki




Yui


On 2025/03/14 15:14, Akihiko Odaki wrote:

Based-on:<20250104-reuse-v18-0-c349eafd8...@daynix.com>
("[PATCH v18 00/14] hw/pci: SR-IOV related fixes and improvements")

Introduction


This series is based on the RFC series submitted by Yui Washizu[1].
See also [2] for the context.

This series enables SR-IOV emulation for virtio-net. It is useful
to test SR-IOV support on the guest, or to expose several vDPA devices
in a VM. vDPA devices can also provide L2 switching feature for
offloading though it is out of scope to allow the guest to configure
such a feature.

The PF side code resides in virtio-pci. The VF side code resides in
the PCI common infrastructure, but it is restricted to work only for
virtio-net-pci because of lack of validation.

User Interface
--

A user can configure a SR-IOV capable virtio-net device by adding
virtio-net-pci functions to a bus. Below is a command line example:
   -netdev user,id=n -netdev user,id=o
   -netdev user,id=p -netdev user,id=q
   -device pcie-root-port,id=b
   -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f

The VFs specify the paired PF with "sriov-pf" property. The PF must be
added after all VFs. It is user's responsibility to ensure that VFs 
have

function numbers larger than one of the PF, and the function numbers
have a consistent stride.

Keeping VF instances


A problem with SR-IOV emulation is that it needs to hotplug the VFs as
the guest requests. Previously, this behavior was implemented by
realizing and unrealizing VFs at runtime. However, this strategy does
not work well for the proposed virtio-net emulation; in this proposal,
device options passed in the command line must be maintained as VFs
are hotplugged, but they are consumed when the machine starts and not
available after that, which makes realizing VFs at runtime impossible.

As an strategy alternative to runtime realization/unrealization, this
series proposes to reuse the code to power down PCI Express devices.
When a PCI Express device is powered down, it will be hidden from the
guest but will be kept realized. This effectively implements the
behavior we need for the SR-IOV emulation.

Summary
---

Patch 1 disables ROM BAR, which virtio-net-pci enables by default, for
VFs.
Patch 2 makes zero stride valid for 1 VF configuration.
Patch 3 and 4 adds validations.
Patch 5 adds user-created SR-IOV VF infrastructure.
Patch 6 makes virtio-pci work as SR-IOV PF for user-created VFs.
Patch 7 allows user to create SR-IOV VFs with virtio-net-pci.

[1]https://patchew.org/QEMU/1689731808-3009-1-git-send-email- 
yui.wash...@gmail.com/
[2]https://lore.kernel.org/all/5d46f455- 
f530-4e5e-9ae7-13a2297d4...@daynix.com/


Co-developed-by: Yui Washizu
Signed-off-by: Akihiko Odaki
---
Changes in v9:
- Rebased.
- Link to v8:https://lore.kernel.org/r/20250104-sriov- 
v8-0-56144cfdc...@daynix.com


Changes in v8:
- Rebased.
- Link to v7:https://lore.kernel.org/r/20240813-sriov- 
v7-0-8515e3774...@daynix.com


Changes in v7:
- Removed #include , which is no longer needed.
- Rebased.
- Link to v6:https://lore.kernel.org/r/20240802-sriov- 
v6-0-0c8ff49c4...@daynix.com


Changes in v6:
- Added ARI extended capability.
- Rebased.
- Link to v5:https://lore.kernel.org/r/20240715-sriov- 
v5-0-3f5539093...@daynix.com


Changes in v5:
- Dropped the RFC tag.
- Fixed device unrealization.
- Rebased.
- Link to v4:https://lore.kernel.org/r/20240428-sriov-v4-0- 
ac8ac6212...@daynix.com


Changes in v4:
- Added patch "hw/pci: Fix SR-IOV VF number calculation" to fix 
division

   by zero reported by Yui Washizu.
- Rebased.
- Link to v3:https://lore.kernel.org/r/20240305-sriov-v3-0- 
abdb75770...@daynix.com


Changes in v3:
- Rebased.
- Link to v2:https://lore.kernel.org/r/20231210-sriov-v2-0- 
b959e8a6d...@daynix.com


Changes in v2:
- Changed to keep VF instances.
- Link to v1:https://lore.kernel.org/r/20231202-sriov- 
v1-0-32b3570f7...@daynix.com



[PATCH v2 18/30] accel/kvm: move KVM_HAVE_MCE_INJECTION define to kvm-all.c

2025-03-20 Thread Pierrick Bouvier
This define is used only in accel/kvm/kvm-all.c, so we push directly the
definition there. Add more visibility to kvm_arch_on_sigbus_vcpu() to
allow removing this define from any header.

The architectures defining KVM_HAVE_MCE_INJECTION are i386, x86_64 and
aarch64.

Signed-off-by: Pierrick Bouvier 
---
 include/system/kvm.h | 2 --
 target/arm/cpu.h | 4 
 target/i386/cpu.h| 2 --
 accel/kvm/kvm-all.c  | 5 +
 4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/system/kvm.h b/include/system/kvm.h
index 716c7dcdf6b..b690dda1370 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -392,9 +392,7 @@ bool kvm_vcpu_id_is_valid(int vcpu_id);
 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
 unsigned long kvm_arch_vcpu_id(CPUState *cpu);
 
-#ifdef KVM_HAVE_MCE_INJECTION
 void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
-#endif
 
 void kvm_arch_init_irq_routing(KVMState *s);
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index ea9956395ca..a8a1a8faf6b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -33,10 +33,6 @@
 #include "target/arm/multiprocessing.h"
 #include "target/arm/gtimer.h"
 
-#ifdef TARGET_AARCH64
-#define KVM_HAVE_MCE_INJECTION 1
-#endif
-
 #define EXCP_UDEF1   /* undefined instruction */
 #define EXCP_SWI 2   /* software interrupt */
 #define EXCP_PREFETCH_ABORT  3
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 049bdd1a893..44ee263d8f1 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -35,8 +35,6 @@
 
 #define XEN_NR_VIRQS 24
 
-#define KVM_HAVE_MCE_INJECTION 1
-
 /* support for self modifying code even if the modified instruction is
close to the modifying instruction */
 #define TARGET_HAS_PRECISE_SMC
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 0723a3933bb..7c5d1a98bc4 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -57,6 +57,11 @@
 #include 
 #endif
 
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+# define KVM_HAVE_MCE_INJECTION 1
+#endif
+
+
 /* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
  * need to use the real host PAGE_SIZE, as that's what KVM will use.
  */
-- 
2.39.5




[PATCH v2 17/30] exec/target_page: runtime defintion for TARGET_PAGE_BITS_MIN

2025-03-20 Thread Pierrick Bouvier
We introduce later a mechanism to skip cpu definitions inclusion, so we
can detect it here, and call the correct runtime function instead.

Signed-off-by: Pierrick Bouvier 
---
 include/exec/target_page.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/exec/target_page.h b/include/exec/target_page.h
index 8e89e5cbe6f..aeddb25c743 100644
--- a/include/exec/target_page.h
+++ b/include/exec/target_page.h
@@ -40,6 +40,9 @@ extern const TargetPageBits target_page;
 #  define TARGET_PAGE_MASK   ((TARGET_PAGE_TYPE)target_page.mask)
 # endif
 # define TARGET_PAGE_SIZE(-(int)TARGET_PAGE_MASK)
+# ifndef TARGET_PAGE_BITS_MIN
+#  define TARGET_PAGE_BITS_MIN qemu_target_page_bits_min()
+# endif
 #else
 # define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS
 # define TARGET_PAGE_SIZE(1 << TARGET_PAGE_BITS)
-- 
2.39.5




Re: [RFC PATCH v2 03/20] hw/arm/smmuv3-accel: Add initial infrastructure for smmuv3-accel device

2025-03-20 Thread Donald Dutile




On 3/19/25 1:04 PM, Eric Auger wrote:




On 3/18/25 10:22 PM, Donald Dutile wrote:



On 3/18/25 3:13 PM, Nicolin Chen wrote:

On Tue, Mar 18, 2025 at 07:31:36PM +0100, Eric Auger wrote:

On 3/17/25 9:19 PM, Nicolin Chen wrote:

On Mon, Mar 17, 2025 at 04:24:53PM -0300, Jason Gunthorpe wrote:

On Mon, Mar 17, 2025 at 12:10:19PM -0700, Nicolin Chen wrote:

Another question: how does an emulated device work with a vSMMUv3?
I could imagine that all the accel steps would be bypassed since
!sdev->idev. Yet, the emulated iotlb should cache its translation
so we will need to flush the iotlb, which will increase complexity
as the TLBI command dispatching function will need to be aware what
ASID is for emulated device and what is for vfio device..

I think you should block it. We already expect different vSMMU's
depending on the physical SMMU under the PCI device, it makes sense
that a SW VFIO device would have it's own, non-accelerated, vSMMU
model in the guest.

Yea, I agree and it'd be cleaner for an implementation separating
them.

In my mind, the general idea of "accel=on" is also to keep things
in a more efficient way: passthrough devices go to HW-accelerated
vSMMUs (separated PCIE buses), while emulated ones go to a vSMMU-
bypassed (PCIE0).



Originally a specific SMMU device was needed to opt in for MSI reserved
region ACPI IORT description which are not needed if you don't rely on
S1+S2. However if we don't rely on this trick this was not even needed
with legacy integration
(https://patchwork.kernel.org/project/qemu-devel/cover/20180921081819.9203-1-eric.au...@redhat.com/).


Nevertheless I don't think anything prevents the acceleration granted
device from also working with virtio/vhost devices for instance unless
you unplug the existing infra. The translation and invalidation just
should use different control paths (explicit translation requests,
invalidation notifications towards vhost, ...).


smmuv3_translate() is per sdev, so it's easy.

Invalidation is done via commands, which could be tricky:
a) Broadcast command
b) ASID validation -- we'll need to keep track of a list of ASIDs
     for vfio device to compare the ASID in each per-ASID command,
     potentially by trapping all CFGI_CD(_ALL) commands? Note that
     each vfio device may have multiple ASIDs (for multiple CDs).
Either a or b above will have some validation efficiency impact.


Again, what does legitimate to have different qemu devices for the same
IP? I understand that it simplifies the implementation but I am not
sure
this is a good reason. Nevertheless it worth challenging. What is the
plan for intel iommu? Will we have 2 devices, the legacy device and one
for nested?


Hmm, it seems that there are two different topics:
1. Use one SMMU device model (source code file; "iommu=" string)
     for both an emulated vSMMU and an HW-accelerated vSMMU.
2. Allow one vSMMU instance to work with both an emulated device
     and a passthrough device.
And I get that you want both 1 and 2.

I'm totally okay with 1, yet see no compelling benefit from 2 for
the increased complexity in the invalidation routine.

And another question about the mixed device attachment. Let's say
we have in the host:
    VFIO passthrough dev0 -> pSMMU0
    VFIO passthrough dev1 -> pSMMU1
Should we allow emulated devices to be flexibly plugged?
    dev0 -> vSMMU0 /* Hard requirement */
    dev1 -> vSMMU1 /* Hard requirement */
    emu0 -> vSMMU0 /* Soft requirement; can be vSMMU1 also */
    emu1 -> vSMMU1 /* Soft requirement; can be vSMMU0 also */

Thanks
Nicolin


I agree w/Jason & Nicolin: different vSMMUs for pass-through devices
than emulated, & vice-versa.
Not mixing... because... of the next agreement:

you need to clarify what you mean by different vSMMUs: are you taking
about different instances or different qemu device types?

Both. a device needed to use hw-accel feature has to use an smmu that has that 
feature;
an emulated device can use such an smmu, but as mentioned in other threads,
if you start with all emulated in one smmu, if you hot-plug a (assigned) device,
it needs another smmu that has hw-accel features.
Keeping them split makes it easier at config time, and it may enable the code 
to be simpler...
but the other half of my brain wants common code paths with accel/emulate 
branches but
a different smmu instance will like simplify the smmu-(accel-)specific lookups.



I agree with Eric that 'accel' isn't needed -- this should be
ascertained from the pSMMU that a physical device is attached to.

we can simply use an AUTO_ON_OFF property and by default choose AUTO
value. That would close the debate ;-)


Preaching to the choir... yes.


Eric

Now... how does vfio(?; why not qemu?) layer determine that? -- where
are SMMUv3 'accel' features exposed either: a) in the device struct
(for the smmuv3) or (b) somewhere under sysfs? ... I couldn't find
anything under either on my g-h system, but would appreciate a ptr if
there is.
and like Eric, although 'a

Re: [PATCH 1/2] migration: Add some documentation for multifd

2025-03-20 Thread Fabiano Rosas
Prasad Pandit  writes:

> On Tue, 11 Mar 2025 at 00:59, Fabiano Rosas  wrote:
>> Peter Xu  writes:
>> > To me, this is a fairly important question to ask.  Fundamentally, the very
>> > initial question is why do we need periodic flush and sync at all.  It's
>> > because we want to make sure new version of pages to land later than old
>> > versions.
> ...
>> > Then v1 and v2 of the page P are ordered.
>> > If without the message on the main channel:
>> > Then I don't see what protects reorder of arrival of messages like:
> ...
>> That's all fine. As long as the recv part doesn't see them out of
>> order. I'll try to write some code to confirm so I don't waste too much
>> of your time.
>
> * Relying on this receive order seems like a passive solution. On one
> side we are saying there is no defined 'requirement' on the network or
> compute capacity/quality for migration. ie. compute and network can be
> as bad as possible, yet migration shall always work reliably.
>
> * When receiving different versions of pages, couldn't multifd_recv
> check the latest version present in guest RAM and accept the incoming
> version only if it is fresher than the already present one? ie. if v1
> arrives later than v2 on the receive side, the receive side
> could/should discard v1 because v2 is already received.
>

"in guest RAM" I don't think so, the performance would probably be
affected. We could have a sequence number that gets bumped per
iteration, but I'm not sure how much of a improvement that would be.

Without a sync, we'd need some sort of per-page handling*. I have a gut
feeling this would get costly.

*- maybe per-iovec depending on how we queue pages to multifd.

> Thank you.
> ---
>   - Prasad



Re: [RFC PATCH v2 05/20] hw/arm/smmuv3-accel: Associate a pxb-pcie bus

2025-03-20 Thread Donald Dutile




On 3/19/25 2:21 PM, Eric Auger wrote:

Hi Don,


On 3/19/25 5:21 PM, Donald Dutile wrote:



On 3/19/25 5:26 AM, Shameerali Kolothum Thodi wrote:

Hi Don,


Hey!


-Original Message-
From: Donald Dutile 
Sent: Tuesday, March 18, 2025 10:12 PM
To: Shameerali Kolothum Thodi
; qemu-...@nongnu.org;
qemu-devel@nongnu.org
Cc: eric.au...@redhat.com; peter.mayd...@linaro.org; j...@nvidia.com;
nicol...@nvidia.com; berra...@redhat.com; nath...@nvidia.com;
mo...@nvidia.com; smost...@google.com; Linuxarm
; Wangzhou (B) ;
jiangkunkun ; Jonathan Cameron
; zhangfei@linaro.org
Subject: Re: [RFC PATCH v2 05/20] hw/arm/smmuv3-accel: Associate a pxb-
pcie bus

Shameer,

Hi!

On 3/11/25 10:10 AM, Shameer Kolothum wrote:

User must associate a pxb-pcie root bus to smmuv3-accel
and that is set as the primary-bus for the smmu dev.

Signed-off-by: Shameer Kolothum



---
    hw/arm/smmuv3-accel.c | 19 +++
    1 file changed, 19 insertions(+)

diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index c327661636..1471b65374 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -9,6 +9,21 @@
    #include "qemu/osdep.h"

    #include "hw/arm/smmuv3-accel.h"
+#include "hw/pci/pci_bridge.h"
+
+static int smmuv3_accel_pxb_pcie_bus(Object *obj, void *opaque)
+{
+    DeviceState *d = opaque;
+
+    if (object_dynamic_cast(obj, "pxb-pcie-bus")) {
+    PCIBus *bus = PCI_HOST_BRIDGE(obj->parent)->bus;
+    if (d->parent_bus && !strcmp(bus->qbus.name, d->parent_bus-
name)) {
+    object_property_set_link(OBJECT(d), "primary-bus",
OBJECT(bus),
+ &error_abort);
+    }
+    }
+    return 0;
+}

    static void smmu_accel_realize(DeviceState *d, Error **errp)
    {
@@ -17,6 +32,9 @@ static void smmu_accel_realize(DeviceState *d, Error

**errp)

    SysBusDevice *dev = SYS_BUS_DEVICE(d);
    Error *local_err = NULL;

+    object_child_foreach_recursive(object_get_root(),
+   smmuv3_accel_pxb_pcie_bus, d);
+
    object_property_set_bool(OBJECT(dev), "accel", true,
&error_abort);
    c->parent_realize(d, &local_err);
    if (local_err) {
@@ -33,6 +51,7 @@ static void smmuv3_accel_class_init(ObjectClass

*klass, void *data)

    device_class_set_parent_realize(dc, smmu_accel_realize,
    &c->parent_realize);
    dc->hotpluggable = false;
+    dc->bus_type = TYPE_PCIE_BUS;
    }

    static const TypeInfo smmuv3_accel_type_info = {


I am not seeing the need for a pxb-pcie bus(switch) introduced for each
'accel'.
Isn't the IORT able to define different SMMUs for different RIDs?
if so,
itsn't that sufficient
to associate (define) an SMMU<->RID association without introducing a
pxb-pcie?
and again, I'm not sure how that improves/enables the device<->SMMU
associativity?


Thanks for taking a look at the series. As discussed elsewhere in
this thread(with
Eric), normally in physical world (or atleast in the most common
cases) SMMUv3
is attached to PCIe Root Complex and if you take a look at the IORT
spec, it describes
association of ID mappings between a RC node and SMMUV3 node.

And if my understanding is correct, in Qemu, only pxb-pcie allows you
to add
extra root complexes even though it is still plugged to
parent(pcie.0). ie, for all
devices downstream it acts as a root complex but still plugged into a
parent pcie.0.
This allows us to add/describe multiple "smmuv3-accel" each
associated with a RC.


I find the qemu statements a bit unclear here as well.
I looked at the hot plug statement(s) in docs/pcie.txt, as I figured
that's where dynamic
IORT changes would be needed as well.  There, it says you can hot-add
PCIe devices to RPs,
one has to define/add RP's to the machine model for that plug-in.

Using libvirt, it could auto-add the needed RPs to do dynmaic smmuv3
additions,

I am not sure I understand your statement here. we don't want "dynamic"
SMMUv3 instantiation. SMMUv3 is a platform device which is supposed to
be coldplugged on a pre-existing PCIe hierarchy. The SMMUv3 device is
not something that is meant to be hotplugged or hotunplugged.
To me we hijack the bus= property to provide information about the IORT
IDMAP


Dynamic in the sense that if one adds smmuv3 for multiple devices,
libvirt will dynamically figure out how to instantiate one, two, three... smmu's
in the machine at cold boot.
If you want a machine to be able to hot-plug a device that would require 
another smmu,
than the config, and smmu, would have to be explicilty stated; as is done today 
for
hot-plug PCIe if the simple machine that libvirt would make is not sufficient to
hot-add a PCIe device.


Thanks

Eric

if I understand how libvirt does that today for pcie devices now (/me
looks at danpb for feedback).


Having said that,  current code only allows pxb-pcie root complexes
avoiding
the pcie.0. The idea behind this was, user can use pcie.0 with a non
accel SMMUv3
for any emul

Re: VDPA MAC address problem

2025-03-20 Thread Jason Wang
On Fri, Mar 21, 2025 at 12:45 AM Konstantin Shkolnyy  wrote:
>
> On 3/19/2025 19:58, Jason Wang wrote:
> > On Thu, Mar 20, 2025 at 12:34 AM Konstantin Shkolnyy  
> > wrote:
> >> Upon reading this forum, I see that VHOST_VDPA_SET_CONFIG is
> >> “deprecated”, and so VIRTIO_NET_CTRL_MAC_ADDR_SET must be the right
> >> method, but it’s apparently called too late.
> >
> > VHOST_VDPA_SET_CONFIG requires the vDPA parent support which is not
> > necessarily there.
>
> The mlx5 driver doesn't do anything for VHOST_VDPA_SET_CONFIG. Intel's
> driver, however, apparently stores the configuration. So, it appears,
> Intel will avoid the problem... Perhaps mlx5 could do the same so that
> QEMU can set the address before it starts the VM (QEMU doesn't have to
> later let the VM change the config).

The problem is that config space is not allowed to be written, that is
why mlx5 doesn't implement the write method.

> Conceptually, setting the address
> by QEMU cmdline doesn't look different from setting it by "vdpa dev add".

It's kind of different.

E.g the device may not even have VIRTIO_NET_F_MAC feature etc.

Thanks

>




Re: [PATCH v2] docs: Explain how to use passt

2025-03-20 Thread Laurent Vivier

Hi,

I think it could be interesting to have this information in the current release.

Thanks,
Laurent

On 14/03/2025 09:43, Laurent Vivier wrote:

cc: trivial

On 11/03/2025 14:27, Laurent Vivier wrote:

Add a chapter to explain how to use passt(1) instead of '-net user'.
passt(1) can be connected to QEMU using UNIX socket or vhost-user.
With vhost-user, migration of the VM is allowed and internal state of
passt(1) is transfered from one side to the other

Bug: https://gitlab.com/qemu-project/qemu/-/issues/2827
Signed-off-by: Laurent Vivier 
---
  docs/system/devices/net.rst | 100 
  1 file changed, 100 insertions(+)

diff --git a/docs/system/devices/net.rst b/docs/system/devices/net.rst
index 2ab516d4b097..a3efbdcabd1a 100644
--- a/docs/system/devices/net.rst
+++ b/docs/system/devices/net.rst
@@ -77,6 +77,106 @@ When using the ``'-netdev user,hostfwd=...'`` option, TCP 
or UDP
  connections can be redirected from the host to the guest. It allows for
  example to redirect X11, telnet or SSH connections.
+Using passt as the user mode network stack
+~~
+
+passt_ can be used as a simple replacement for SLIRP (``-net user``).
+passt doesn't require any capability or privilege. passt has
+better performance than ``-net user``, full IPv6 support and better security
+as it's a daemon that is not executed in QEMU context.
+
+passt can be connected to QEMU either by using a socket
+(``-netdev stream``) or using the vhost-user interface (``-netdev 
vhost-user``).
+See `passt(1)`_ for more details on passt.
+
+.. _passt: https://passt.top/
+.. _passt(1): https://passt.top/builds/latest/web/passt.1.html
+
+To use socket based passt interface:
+
+
+Start passt as a daemon::
+
+   passt --socket ~/passt.socket
+
+If ``--socket`` is not provided, passt will print the path of the UNIX domain socket 
QEMU can connect to (``/tmp/passt_1.socket``, ``/tmp/passt_2.socket``,

+...). Then you can connect your QEMU instance to passt:
+
+.. parsed-literal::
+   |qemu_system| [...OPTIONS...] -device virtio-net-pci,netdev=netdev0 -netdev 
stream,id=netdev0,server=off,addr.type=unix,addr.path=~/passt.socket

+
+Where ``~/passt.socket`` is the UNIX socket created by passt to
+communicate with QEMU.
+
+To use vhost-based interface:
+^
+
+Start passt with ``--vhost-user``::
+
+   passt --vhost-user --socket ~/passt.socket
+
+Then to connect QEMU:
+
+.. parsed-literal::
+   |qemu_system| [...OPTIONS...] -m $RAMSIZE -chardev socket,id=chr0,path=~/ 
passt.socket -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio- 
net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa 
node,memdev=memfd0

+
+Where ``$RAMSIZE`` is the memory size of your VM ``-m`` and ``-object memory-backend- 
memfd,size=`` must match.

+
+Migration of passt:
+^^^
+
+When passt is connected to QEMU using the vhost-user interface it can
+be migrated with QEMU and the network connections are not interrupted.
+
+As passt runs with no privileges, it relies on passt-repair to save and
+load the TCP connections state, using the TCP_REPAIR socket option.
+The passt-repair helper needs to have the CAP_NET_ADMIN capability, or run as root. If 
passt-repair is not available, TCP connections will not be preserved.

+
+Example of migration of a guest on the same host
+
+
+Before being able to run passt-repair, the CAP_NET_ADMIN capability must be set
+on the file, run as root::
+
+   setcap cap_net_admin+eip ./passt-repair
+
+Start passt for the source side::
+
+   passt --vhost-user --socket ~/passt_src.socket --repair-path 
~/passt-repair_src.socket
+
+Where ``~/passt-repair_src.socket`` is the UNIX socket created by passt to
+communicate with passt-repair. The default value is the ``--socket`` path
+appended with ``.repair``.
+
+Start passt-repair::
+
+   passt-repair ~/passt-repair_src.socket
+
+Start source side QEMU with a monitor to be able to send the migrate command:
+
+.. parsed-literal::
+   |qemu_system| [...OPTIONS...] [...VHOST USER OPTIONS...] -monitor stdio
+
+Start passt for the destination side::
+
+   passt --vhost-user --socket ~/passt_dst.socket --repair-path 
~/passt-repair_dst.socket
+
+Start passt-repair::
+
+   passt-repair ~/passt-repair_dst.socket
+
+Start QEMU with the ``-incoming`` parameter:
+
+.. parsed-literal::
+   |qemu_system| [...OPTIONS...] [...VHOST USER OPTIONS...] -incoming 
tcp:localhost:
+
+Then in the source guest monitor the migration can be started::
+
+   (qemu) migrate tcp:localhost:
+
+A separate passt-repair instance must be started for every migration. In the case of a 
failed migration, passt-repair also needs to be restarted before trying

+again.
+
  Hubs
  







Re: [PATCH for-10.1 07/32] vfio: Introduce a new header file for VFIOdisplay declarations

2025-03-20 Thread John Levon
On Tue, Mar 18, 2025 at 10:53:50AM +0100, Cédric Le Goater wrote:

> Gather all VFIOdisplay related declarations into "display.h" to
> reduce exposure of VFIO internals in "hw/vfio/vfio-common.h".
> 
> Signed-off-by: Cédric Le Goater 

Reviewed-by: John Levon 

regards
john



Re: [PATCH 1/1] target/loongarch: fix bad shift in check_ps()

2025-03-20 Thread gaosong

在 2025/3/20 下午4:49, bibo mao 写道:



On 2025/3/19 上午9:41, Song Gao wrote:
  In expression 1ULL << tlb_ps, left shifting by more than 63 bits 
has undefined behavior.

The shift amount, tlb_ps, is as much as 64. check "tlb_ps >=64" to fix.

Resolves: Coverity CID 1593475

Fixes: d882c284a3 ("target/loongarch: check tlb_ps")
Suggested-by: Peter Maydell 
Signed-off-by: Song Gao 
---
  target/loongarch/tcg/tlb_helper.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/tcg/tlb_helper.c 
b/target/loongarch/tcg/tlb_helper.c

index 646dbf59de..e960adad4d 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -21,10 +21,10 @@
    bool check_ps(CPULoongArchState *env, int tlb_ps)
  {
- if (tlb_ps > 64) {
- return false;
- }
- return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
+    if (tlb_ps >= 64) {
+    return false;
+    }
Do we need check (tlb_ps < 0) || (tlb_ps >= 64)? or define parameter 
tlb_ps as uint type.

hi, Peter and RIchard, what's your opinion?
I 'll define parameter tlb_ps as uint type on v2.

Thanks.
Song Gao

Regards
Bibo Mao

+    return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
  }
    void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,






Re: [PATCH for-10.1 12/32] vfio: Make vfio_group_list static

2025-03-20 Thread John Levon
On Tue, Mar 18, 2025 at 10:53:55AM +0100, Cédric Le Goater wrote:

> vfio_group_list is only used in file "container.c".
> 
> Signed-off-by: Cédric Le Goater 

Reviewed-by: John Levon 

regards
john



Re: [PATCH] host/include/loongarch64: Fix inline assembly compatibility with Clang

2025-03-20 Thread gaosong

在 2025/3/14 上午11:31, Yao Zi 写道:

Clang on LoongArch only accepts fp register names in the dollar-prefixed
form, while GCC allows omitting the dollar. Change registers in ASM
clobbers to the dollar-prefixed form to make user emulators buildable
with Clang on loongarch64. No functional change invovled.

Signed-off-by: Yao Zi 
---
  host/include/loongarch64/host/atomic128-ldst.h.inc| 4 ++--
  host/include/loongarch64/host/bufferiszero.c.inc  | 6 --
  host/include/loongarch64/host/load-extract-al16-al8.h.inc | 2 +-
  3 files changed, 7 insertions(+), 5 deletions(-)

Cc: qemu-sta...@nongnu.org

resolves: https://gitlab.com/qemu-project/qemu/-/issues/2871

diff --git a/host/include/loongarch64/host/atomic128-ldst.h.inc 
b/host/include/loongarch64/host/atomic128-ldst.h.inc
index 9a4a8f8b9e..754d2143f0 100644
--- a/host/include/loongarch64/host/atomic128-ldst.h.inc
+++ b/host/include/loongarch64/host/atomic128-ldst.h.inc
@@ -28,7 +28,7 @@ static inline Int128 atomic16_read_ro(const Int128 *ptr)
  asm("vld $vr0, %2, 0\n\t"
  "vpickve2gr.d %0, $vr0, 0\n\t"
  "vpickve2gr.d %1, $vr0, 1"
-   : "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+: "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "$f0");
  
  return int128_make128(l, h);

  }
@@ -46,7 +46,7 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
  asm("vinsgr2vr.d $vr0, %1, 0\n\t"
  "vinsgr2vr.d $vr0, %2, 1\n\t"
  "vst $vr0, %3, 0"
-   : "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+: "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "$f0");
  }
  
  #endif /* LOONGARCH_ATOMIC128_LDST_H */

diff --git a/host/include/loongarch64/host/bufferiszero.c.inc 
b/host/include/loongarch64/host/bufferiszero.c.inc
index 69891eac80..bb2598fdc3 100644
--- a/host/include/loongarch64/host/bufferiszero.c.inc
+++ b/host/include/loongarch64/host/bufferiszero.c.inc
@@ -61,7 +61,8 @@ static bool buffer_is_zero_lsx(const void *buf, size_t len)
  "2:"
  : "=&r"(ret), "+r"(p)
  : "r"(buf), "r"(e), "r"(l)
-: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8",
+  "$fcc0");
  
  return ret;

  }
@@ -119,7 +120,8 @@ static bool buffer_is_zero_lasx(const void *buf, size_t len)
  "3:"
  : "=&r"(ret), "+r"(p)
  : "r"(buf), "r"(e), "r"(l)
-: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8",
+  "$fcc0");
  
  return ret;

  }
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h.inc 
b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
index d1fb59d8af..9528521e7d 100644
--- a/host/include/loongarch64/host/load-extract-al16-al8.h.inc
+++ b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
@@ -31,7 +31,7 @@ static inline uint64_t load_atom_extract_al16_or_al8(void 
*pv, int s)
  asm("vld $vr0, %2, 0\n\t"
  "vpickve2gr.d %0, $vr0, 0\n\t"
  "vpickve2gr.d %1, $vr0, 1"
-   : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+: "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "$f0");
  
  return (l >> shr) | (h << (-shr & 63));

  }





RE: [PATCH for-10.1 17/32] vfio: Move vfio_kvm_device_add/del_fd() to helpers.c

2025-03-20 Thread Duan, Zhenzhong


>-Original Message-
>From: Cédric Le Goater 
>Subject: [PATCH for-10.1 17/32] vfio: Move vfio_kvm_device_add/del_fd() to
>helpers.c
>
>vfio_kvm_device_add/del_fd() are low level routines. Move them with
>the other helpers.
>
>Signed-off-by: Cédric Le Goater 

Reviewed-by: Zhenzhong Duan 

Thanks
Zhenzhong


RE: [PATCH for-10.1 18/32] vfio: Move vfio_get_device_info() to helpers.c

2025-03-20 Thread Duan, Zhenzhong


>-Original Message-
>From: Cédric Le Goater 
>Subject: [PATCH for-10.1 18/32] vfio: Move vfio_get_device_info() to helpers.c
>
>vfio_get_device_info() is a low level routine. Move it with the other
>helpers.
>
>Signed-off-by: Cédric Le Goater 

Reviewed-by: Zhenzhong Duan 

Thanks
Zhenzhong


Re: [PATCH for-10.1 21/32] vfio: Move vfio_kvm_device_fd() into helpers.c

2025-03-20 Thread John Levon
On Tue, Mar 18, 2025 at 10:54:04AM +0100, Cédric Le Goater wrote:

> The vfio_kvm_device_add/del_fd() routines opening the VFIO pseudo
> device are defined in "helpers.c". Move 'vfio_kvm_device_fd'
> definition there and its declaration into "helpers.h" to reduce
> exposure of VFIO internals in "hw/vfio/vfio-common.h".
> 
> Signed-off-by: Cédric Le Goater 

Reviewed-by: John Levon 

regards
john



RE: [PATCH for-10.1 14/32] vfio: Move Host IOMMU type declarations into their respective files

2025-03-20 Thread Duan, Zhenzhong



>-Original Message-
>From: Cédric Le Goater 
>Subject: [PATCH for-10.1 14/32] vfio: Move Host IOMMU type declarations into
>their respective files
>
>These definitions don't have any use outside of their respective
>submodules. There is no need to expose them externally. Keep them
>private.
>
>Signed-off-by: Cédric Le Goater 

Reviewed-by: Zhenzhong Duan 

Thanks
Zhenzhong



Re: [PATCH for-10.1 24/32] vfio: Introduce new files for dirty tracking definitions and declarations

2025-03-20 Thread John Levon
On Tue, Mar 18, 2025 at 10:54:07AM +0100, Cédric Le Goater wrote:

> File "common.c" has been emptied of most of its definitions by the
> previous changes and the only definitions left are related to dirty
> tracking. Rename it to "dirty-tracking.c" and introduce its associated
> "dirty-tracking.h" header file for the declarations.
> 
> Cleanup a little the includes while at it.
> 
> Signed-off-by: Cédric Le Goater 

> rename from hw/vfio/common.c
> rename to hw/vfio/dirty-tracking.c
> index 
> ed2f2ed8839caaf40fabb0cbbcaa1df2c5b70d67..441f9d9a08c06a88dda44ef143dcee5f0a89a900
>  100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/dirty-tracking.c
> @@ -20,14 +20,10 @@

I think you might want to update the file comment from "generic functions used
by VFIO devices".

regards
john



Re: [PATCH for-10.1 31/32] vfio: Introduce vfio_dirty_tracking_un/register() routines

2025-03-20 Thread John Levon
On Tue, Mar 18, 2025 at 10:54:14AM +0100, Cédric Le Goater wrote:

> This hides the MemoryListener implementation and makes the code common
> to both IOMMU backends, legacy and IOMMUFD.

Patch itself seems fine but

> index 
> 8e47ccbb9aea748e57271508ddcd10e394abf16c..d7827f7b64adf3e2b41fafd59aab71e0b28c1567
>  100644
> --- a/hw/vfio/dirty-tracking.c
> +++ b/hw/vfio/dirty-tracking.c
> @@ -1267,7 +1267,7 @@ static void vfio_listener_log_sync(MemoryListener 
> *listener,
>  }
>  }
>  
> -const MemoryListener vfio_memory_listener = {
> +static const MemoryListener vfio_memory_listener = {

In vfio-user, we register new begin/commit callbacks for non-dirty-tracking
purposes, making this location a little bit odd. But not for now I suppose.

regards
john



RE: [PATCH for-10.1 24/32] vfio: Introduce new files for dirty tracking definitions and declarations

2025-03-20 Thread Duan, Zhenzhong


>-Original Message-
>From: Cédric Le Goater 
>Subject: [PATCH for-10.1 24/32] vfio: Introduce new files for dirty tracking
>definitions and declarations
>
>File "common.c" has been emptied of most of its definitions by the
>previous changes and the only definitions left are related to dirty
>tracking. Rename it to "dirty-tracking.c" and introduce its associated
>"dirty-tracking.h" header file for the declarations.
>
>Cleanup a little the includes while at it.
>
>Signed-off-by: Cédric Le Goater 
>---
> hw/vfio/dirty-tracking.h   | 22 ++
> include/hw/vfio/vfio-common.h  | 10 --
> hw/vfio/container.c|  1 +
> hw/vfio/{common.c => dirty-tracking.c} |  5 +
> hw/vfio/iommufd.c  |  1 +
> hw/vfio/meson.build|  2 +-
> hw/vfio/trace-events   |  2 +-
> 7 files changed, 27 insertions(+), 16 deletions(-)
> create mode 100644 hw/vfio/dirty-tracking.h
> rename hw/vfio/{common.c => dirty-tracking.c} (99%)
>
>diff --git a/hw/vfio/dirty-tracking.h b/hw/vfio/dirty-tracking.h
>new file mode 100644
>index
>..4b83dc54ab50dabfff040d7cc3
>db27b80bfe2d3a
>--- /dev/null
>+++ b/hw/vfio/dirty-tracking.h
>@@ -0,0 +1,22 @@
>+/*
>+ * VFIO dirty page tracking routines
>+ *
>+ * Copyright Red Hat, Inc. 2025
>+ *
>+ * SPDX-License-Identifier: GPL-2.0-or-later
>+ */
>+
>+#ifndef HW_VFIO_DIRTY_TRACKING_H
>+#define HW_VFIO_DIRTY_TRACKING_H
>+
>+extern const MemoryListener vfio_memory_listener;
>+
>+bool vfio_devices_all_dirty_tracking_started(const VFIOContainerBase
>*bcontainer);
>+bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase
>*bcontainer);
>+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>+VFIOBitmap *vbmap, hwaddr iova, hwaddr 
>size,
>+Error **errp);
>+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>+  uint64_t size, ram_addr_t ram_addr, Error **errp);
>+
>+#endif /* HW_VFIO_DIRTY_TRACKING_H */
>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>index
>184a422916f62259158e8759efc473a5efb2b2f7..cc20110d9de8ac173b67e6e878
>d4d61818497426 100644
>--- a/include/hw/vfio/vfio-common.h
>+++ b/include/hw/vfio/vfio-common.h
>@@ -130,7 +130,6 @@ VFIODevice *vfio_get_vfio_device(Object *obj);
>
> typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
> extern VFIODeviceList vfio_device_list;
>-extern const MemoryListener vfio_memory_listener;
>
> #ifdef CONFIG_LINUX
> int vfio_get_region_info(VFIODevice *vbasedev, int index,
>@@ -140,15 +139,6 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev,
>uint32_t type,
> bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
> #endif
>
>-bool vfio_devices_all_dirty_tracking_started(
>-const VFIOContainerBase *bcontainer);
>-bool
>-vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer);
>-int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>-VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>-int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>-  uint64_t size, ram_addr_t ram_addr, Error **errp);
>-
> /* Returns 0 on success, or a negative errno. */
> bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>index
>4e41a7476549a0c5e464e499d059db5aca6e3470..e88dfe12edd6dee469c06ee2e
>46ab9c8b5019ae7 100644
>--- a/hw/vfio/container.c
>+++ b/hw/vfio/container.c
>@@ -35,6 +35,7 @@
> #include "hw/vfio/vfio-container.h"
> #include "helpers.h"
> #include "cpr.h"
>+#include "dirty-tracking.h"
>
> #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO
>TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
>
>diff --git a/hw/vfio/common.c b/hw/vfio/dirty-tracking.c
>similarity index 99%
>rename from hw/vfio/common.c
>rename to hw/vfio/dirty-tracking.c
>index
>ed2f2ed8839caaf40fabb0cbbcaa1df2c5b70d67..441f9d9a08c06a88dda44ef143d
>cee5f0a89a900 100644
>--- a/hw/vfio/common.c
>+++ b/hw/vfio/dirty-tracking.c
>@@ -20,14 +20,10 @@
>
> #include "qemu/osdep.h"
> #include 
>-#ifdef CONFIG_KVM
>-#include 
>-#endif

It looks this change unrelated to this patch?

> #include 
>
> #include "hw/vfio/vfio-common.h"
> #include "hw/vfio/pci.h"
>-#include "exec/address-spaces.h"

Same here.

Thanks
Zhenzhong

> #include "exec/memory.h"
> #include "exec/ram_addr.h"
> #include "exec/target_page.h"
>@@ -45,6 +41,7 @@
> #include "system/tpm.h"
> #include "migration.h"
> #include "helpers.h"
>+#include "dirty-tracking.h"
>
> /*
>  * Device state interfaces
>diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>index
>b25f3b4086d7b7fc6fcd519a9b8b2904513a655f..9335a17920b32dc2bf9cb4eeb2b
>8f57382f14ac8 100644

[PATCH v5 2/6] hw/loongarch/virt: Fix error handling in cpu plug

2025-03-20 Thread Bibo Mao
In function virt_cpu_plug(), it will send cpu plug message to interrupt
controller extioi and ipi irqchip. If there is problem in this function,
system should continue to run and keep state the same before cpu is
added.

Object cpuslot::cpu is set at last only when there is no any error.
If there is, send cpu unplug message to extioi and ipi irqchip, and then
return immediately.

Fixes: ab9935d2991e (hw/loongarch/virt: Implement cpu plug interface)
Signed-off-by: Bibo Mao 
---
 hw/loongarch/virt.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index a5840ff968..5118f01e4b 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -981,8 +981,6 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
 Error *err = NULL;
 
-cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
-cpu_slot->cpu = CPU(dev);
 if (lvms->ipi) {
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
 if (err) {
@@ -995,6 +993,10 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
 if (err) {
 error_propagate(errp, err);
+if (lvms->ipi) {
+/* Send unplug message to restore, discard error here */
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev, NULL);
+}
 return;
 }
 }
@@ -1003,9 +1005,20 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
 if (err) {
 error_propagate(errp, err);
+if (lvms->ipi) {
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev, NULL);
+}
+
+if (lvms->extioi) {
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi),
+   dev, NULL);
+}
+return;
 }
 }
 
+cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
+cpu_slot->cpu = CPU(dev);
 return;
 }
 
-- 
2.39.3




  1   2   >