Re: [PATCH] gdbstub: Fix client Ctrl-C handling

2023-07-31 Thread Joel Stanley
On Sun, 30 Jul 2023 at 09:43, Nicholas Piggin  wrote:
>
> On Wed Jul 26, 2023 at 4:35 PM AEST, Joel Stanley wrote:
> > On Wed, 12 Jul 2023 at 02:12, Nicholas Piggin  wrote:
> > >
> > > On Tue Jul 11, 2023 at 9:03 PM AEST, Matheus Tavares Bernardino wrote:
> > > > > Nicholas Piggin  wrote:
> > > > >
> > > > > diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
> > > > > index 6911b73c07..ce8b42eb15 100644
> > > > > --- a/gdbstub/gdbstub.c
> > > > > +++ b/gdbstub/gdbstub.c
> > > > > @@ -2051,8 +2051,17 @@ void gdb_read_byte(uint8_t ch)
> > > > >  return;
> > > > >  }
> > > > >  if (runstate_is_running()) {
> > > > > -/* when the CPU is running, we cannot do anything except stop
> > > > > -   it when receiving a char */
> > > > > +/*
> > > > > + * When the CPU is running, we cannot do anything except stop
> > > > > + * it when receiving a char. This is expected on a Ctrl-C in 
> > > > > the
> > > > > + * gdb client. Because we are in all-stop mode, gdb sends a
> > > > > + * 0x03 byte which is not a usual packet, so we handle it 
> > > > > specially
> > > > > + * here, but it does expect a stop reply.
> > > > > + */
> > > > > +if (ch != 0x03) {
> > > > > +warn_report("gdbstub: client sent packet while target 
> > > > > running\n");
> > > > > +}
> > > > > +gdbserver_state.allow_stop_reply = true;
> > > > >  vm_stop(RUN_STATE_PAUSED);
> > > > >  } else
> > > > >  #endif
> > > >
> > > > Makes sense to me, but shouldn't we send the stop-reply packet only for
> > > > Ctrl+C/0x03?
> > >
> > > Good question.
> > >
> > > I think if we get a character here that's not a 3, we're already in
> > > trouble, and we eat it so even worse. Since we only send a stop packet
> > > back when the vm stops, then if we don't send one now we might never
> > > send it. At least if we send one then the client might have some chance
> > > to get back to a sane state. And this does at least take us back to
> > > behaviour before the stop filtering patch.
> > >
> > > Could go further and only stop the machine if it was a 3, or send a
> > > stop packet even if we were stopped, etc. but all that get further from
> > > a minimal fix.
> >
> > I was taking a look at -rc1 and it looks like this hasn't made it in.
> > Is it something we want to propose including?
> >
> > As a user of qemu I'd vote for it to go in.
>
> I think it should, gdb is hardly usable without it.

I think I hit this issue when debugging u-boot in the aspeed arm
machines. Your patch fixed things:

Tested-by: Joel Stanley 

Alex, Philippe, can we get this queued for 8.1?

Cheers,

Joel



[PATCH 0/3] Accompany -nostdlib with -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it was
configured with --enable-default-ssp option. For such a compiler, it is
necessary to explicitly disable stack protector when linking without
standard libraries.

This is a tree-wide change to add -fno-stack-protector where -nostdlib is
present.

Akihiko Odaki (3):
  pc-bios/optionrom: Add -fno-stack-protector
  tests/migration: Add -fno-stack-protector
  tests/tcg: Add -fno-stack-protector

 tests/tcg/mips/hello-mips.c   | 4 ++--
 pc-bios/optionrom/Makefile| 2 +-
 pc-bios/s390-ccw/Makefile | 2 +-
 tests/migration/s390x/Makefile| 4 ++--
 tests/tcg/aarch64/Makefile.softmmu-target | 2 +-
 tests/tcg/aarch64/Makefile.target | 2 +-
 tests/tcg/alpha/Makefile.softmmu-target   | 2 +-
 tests/tcg/arm/Makefile.target | 2 +-
 tests/tcg/cris/Makefile.target| 2 +-
 tests/tcg/hexagon/Makefile.target | 2 +-
 tests/tcg/i386/Makefile.softmmu-target| 2 +-
 tests/tcg/i386/Makefile.target| 2 +-
 tests/tcg/loongarch64/Makefile.softmmu-target | 2 +-
 tests/tcg/minilib/Makefile.target | 2 +-
 tests/tcg/mips/Makefile.target| 2 +-
 tests/tcg/nios2/Makefile.softmmu-target   | 2 +-
 tests/tcg/s390x/Makefile.softmmu-target   | 2 +-
 tests/tcg/x86_64/Makefile.softmmu-target  | 2 +-
 18 files changed, 20 insertions(+), 20 deletions(-)

-- 
2.41.0




[PATCH 1/3] pc-bios/optionrom: Add -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
 pc-bios/optionrom/Makefile | 2 +-
 pc-bios/s390-ccw/Makefile  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
index b1fff0ba6c..f220d81f2c 100644
--- a/pc-bios/optionrom/Makefile
+++ b/pc-bios/optionrom/Makefile
@@ -19,7 +19,7 @@ quiet-command = $(call quiet-@,$2 $@)$1
 override CPPFLAGS += -MMD -MP -MT $@ -MF $(@D)/$(*F).d
 
 override CFLAGS += -march=i486 -Wall $(EXTRA_CFLAGS) -m16
-override CFLAGS += -ffreestanding -I$(TOPSRC_DIR)/include
+override CFLAGS += -ffreestanding -fno-stack-protector -I$(TOPSRC_DIR)/include
 
 cc-test = $(CC) -Werror $1 -c -o /dev/null -xc /dev/null >/dev/null 2>/dev/null
 cc-option = if $(call cc-test, $1); then \
diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index acfcd1e71a..446d448913 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -38,7 +38,7 @@ OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \
 EXTRA_CFLAGS += -Wall
 EXTRA_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common 
-fPIE
 EXTRA_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables
-EXTRA_CFLAGS += -msoft-float
+EXTRA_CFLAGS += -fno-stack-protector -msoft-float
 EXTRA_CFLAGS += -std=gnu99
 LDFLAGS += -Wl,-pie -nostdlib
 
-- 
2.41.0




[PATCH 2/3] tests/migration: Add -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
 tests/migration/s390x/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/migration/s390x/Makefile b/tests/migration/s390x/Makefile
index 6393c3e5b9..6671de2efc 100644
--- a/tests/migration/s390x/Makefile
+++ b/tests/migration/s390x/Makefile
@@ -6,8 +6,8 @@ all: a-b-bios.h
 fwdir=../../../pc-bios/s390-ccw
 
 CFLAGS+=-ffreestanding -fno-delete-null-pointer-checks -fPIE -Os \
-   -msoft-float -march=z900 -fno-asynchronous-unwind-tables -Wl,-pie \
-   -Wl,--build-id=none -nostdlib
+   -msoft-float -march=z900 -fno-asynchronous-unwind-tables \
+   -fno-stack-protector -Wl,-pie -Wl,--build-id=none -nostdlib
 
 a-b-bios.h: s390x.elf
echo "$$__note" > header.tmp
-- 
2.41.0




[PATCH 3/3] tests/tcg: Add -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
 tests/tcg/mips/hello-mips.c   | 4 ++--
 tests/tcg/aarch64/Makefile.softmmu-target | 2 +-
 tests/tcg/aarch64/Makefile.target | 2 +-
 tests/tcg/alpha/Makefile.softmmu-target   | 2 +-
 tests/tcg/arm/Makefile.target | 2 +-
 tests/tcg/cris/Makefile.target| 2 +-
 tests/tcg/hexagon/Makefile.target | 2 +-
 tests/tcg/i386/Makefile.softmmu-target| 2 +-
 tests/tcg/i386/Makefile.target| 2 +-
 tests/tcg/loongarch64/Makefile.softmmu-target | 2 +-
 tests/tcg/minilib/Makefile.target | 2 +-
 tests/tcg/mips/Makefile.target| 2 +-
 tests/tcg/nios2/Makefile.softmmu-target   | 2 +-
 tests/tcg/s390x/Makefile.softmmu-target   | 2 +-
 tests/tcg/x86_64/Makefile.softmmu-target  | 2 +-
 15 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/tcg/mips/hello-mips.c b/tests/tcg/mips/hello-mips.c
index 4e1cf501af..0ba5f1bf23 100644
--- a/tests/tcg/mips/hello-mips.c
+++ b/tests/tcg/mips/hello-mips.c
@@ -5,8 +5,8 @@
 * http://www.linux-mips.org/wiki/MIPSABIHistory
 * http://www.linux.com/howtos/Assembly-HOWTO/mips.shtml
 *
-* mipsel-linux-gcc -nostdlib -mno-abicalls -fno-PIC -mabi=32 \
-*  -O2 -static -o hello-mips hello-mips.c
+* mipsel-linux-gcc -nostdlib -mno-abicalls -fno-PIC -fno-stack-protector \
+   -mabi=32 -O2 -static -o hello-mips hello-mips.c
 *
 */
 #define __NR_SYSCALL_BASE  4000
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target 
b/tests/tcg/aarch64/Makefile.softmmu-target
index b74a2534e3..11016ffab2 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -16,7 +16,7 @@ LINK_SCRIPT=$(AARCH64_SYSTEM_SRC)/kernel.ld
 LDFLAGS=-Wl,-T$(LINK_SCRIPT)
 TESTS+=$(AARCH64_TESTS) $(MULTIARCH_TESTS)
 EXTRA_RUNS+=$(MULTIARCH_RUNS)
-CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC)
+CFLAGS+=-nostdlib -fno-stack-protector -ggdb -O0 $(MINILIB_INC)
 LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 
 config-cc.mak: Makefile
diff --git a/tests/tcg/aarch64/Makefile.target 
b/tests/tcg/aarch64/Makefile.target
index 617f821613..55f8609897 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -49,7 +49,7 @@ endif
 # bti-1 tests the elf notes, so we require special compiler support.
 ifneq ($(CROSS_CC_HAS_ARMV8_BTI),)
 AARCH64_TESTS += bti-1 bti-3
-bti-1 bti-3: CFLAGS += -mbranch-protection=standard
+bti-1 bti-3: CFLAGS += -fno-stack-protector -mbranch-protection=standard
 bti-1 bti-3: LDFLAGS += -nostdlib
 endif
 # bti-2 tests PROT_BTI, so no special compiler support required.
diff --git a/tests/tcg/alpha/Makefile.softmmu-target 
b/tests/tcg/alpha/Makefile.softmmu-target
index 09193a62d6..99c23c2903 100644
--- a/tests/tcg/alpha/Makefile.softmmu-target
+++ b/tests/tcg/alpha/Makefile.softmmu-target
@@ -15,7 +15,7 @@ CRT_PATH=$(ALPHA_SYSTEM_SRC)
 LINK_SCRIPT=$(ALPHA_SYSTEM_SRC)/kernel.ld
 LDFLAGS=-Wl,-T$(LINK_SCRIPT)
 TESTS+=$(ALPHA_TESTS) $(MULTIARCH_TESTS)
-CFLAGS+=-nostdlib -g -O1 -mcpu=ev6 $(MINILIB_INC)
+CFLAGS+=-fno-stack-protector -nostdlib -g -O1 -mcpu=ev6 $(MINILIB_INC)
 LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 
 # building head blobs
diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target
index 0038cef02c..3473f4619e 100644
--- a/tests/tcg/arm/Makefile.target
+++ b/tests/tcg/arm/Makefile.target
@@ -12,7 +12,7 @@ float_madds: CFLAGS+=-mfpu=neon-vfpv4
 
 # Basic Hello World
 ARM_TESTS = hello-arm
-hello-arm: CFLAGS+=-marm -ffreestanding
+hello-arm: CFLAGS+=-marm -ffreestanding -fno-stack-protector
 hello-arm: LDFLAGS+=-nostdlib
 
 # IWMXT floating point extensions
diff --git a/tests/tcg/cris/Makefile.target b/tests/tcg/cris/Makefile.target
index 43587d2769..713e2a5b6c 100644
--- a/tests/tcg/cris/Makefile.target
+++ b/tests/tcg/cris/Makefile.target
@@ -30,7 +30,7 @@ AS= $(CC) -x assembler-with-cpp
 LD  = $(CC)
 
 # we rely on GCC inline:ing the stuff we tell it to in many places here.
-CFLAGS  = -Winline -Wall -g -O2 -static
+CFLAGS  = -Winline -Wall -g -O2 -static -fno-stack-protector
 NOSTDFLAGS = -nostartfiles -nostdlib
 ASFLAGS += -mcpu=v10 -g -Wa,-I,$(SRC_PATH)/tests/tcg/cris/bare
 CRT_FILES = crt.o sys.o
diff --git a/tests/tcg/hexagon/Makefile.target 
b/tests/tcg/hexagon/Makefile.target
index 87ed2c90b9..f839b2c0d5 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -19,7 +19,7 @@
 EXTRA_RUNS =
 
 CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal
-CFLAGS += -fno-unroll-loops
+CFLAGS += -fno-unroll-loops -fno-stack-protector
 
 HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon
 VPATH += $(HEX_SRC)
d

Re: [PATCH for-8.2 1/6] vfio/migration: Move from STOP_COPY to STOP in vfio_save_cleanup()

2023-07-31 Thread Cédric Le Goater

On 7/31/23 08:32, Avihai Horon wrote:


On 30/07/2023 19:25, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


On 7/16/23 10:15, Avihai Horon wrote:

Changing the device state from STOP_COPY to STOP can take time as the
device may need to free resources and do other operations as part of the
transition. Currently, this is done in vfio_save_complete_precopy() and
therefore it is counted in the migration downtime.

To avoid this, change the device state from STOP_COPY to STOP in
vfio_save_cleanup(), which is called after migration has completed and
thus is not part of migration downtime.

Signed-off-by: Avihai Horon 


Have you tried this series with vGPUs ? If so, are there any improvement
to report ?


Not with a vGPU.
But I tried it with a ConnectX-7 VF and I could see up to 6% downtime 
improvement.
I mentioned this in the cover letter.
Do you want to mention it also in the commit message?


This is not necessary.

Thanks,

C.






Re: [PATCH 2/3] tests/migration: Add -fno-stack-protector

2023-07-31 Thread Juan Quintela
Akihiko Odaki  wrote:
> A build of GCC 13.2 will have stack protector enabled by default if it
> was configured with --enable-default-ssp option. For such a compiler,
> it is necessary to explicitly disable stack protector when linking
> without standard libraries.
>
> Signed-off-by: Akihiko Odaki 

Reviewed-by: Juan Quintela 

For whatever is related to migration.
My liker options skills are (very) rusty, so I will let others decided
if this is the correct way to do it O:-)




Re: [PATCH for-8.2 2/2] arm/kvm: convert to kvm_get_one_reg

2023-07-31 Thread Gavin Shan



On 7/18/23 21:14, Cornelia Huck wrote:

We can neaten the code by switching the callers that work on a
CPUstate to the kvm_get_one_reg function.

Signed-off-by: Cornelia Huck 
---
  target/arm/kvm.c   | 15 +++-
  target/arm/kvm64.c | 57 --
  2 files changed, 18 insertions(+), 54 deletions(-)



Reviewed-by: Gavin Shan 

Thanks,
Gavin




Re: [PATCH] kvm: Fix crash by initializing kvm_state early

2023-07-31 Thread David Hildenbrand

On 31.07.23 01:48, Gavin Shan wrote:

Runs into core dump on arm64 and the backtrace extracted from the
core dump is shown as below. It's caused by accessing @kvm_state which
isn't initialized at that point due to commit 176d073029 ("hw/arm/virt:
Use machine_memory_devices_init()"), where the machine's memory region
is added ealier than before.


s/ealier/earlier/



 main
 qemu_init
 configure_accelerators
 qemu_opts_foreach
 do_configure_accelerator
 accel_init_machine
 kvm_init
 virt_kvm_type
 virt_set_memmap
 machine_memory_devices_init
 memory_region_add_subregion
 memory_region_add_subregion_common
 memory_region_update_container_subregions
 memory_region_transaction_begin
 qemu_flush_coalesced_mmio_buffer
 kvm_flush_coalesced_mmio_buffer

Fix it by initializing @kvm_state early. With this applied, no crash
is observed on arm64.


Interestingly, we register memory listeners in kvm_init() after setting 
kvm_state, so in theory it should have worked fine.


But it's rather surprising that we see kvm_flush_coalesced_mmio_buffer() 
call even though we didn't even setup a listener with 
kvm_coalesce_mmio_region / kvm_uncoalesce_mmio_region.


Such a notifier-specific flush might have been better placed in the 
MemoryListener->begin() call. But that needs more thought, as 
qemu_flush_coalesced_mmio_buffer() is called from a couple of places.


Fixes: 176d073029 ("hw/arm/virt: Use machine_memory_devices_init()")
Signed-off-by: Gavin Shan 
---
  accel/kvm/kvm-all.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 373d876c05..c825cba12f 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2464,6 +2464,7 @@ static int kvm_init(MachineState *ms)
  qemu_mutex_init(&kml_slots_lock);
  
  s = KVM_STATE(ms->accelerator);

+kvm_state = s;
  
  /*

   * On systems where the kernel can support different base page
@@ -2695,8 +2696,6 @@ static int kvm_init(MachineState *ms)
  #endif
  }
  
-kvm_state = s;

-
  ret = kvm_arch_init(ms, s);
  if (ret < 0) {
  goto err;


As an alternative, we might simply do nothing in 
kvm_flush_coalesced_mmio_buffer(), in case kvm_state is not setup yet. 
We don't have any notifier registered in that case.


Thanks!

--
Cheers,

David / dhildenb




RE: [PATCH RFC v1 2/3] vfio/pci: enable vector on dynamic MSI-X allocation

2023-07-31 Thread Liu, Jing2
Hi Alex,

> On July 28, 2023 1:25 AM,  Alex Williamson  wrote:
> 
> On Thu, 27 Jul 2023 03:24:09 -0400
> Jing Liu  wrote:
> 
> > The vector_use callback is used to enable vector that is unmasked in
> > guest. The kernel used to only support static MSI-X allocation. When
> > allocating a new interrupt using "static MSI-X allocation" kernels,
> > Qemu first disables all previously allocated vectors and then
> > re-allocates all including the new one. The nr_vectors of
> > VFIOPCIDevice indicates that all vectors from 0 to nr_vectors are
> > allocated (and may be enabled), which is used to to loop all the
> > possibly used vectors When, e.g., disabling MSI-X interrupts.
> >
> > Extend the vector_use function to support dynamic MSI-X allocation
> > when host supports the capability. Qemu therefore can individually
> > allocate and enable a new interrupt without affecting others or
> > causing interrupts lost during runtime.
> >
> > Utilize nr_vectors to calculate the upper bound of enabled vectors in
> > dynamic MSI-X allocation mode since looping all msix_entries_nr is not
> > efficient and unnecessary.
> >
> > Signed-off-by: Jing Liu 
> > Tested-by: Reinette Chatre 
> > ---
> >  hw/vfio/pci.c | 40 +++-
> >  1 file changed, 27 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index
> > 0c4ac0873d40..8c485636445c 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -512,12 +512,20 @@ static int vfio_msix_vector_do_use(PCIDevice
> *pdev, unsigned int nr,
> >  }
> >
> >  /*
> > - * We don't want to have the host allocate all possible MSI vectors
> > - * for a device if they're not in use, so we shutdown and incrementally
> > - * increase them as needed.
> > + * When dynamic allocation is not supported, we don't want to have the
> > + * host allocate all possible MSI vectors for a device if they're not
> > + * in use, so we shutdown and incrementally increase them as needed.
> > + * And nr_vectors stands for the number of vectors being allocated.
> 
> "nr_vectors represents the total number of vectors allocated."

Will change.

> 
> > + *
> > + * When dynamic allocation is supported, let the host only allocate
> > + * and enable a vector when it is in use in guest. nr_vectors stands
> > + * for the upper bound of vectors being enabled (but not all of the
> > + * ranges is allocated or enabled).
> 
> s/stands for/represents/
Will change.
> 
> >   */
> > -if (vdev->nr_vectors < nr + 1) {
> > +if ((vdev->msix->irq_info_flags & VFIO_IRQ_INFO_NORESIZE) &&
> 
> Testing vdev->msix->noresize would be cleaner.
> 
> > +(vdev->nr_vectors < nr + 1)) {
> >  vdev->nr_vectors = nr + 1;
> > +
> >  if (!vdev->defer_kvm_irq_routing) {
> >  vfio_disable_irqindex(&vdev->vbasedev, 
> > VFIO_PCI_MSIX_IRQ_INDEX);
> >  ret = vfio_enable_vectors(vdev, true); @@ -529,16 +537,22
> > @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> >  Error *err = NULL;
> >  int32_t fd;
> >
> > -if (vector->virq >= 0) {
> > -fd = event_notifier_get_fd(&vector->kvm_interrupt);
> > -} else {
> > -fd = event_notifier_get_fd(&vector->interrupt);
> > -}
> > +if (!vdev->defer_kvm_irq_routing) {
> > +if (vector->virq >= 0) {
> > +fd = event_notifier_get_fd(&vector->kvm_interrupt);
> > +} else {
> > +fd = event_notifier_get_fd(&vector->interrupt);
> > +}
> >
> > -if (vfio_set_irq_signaling(&vdev->vbasedev,
> > - VFIO_PCI_MSIX_IRQ_INDEX, nr,
> > - VFIO_IRQ_SET_ACTION_TRIGGER, fd, 
> > &err)) {
> > -error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
> > +if (vfio_set_irq_signaling(&vdev->vbasedev,
> > +   VFIO_PCI_MSIX_IRQ_INDEX, nr,
> > +   VFIO_IRQ_SET_ACTION_TRIGGER, fd, 
> > &err)) {
> > +error_reportf_err(err, VFIO_MSG_PREFIX, 
> > vdev->vbasedev.name);
> > +}
> > +}
> > +/* Increase for dynamic allocation case. */
> > +if (vdev->nr_vectors < nr + 1) {
> > +vdev->nr_vectors = nr + 1;
> >  }
> 
> We now have two branches where the bulk of the code is skipped when
> defer_kvm_irq_routing is enabled and doing effectively the same update to
> nr_vectors otherwise.  This suggests we should move the
> defer_kvm_irq_routing test out and create a common place to update
> nr_vectors.  Thanks,

I make a new logic as follows that moves the defer_kvm_irq_routing test out.
Since the vfio_enable_vectors() function need an updated nr_vectors value
so need first update and test the different conditions using old value,
e.g. old_nr_vec.

int old_nr_vec = vdev->nr_vectors;
.

Re: [PATCH RFC v1 1/3] vfio/pci: detect the support of dynamic MSI-X allocation

2023-07-31 Thread Cédric Le Goater

On 7/31/23 05:57, Liu, Jing2 wrote:

Hi C.


On July 28, 2023 4:44 PM, Cédric Le Goater  wrote:

[ ... ]


Sorry I didn't quite understand "info.flags be tested against

VFIO_IRQ_INFO_NORESIZE".

I saw kernel < 6.4 simply added NORESIZE to info.flags and latest kernel adds

if has_dyn_msix.

Would you please kindly describe more on your point?


I was trying to find the conditions to detect safely that the kernel didn't have
dynamic MSI-X support. Testing VFIO_IRQ_INFO_NORESIZE seems enough.


Oh, I see.


In that case, QEMU should report an error and the trace event is not

needed.


I replied an email with new error handling draft code based on my
understanding, which reports the error and need no trace. Could you please

help review if that is what we want?

yes. It looked good. Please send a v1 !


Thanks for reviewing that. I guess you mean v2 for next version 😊


Well, if you remove the RFC status, I think you should, this would
still be a v1.

Thanks,

C.




Re: [RFC] migration/block-dirty-bitmap: make loading bitmap for device with iothread future-proof

2023-07-31 Thread Juan Quintela
Fiona Ebner  wrote:
> The bdrv_create_dirty_bitmap() function (which is also called by
> bdrv_dirty_bitmap_create_successor()) uses bdrv_getlength(bs). This is
> a wrapper around a coroutine, and when not called in coroutine context
> would use bdrv_poll_co(). Such a call would trigger an assert() if the
> correct AioContext hasn't been acquired before, because polling would
> try to release the AioContext.

The ingenous in me thinks:

If the problem is that bdrv_poll_co() release an AioContext that it
don't have acquired, perhaps we should fix bdrv_poll_co().

Ha!!!

$ find . -type f -exec grep --color=auto -nH --null -e bdrv_poll_co \{\} +
./scripts/block-coroutine-wrapper.py\0173:
bdrv_poll_co(&s.poll_state);
./scripts/block-coroutine-wrapper.py\0198:
bdrv_poll_co(&s.poll_state);
./block/block-gen.h\038:static inline void bdrv_poll_co(BdrvPollCo *s)
$

/me retreats

> The issue does not happen for migration, because the call happens
> from process_incoming_migration_co(), i.e. in coroutine context. So
> the bdrv_getlength() wrapper will just call bdrv_co_getlength()
> directly without polling.

The ingenous in me wonders why bdrv_getlength() needs to use coroutines
at all, but as I have been burned on the previous paragraph, I learn not
to even try.

Ok, I never learn, so I do a grep and I see two appearces of
bdrv_getlength in include files, but grep only shows uses of the
function, not a real definition.

I even see

co_wrapper_mixed_bdrv_rdlock

And decied to stop here.


> The issue would happen for snapshots, but won't in practice, because
> saving a snapshot with a block dirty bitmap is currently not possible.
> The reason is that dirty_bitmap_save_iterate() returns whether it has
> completed the bulk phase, which only happens in postcopy, so
> qemu_savevm_state_iterate() will always return 0, meaning the call
> to iterate will be repeated over and over again without ever reaching
> the completion phase.
>
> Still, this would make the code more robust for the future.

What I wonder is if we should annotate this calls somehow that they need
to be called from a coroutine context, because I would have never found
it.

And just thinking loud:

I still wonder if we should make incoming migration its own thread.
Because we got more and more problems because it is a coroutine, that in
non-multifd case can consume a whole CPU alone, so it makes no sense to
have a coroutine.

On the other hand, with multifd, it almost don't use any resources, so 

> Signed-off-by: Fiona Ebner 
> ---
>
> We ran into this issue downstream, because we have a custom snapshot
> mechanism which does support dirty bitmaps and does not run in
> coroutine context during load.
>
>  migration/block-dirty-bitmap.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
> index 032fc5f405..e1ae3b7316 100644
> --- a/migration/block-dirty-bitmap.c
> +++ b/migration/block-dirty-bitmap.c
> @@ -805,8 +805,11 @@ static int dirty_bitmap_load_start(QEMUFile *f, 
> DBMLoadState *s)
>   "destination", bdrv_dirty_bitmap_name(s->bitmap));
>  return -EINVAL;
>  } else {
> +AioContext *ctx = bdrv_get_aio_context(s->bs);
> +aio_context_acquire(ctx);
>  s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
>   s->bitmap_name, &local_err);
> +aio_context_release(ctx);
>  if (!s->bitmap) {
>  error_report_err(local_err);
>  return -EINVAL;

I was going to suggest to put that code inside brdv_create_dirty_bitmap,
because migration come is already complex enough without knowing about
AioContext, but it appears that this pattern is already used in several
places.

> @@ -833,7 +836,10 @@ static int dirty_bitmap_load_start(QEMUFile *f, 
> DBMLoadState *s)
>  
>  bdrv_disable_dirty_bitmap(s->bitmap);
>  if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
> +AioContext *ctx = bdrv_get_aio_context(s->bs);
> +aio_context_acquire(ctx);
>  bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
> +aio_context_release(ctx);
>  if (local_err) {
>  error_report_err(local_err);
>  return -EINVAL;

for(i=1; i < 1000; i++)
 printf(""I will not grep what a successor of a dirty bitmap
 is\n");

Later, Juan.




Re: [PATCH 2/3] tests/migration: Add -fno-stack-protector

2023-07-31 Thread Thomas Huth

On 31/07/2023 08.58, Akihiko Odaki wrote:

A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
  tests/migration/s390x/Makefile | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/migration/s390x/Makefile b/tests/migration/s390x/Makefile
index 6393c3e5b9..6671de2efc 100644
--- a/tests/migration/s390x/Makefile
+++ b/tests/migration/s390x/Makefile
@@ -6,8 +6,8 @@ all: a-b-bios.h
  fwdir=../../../pc-bios/s390-ccw
  
  CFLAGS+=-ffreestanding -fno-delete-null-pointer-checks -fPIE -Os \

-   -msoft-float -march=z900 -fno-asynchronous-unwind-tables -Wl,-pie \
-   -Wl,--build-id=none -nostdlib
+   -msoft-float -march=z900 -fno-asynchronous-unwind-tables \
+   -fno-stack-protector -Wl,-pie -Wl,--build-id=none -nostdlib
  
  a-b-bios.h: s390x.elf

echo "$$__note" > header.tmp


Reviewed-by: Thomas Huth 




Re: [PATCH 1/3] pc-bios/optionrom: Add -fno-stack-protector

2023-07-31 Thread Thomas Huth

On 31/07/2023 08.58, Akihiko Odaki wrote:

A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
  pc-bios/optionrom/Makefile | 2 +-
  pc-bios/s390-ccw/Makefile  | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
index b1fff0ba6c..f220d81f2c 100644
--- a/pc-bios/optionrom/Makefile
+++ b/pc-bios/optionrom/Makefile
@@ -19,7 +19,7 @@ quiet-command = $(call quiet-@,$2 $@)$1
  override CPPFLAGS += -MMD -MP -MT $@ -MF $(@D)/$(*F).d
  
  override CFLAGS += -march=i486 -Wall $(EXTRA_CFLAGS) -m16

-override CFLAGS += -ffreestanding -I$(TOPSRC_DIR)/include
+override CFLAGS += -ffreestanding -fno-stack-protector -I$(TOPSRC_DIR)/include
  
  cc-test = $(CC) -Werror $1 -c -o /dev/null -xc /dev/null >/dev/null 2>/dev/null

  cc-option = if $(call cc-test, $1); then \
diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index acfcd1e71a..446d448913 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -38,7 +38,7 @@ OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \
  EXTRA_CFLAGS += -Wall
  EXTRA_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common 
-fPIE
  EXTRA_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables
-EXTRA_CFLAGS += -msoft-float
+EXTRA_CFLAGS += -fno-stack-protector -msoft-float
  EXTRA_CFLAGS += -std=gnu99
  LDFLAGS += -Wl,-pie -nostdlib
  


Both Makefiles seem to add -no-stack-protector in other lines already, so 
this patch does not seem to be necessary?


 Thomas




Re: [PATCH 3/3] tests/tcg: Add -fno-stack-protector

2023-07-31 Thread Ilya Leoshkevich
On Mon, 2023-07-31 at 15:58 +0900, Akihiko Odaki wrote:
> A build of GCC 13.2 will have stack protector enabled by default if
> it
> was configured with --enable-default-ssp option. For such a compiler,
> it is necessary to explicitly disable stack protector when linking
> without standard libraries.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  tests/tcg/mips/hello-mips.c   | 4 ++--
>  tests/tcg/aarch64/Makefile.softmmu-target | 2 +-
>  tests/tcg/aarch64/Makefile.target | 2 +-
>  tests/tcg/alpha/Makefile.softmmu-target   | 2 +-
>  tests/tcg/arm/Makefile.target | 2 +-
>  tests/tcg/cris/Makefile.target    | 2 +-
>  tests/tcg/hexagon/Makefile.target | 2 +-
>  tests/tcg/i386/Makefile.softmmu-target    | 2 +-
>  tests/tcg/i386/Makefile.target    | 2 +-
>  tests/tcg/loongarch64/Makefile.softmmu-target | 2 +-
>  tests/tcg/minilib/Makefile.target | 2 +-
>  tests/tcg/mips/Makefile.target    | 2 +-
>  tests/tcg/nios2/Makefile.softmmu-target   | 2 +-
>  tests/tcg/s390x/Makefile.softmmu-target   | 2 +-
>  tests/tcg/x86_64/Makefile.softmmu-target  | 2 +-
>  15 files changed, 16 insertions(+), 16 deletions(-)

Acked-by: Ilya Leoshkevich 


Re: [PATCH 3/3] tests/tcg: Add -fno-stack-protector

2023-07-31 Thread Thomas Huth

On 31/07/2023 08.58, Akihiko Odaki wrote:

A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
  tests/tcg/mips/hello-mips.c   | 4 ++--
  tests/tcg/aarch64/Makefile.softmmu-target | 2 +-
  tests/tcg/aarch64/Makefile.target | 2 +-
  tests/tcg/alpha/Makefile.softmmu-target   | 2 +-
  tests/tcg/arm/Makefile.target | 2 +-
  tests/tcg/cris/Makefile.target| 2 +-
  tests/tcg/hexagon/Makefile.target | 2 +-
  tests/tcg/i386/Makefile.softmmu-target| 2 +-
  tests/tcg/i386/Makefile.target| 2 +-
  tests/tcg/loongarch64/Makefile.softmmu-target | 2 +-
  tests/tcg/minilib/Makefile.target | 2 +-
  tests/tcg/mips/Makefile.target| 2 +-
  tests/tcg/nios2/Makefile.softmmu-target   | 2 +-
  tests/tcg/s390x/Makefile.softmmu-target   | 2 +-
  tests/tcg/x86_64/Makefile.softmmu-target  | 2 +-
  15 files changed, 16 insertions(+), 16 deletions(-)


I think the changes to the Makefile.softmmu-target files should rather be 
done in a central place, in tests/tcg/Makefile.target, right after the 
"EXTRA_CFLAGS += -ffreestanding" there.


 Thomas




[PATCH 3/5] linux-user: Use MAP_FIXED_NOREPLACE for do_brk()

2023-07-31 Thread Akihiko Odaki
MAP_FIXED_NOREPLACE can ensure the mapped address is fixed without
concerning that the new mapping overwrites something else.

Signed-off-by: Akihiko Odaki 
---
 linux-user/syscall.c | 17 +++--
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b9d2ec02f9..ac429a185a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -854,17 +854,12 @@ abi_long do_brk(abi_ulong brk_val)
 return target_brk;
 }
 
-/* We need to allocate more memory after the brk... Note that
- * we don't use MAP_FIXED because that will map over the top of
- * any existing mapping (like the one with the host libc or qemu
- * itself); instead we treat "mapped but at wrong address" as
- * a failure and unmap again.
- */
 if (new_host_brk_page > brk_page) {
 new_alloc_size = new_host_brk_page - brk_page;
 mapped_addr = target_mmap(brk_page, new_alloc_size,
-  PROT_READ|PROT_WRITE,
-  MAP_ANON|MAP_PRIVATE, 0, 0);
+  PROT_READ | PROT_WRITE,
+  MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE,
+  0, 0);
 } else {
 new_alloc_size = 0;
 mapped_addr = brk_page;
@@ -883,12 +878,6 @@ abi_long do_brk(abi_ulong brk_val)
 target_brk = brk_val;
 brk_page = new_host_brk_page;
 return target_brk;
-} else if (mapped_addr != -1) {
-/* Mapped but at wrong address, meaning there wasn't actually
- * enough space for this brk.
- */
-target_munmap(mapped_addr, new_alloc_size);
-mapped_addr = -1;
 }
 
 #if defined(TARGET_ALPHA)
-- 
2.41.0




[PATCH 0/5] linux-user: brk/mmap fixes

2023-07-31 Thread Akihiko Odaki
linux-user was failing on M2 MacBook Air. Digging into the details, I found
several bugs in brk and mmap so here are fixes.

Akihiko Odaki (5):
  linux-user: Unset MAP_FIXED_NOREPLACE for host
  linux-user: Do not call get_errno() in do_brk()
  linux-user: Use MAP_FIXED_NOREPLACE for do_brk()
  linux-user: Do nothing if too small brk is specified
  linux-user: Do not align brk with host page size

 linux-user/elfload.c |  4 +--
 linux-user/mmap.c|  2 ++
 linux-user/syscall.c | 67 +---
 3 files changed, 17 insertions(+), 56 deletions(-)

-- 
2.41.0




[PATCH 4/5] linux-user: Do nothing if too small brk is specified

2023-07-31 Thread Akihiko Odaki
Linux 6.4.7 does nothing when a value smaller than the initial brk is
specified.

Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Signed-off-by: Akihiko Odaki 
---
 linux-user/syscall.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ac429a185a..ebdc8c144c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -820,14 +820,14 @@ abi_long do_brk(abi_ulong brk_val)
 
 /* brk pointers are always untagged */
 
-/* return old brk value if brk_val unchanged or zero */
-if (!brk_val || brk_val == target_brk) {
+/* return old brk value if brk_val unchanged */
+if (brk_val == target_brk) {
 return target_brk;
 }
 
 /* do not allow to shrink below initial brk value */
 if (brk_val < initial_target_brk) {
-brk_val = initial_target_brk;
+return target_brk;
 }
 
 new_brk = TARGET_PAGE_ALIGN(brk_val);
-- 
2.41.0




[PATCH 5/5] linux-user: Do not align brk with host page size

2023-07-31 Thread Akihiko Odaki
do_brk() minimizes calls into target_mmap() by aligning the address
with host page size, which is potentially larger than the target page
size. However, the current implementation of this optimization has two
bugs:

- The start of brk is rounded up with the host page size while brk
  advertises an address aligned with the target page size as the
  beginning of brk. This makes the beginning of brk unmapped.
- Content clearing after mapping is flawed. The size to clear is
  specified as HOST_PAGE_ALIGN(brk_page) - brk_page, but brk_page is
  aligned with the host page size so it is always zero.

This optimization actually has no practical benefit. It makes difference
when brk() is called multiple times with values in a range of the host
page size. However, sophisticated memory allocators try to avoid to
make such frequent brk() calls. For example, glibc 2.37 calls brk() to
shrink the heap only when there is a room more than 128 KiB. It is
rare to have a page size larger than 128 KiB if it happens.

Let's remove the optimization to fix the bugs and make the code simpler.

Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1616
Signed-off-by: Akihiko Odaki 
---
 linux-user/elfload.c |  4 ++--
 linux-user/syscall.c | 54 ++--
 2 files changed, 14 insertions(+), 44 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 861ec07abc..2aee2298ec 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -3678,8 +3678,8 @@ int load_elf_binary(struct linux_binprm *bprm, struct 
image_info *info)
  * to mmap pages in this space.
  */
 if (info->reserve_brk) {
-abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk);
-abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk);
+abi_ulong start_brk = TARGET_PAGE_ALIGN(info->brk);
+abi_ulong end_brk = TARGET_PAGE_ALIGN(info->brk + info->reserve_brk);
 target_munmap(start_brk, end_brk - start_brk);
 }
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ebdc8c144c..475260b7ce 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -802,81 +802,51 @@ static inline int host_to_target_sock_type(int host_type)
 }
 
 static abi_ulong target_brk, initial_target_brk;
-static abi_ulong brk_page;
 
 void target_set_brk(abi_ulong new_brk)
 {
 target_brk = TARGET_PAGE_ALIGN(new_brk);
 initial_target_brk = target_brk;
-brk_page = HOST_PAGE_ALIGN(target_brk);
 }
 
 /* do_brk() must return target values and target errnos. */
 abi_long do_brk(abi_ulong brk_val)
 {
 abi_long mapped_addr;
-abi_ulong new_alloc_size;
-abi_ulong new_brk, new_host_brk_page;
+abi_ulong new_brk;
+abi_ulong old_brk;
 
 /* brk pointers are always untagged */
 
-/* return old brk value if brk_val unchanged */
-if (brk_val == target_brk) {
-return target_brk;
-}
-
 /* do not allow to shrink below initial brk value */
 if (brk_val < initial_target_brk) {
 return target_brk;
 }
 
 new_brk = TARGET_PAGE_ALIGN(brk_val);
-new_host_brk_page = HOST_PAGE_ALIGN(brk_val);
+old_brk = TARGET_PAGE_ALIGN(target_brk);
 
-/* brk_val and old target_brk might be on the same page */
-if (new_brk == TARGET_PAGE_ALIGN(target_brk)) {
-/* empty remaining bytes in (possibly larger) host page */
-memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk);
+/* new and old target_brk might be on the same page */
+if (new_brk == old_brk) {
 target_brk = brk_val;
 return target_brk;
 }
 
 /* Release heap if necesary */
-if (new_brk < target_brk) {
-/* empty remaining bytes in (possibly larger) host page */
-memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk);
-
-/* free unused host pages and set new brk_page */
-target_munmap(new_host_brk_page, brk_page - new_host_brk_page);
-brk_page = new_host_brk_page;
+if (new_brk < old_brk) {
+target_munmap(new_brk, old_brk - new_brk);
 
 target_brk = brk_val;
 return target_brk;
 }
 
-if (new_host_brk_page > brk_page) {
-new_alloc_size = new_host_brk_page - brk_page;
-mapped_addr = target_mmap(brk_page, new_alloc_size,
-  PROT_READ | PROT_WRITE,
-  MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE,
-  0, 0);
-} else {
-new_alloc_size = 0;
-mapped_addr = brk_page;
-}
-
-if (mapped_addr == brk_page) {
-/* Heap contents are initialized to zero, as for anonymous
- * mapped pages.  Technically the new pages are already
- * initialized to zero since they *are* anonymous mapped
- * pages, however we have to take care with the contents that
- * come from the remaining part of the previous page: it may
- 

[PATCH 2/5] linux-user: Do not call get_errno() in do_brk()

2023-07-31 Thread Akihiko Odaki
Later the returned value is compared with -1, and negated errno is not
expected.

Fixes: 00faf08c95 ("linux-user: Don't use MAP_FIXED in do_brk()")
Signed-off-by: Akihiko Odaki 
---
 linux-user/syscall.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 95727a816a..b9d2ec02f9 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -862,9 +862,9 @@ abi_long do_brk(abi_ulong brk_val)
  */
 if (new_host_brk_page > brk_page) {
 new_alloc_size = new_host_brk_page - brk_page;
-mapped_addr = get_errno(target_mmap(brk_page, new_alloc_size,
-PROT_READ|PROT_WRITE,
-MAP_ANON|MAP_PRIVATE, 0, 0));
+mapped_addr = target_mmap(brk_page, new_alloc_size,
+  PROT_READ|PROT_WRITE,
+  MAP_ANON|MAP_PRIVATE, 0, 0);
 } else {
 new_alloc_size = 0;
 mapped_addr = brk_page;
-- 
2.41.0




[PATCH 1/5] linux-user: Unset MAP_FIXED_NOREPLACE for host

2023-07-31 Thread Akihiko Odaki
Passing MAP_FIXED_NOREPLACE to host will fail if the virtual
address space is reserved with mmap. Replace it with MAP_FIXED.

Signed-off-by: Akihiko Odaki 
---
 linux-user/mmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index a5dfb56545..2f26cbaf5d 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -610,6 +610,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
target_prot,
 goto fail;
 }
 
+flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
+
 /*
  * worst case: we cannot map the file because the offset is not
  * aligned, so we read it
-- 
2.41.0




Re: [PATCH] vdpa: set old virtio status at cvq isolation probing end

2023-07-31 Thread Eugenio Perez Martin
On Mon, Jul 31, 2023 at 8:36 AM Jason Wang  wrote:
>
> On Wed, Jul 26, 2023 at 2:27 PM Eugenio Perez Martin
>  wrote:
> >
> > On Wed, Jul 26, 2023 at 4:07 AM Jason Wang  wrote:
> > >
> > > On Wed, Jul 26, 2023 at 2:21 AM Eugenio Pérez  wrote:
> > > >
> > > > The device already has a virtio status set by vhost_vdpa_init by the
> > > > time vhost_vdpa_probe_cvq_isolation is called. vhost_vdpa_init set
> > > > S_ACKNOWLEDGE and S_DRIVER, so it is invalid to just reset it.
> > > >
> > > > It is invalid to start the device after it, but all devices seems to be
> > > > fine with it.  Fixing qemu so it follows virtio start procedure.
> > > >
> > > > Fixes: 152128d64697 ("vdpa: move CVQ isolation check to 
> > > > net_init_vhost_vdpa")
> > > > Reported-by: Dragos Tatulea 
> > > > Signed-off-by: Eugenio Pérez 
> > > > ---
> > > >  net/vhost-vdpa.c | 2 ++
> > > >  1 file changed, 2 insertions(+)
> > > >
> > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > index 9795306742..d7e2b714b4 100644
> > > > --- a/net/vhost-vdpa.c
> > > > +++ b/net/vhost-vdpa.c
> > > > @@ -1333,6 +1333,8 @@ static int vhost_vdpa_probe_cvq_isolation(int 
> > > > device_fd, uint64_t features,
> > > >  out:
> > > >  status = 0;
> > > >  ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > > > +status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> > > > +ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > >
> > > So if we fail after FEATURES_OK, this basically clears that bit. Spec
> > > doesn't say it can or not, I wonder if a reset is better?
> > >
> >
> > I don't follow this, the reset is just above the added code, isn't it?
>
> I meant for error path:
>
> E.g:
> uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
>  VIRTIO_CONFIG_S_DRIVER |
>  VIRTIO_CONFIG_S_FEATURES_OK;
> ...
> r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> 
> if (cvq_group != -ENOTSUP) {
> r = cvq_group;
> goto out;
> }
>
> out:
> status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
>
> We're basically clearing FEATURES_OK?
>

Yes, it is the state that previous functions (vhost_vdpa_init) set. We
need to leave it that way, either if the backend supports cvq
isolation or not, or in the case of an error. Not doing that way makes
vhost_dev_start (and vhost_vdpa_set_features) set the features before
setting VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER.
Otherwise, the guest can (and do) access to config space before
_S_ACKNOWLEDGE | _S_DRIVER.


> >
> > > Btw, spec requires a read of status after setting FEATURES_OK, this
> > > seems to be missed in the current code.
> > >
> >
> > I'm ok with that, but this patch does not touch that part.
> >
> > To fix this properly we should:
> > - Expose vhost_vdpa_set_dev_features_fd as we did in previous versions
> > of the series that added vhost_vdpa_probe_cvq_isolation [1].
> > - Get status after vhost_vdpa_add_status, so both vhost start code and
> > this follows the standard properly.
> >
> > Is it ok to do these on top of this patch?
>
> Fine.
>
> Thanks
>
> >
> > Thanks!
> >
> > [1] 
> > https://lore.kernel.org/qemu-devel/20230509154435.1410162-4-epere...@redhat.com/
> >
> >
> > > Thanks
> > >
> > > >  return r;
> > > >  }
> > > >
> > > > --
> > > > 2.39.3
> > > >
> > >
> >
>




Re: [RFC PATCH 4/6] hw/i386/intel_iommu: Fix VTD_IR_TableEntry for ms_struct layout

2023-07-31 Thread Thomas Huth

On 28/07/2023 16.37, Daniel P. Berrangé wrote:

On Fri, Jul 28, 2023 at 04:27:46PM +0200, Thomas Huth wrote:

We might want to compile QEMU with Clang on Windows - but it
does not support the __attribute__((gcc_struct)) yet. So we
have to make sure that the structs will stay the same when
the compiler uses the "ms_struct" layout. The VTD_IR_TableEntry
struct is affected - rewrite it a little bit so that it works
fine with both struct layouts.

Reported-by: Daniel P. Berrangé 
Signed-off-by: Thomas Huth 
---
  include/hw/i386/intel_iommu.h | 14 --
  hw/i386/intel_iommu.c |  2 +-
  2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 89dcbc5e1e..08bf220393 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -204,18 +204,20 @@ union VTD_IR_TableEntry {
  #endif
  uint32_t dest_id;/* Destination ID */
  uint16_t source_id;  /* Source-ID */
+uint16_t __reserved_2;   /* Reserved 2 */
  #if HOST_BIG_ENDIAN
-uint64_t __reserved_2:44;/* Reserved 2 */
-uint64_t sid_vtype:2;/* Source-ID Validation Type */
-uint64_t sid_q:2;/* Source-ID Qualifier */
+uint32_t __reserved_3:28;/* Reserved 3 */
+uint32_t sid_vtype:2;/* Source-ID Validation Type */
+uint32_t sid_q:2;/* Source-ID Qualifier */
  #else
-uint64_t sid_q:2;/* Source-ID Qualifier */
-uint64_t sid_vtype:2;/* Source-ID Validation Type */
-uint64_t __reserved_2:44;/* Reserved 2 */
+uint32_t sid_q:2;/* Source-ID Qualifier */
+uint32_t sid_vtype:2;/* Source-ID Validation Type */
+uint32_t __reserved_3:28;/* Reserved 3 */


Hasn't this has changed the struct layout in the else clause

  Old layout:

source_id : 16
sid_q : 2
sid_vtype : 2
reserved_2 : 44

  New layout

source_id : 16
reserved_2 : 16
sid_q : 2
sid_vtype : 2
reserved_3 : 28


Drat, you're right, I missed the fact that the whole stuff is read and 
written via the uint64_t data[2] part from the union in the code ... :-(



Was there something wrong with the change I suggested to
just make source_id be a bitfield too:

uint64_t source_id: 16;  /* Source-ID */

which could make ms_struct layout avoid padding to the following
bitfields.


That likely works, but I think we then need to add it then twice, one time 
in the HOST_BIG_ENDIAN at the end, and one time in the #else part?


Anyway, that whole code looks like it's completely wrong on big endian 
machines. The struct is read via dma_memory_read() from guest memory, but 
then the values are never byte-swapped, except for the error_report and 
trace functions, e.g. entry->irte.present is used without calling 
le64_to_cpu() first.
entry->irte.source_id is swapped with le32_to_cpu() which looks also wrong 
since this is a 16 bit field.


Sigh. This is another good example why we shouldn't use bitfields at all in 
structures that exchange data. As Richard suggested in his reply, this 
really should be rewritten, e.g. with the stuff from hw/registerfields.h.


 Thomas




Re: [PATCH] target/s390x: Move trans_exc_code update to do_program_interrupt

2023-07-31 Thread Ilya Leoshkevich
On Fri, 2023-07-28 at 13:02 -0700, Richard Henderson wrote:
> On 7/28/23 12:55, Richard Henderson wrote:
> > This solves a problem in which the store to LowCore during tlb_fill
> > triggers a clean-page TB invalidation for page0 during translation,
> > which results in an assertion failure for locked pages.
> > 
> > By delaying the store until after the exception has been raised,
> > we will have unwound the pages locked for translation and the
> > problem does not arise.  There are plenty of other updates to
> > LowCore while delivering an interrupt/exception; trans_exc_code
> > does not need to be special.
> > 
> > Reported-by: Claudio Fontana 
> > Signed-off-by: Richard Henderson 
> > ---
> >   target/s390x/tcg/excp_helper.c | 42 +++--
> > -
> >   1 file changed, 29 insertions(+), 13 deletions(-)

[...]

> >   switch (env->int_pgm_code) {
> >   case PGM_PER:
> > -    if (env->per_perc_atmid & PER_CODE_EVENT_NULLIFICATION) {
> > -    break;
> > -    }
> > -    /* FALL THROUGH */
> > +    advance = !(env->per_perc_atmid &
> > PER_CODE_EVENT_NULLIFICATION);
> > +    break;
> > +    case PGM_ASCE_TYPE:
> > +    case PGM_REG_FIRST_TRANS:
> > +    case PGM_REG_SEC_TRANS:
> > +    case PGM_REG_THIRD_TRANS:
> > +    case PGM_SEGMENT_TRANS:
> > +    case PGM_PAGE_TRANS:
> > +    assert(env->int_pgm_code == env->tlb_fill_exc);
> > +    set_trans_exc_code = true;
> > +    break;
> 
> I should have mentioned that this block of exceptions came from page
> 3-76 
> (Translation-Exception Identification for DAT Exceptions) of the 13th
> edition of the PoO.
> 
> > +    case PGM_PROTECTION:
> > +    case PGM_TRANS_SPEC:
> > +    assert(env->int_pgm_code == env->tlb_fill_exc);
> > +    set_trans_exc_code = true;
> > +    advance = true;
> > +    break;
> 
> These exceptions came from seeing an early kernel fault, grepping for
> the set of 
> exceptions raised in mmu_helper.c, and eliminating PGM_ADDRESSING per
> the first hunk.

Does POp specify that the CPU stores Translation-Exception
Identification on Translation-Specification Exceptions
(PGM_TRANS_SPEC)? I re-read the 0xA8 documentation a few times, but
could not find it.

It's also interesting what the kernel was attempting when it got
PGM_TRANS_SPEC and recovered from it. Maybe something else is wrong?

Other than the POp question:

Reviewed-by: Ilya Leoshkevich 

> 
> I wasn't sure where to look for the full specification of exception
> effects, but this did 
> solve the kernel fault.
> 
> 
> r~




Re: [PATCH v5 0/3] linux-user: Fix and optimize target memory layout

2023-07-31 Thread Michael Tokarev

28.07.2023 20:31, Helge Deller wrote:


NOTE:
- this patch series is for qemu v8.1.0-rc ONLY.
- do not apply on top of v8.0-stable series, which uses
   a different search algorithm for free mmap memory
   and thus will give incorrect memory layouts.


Hm. I included a previous version of this patchset in debian
qemu 8.0 package.  Wonder what should we do on 8.0 and on
debian..

/mjt



Re: [RFC PATCH] target/i386: Truncate ESP when exiting from long mode

2023-07-31 Thread Ard Biesheuvel
On Wed, 26 Jul 2023 at 17:01, Richard Henderson
 wrote:
>
> On 7/26/23 01:17, Ard Biesheuvel wrote:
> > While working on some EFI boot changes for Linux/x86, I noticed that TCG 
> > deviates from
> > bare metal when it comes to how it handles the value of the stack pointer 
> > register RSP
> > when dropping out of long mode.
> >
> > On bare metal, RSP is truncated to 32 bits, even if the code that runs in 
> > 32-bit
> > protected mode never uses the stack at all (and uses a long jump rather 
> > than long
> > return to switch back to long mode). This means 64-bit code cannot rely on 
> > RSP
> > surviving any excursions into 32-bit protected mode (with paging disabled).
> >
> > Let's align TCG with this behavior, so that code that relies on RSP 
> > retaining its value
> > does not inadvertently work while bare metal does not.
> >
> > Observed on Intel Ice Lake cores.
> >
> > Cc: Paolo Bonzini Cc: Richard
> > Henderson Cc: Eduardo 
> > Habkost
> > Link:https://lore.kernel.org/all/20230711091453.2543622-11-a...@kernel.org/
> > Signed-off-by: Ard Biesheuvel --- I used this patch 
> > locally to
> > reproduce an issue that was reported on Ice Lake but didn't trigger in my 
> > QEMU
> > testing.
> >
> > Hints welcome on where the architectural behavior is specified, and in 
> > particular,
> > whether or not other 64-bit GPRs can be relied upon to preserve their full 
> > 64-bit
> > length values.
>
> No idea about chapter and verse, but it has the feel of being part and parcel 
> with the
> truncation of eip.  While esp is always special, I suspect that none of the 
> GPRs can be
> relied on carrying all bits.
>
> I'm happy with the change though, since similar behaviour can be observed on 
> hw.
>
> Acked-by: Richard Henderson 
>

I experimented with truncating all GPRs that exist in 32-bit mode, and
this actually breaks kexec on Linux if it happens to load the kernel
above 4G (which it appears to do reproducibly when sufficient memory
is available)

This is due to the 4/5 level paging switch trampoline, which is called
while RBX, RBP and RSI are live and refer to assets in memory that may
reside above 4G.

I am fixing that code, but it does mean we should probably limit this
change to ESP (as apparently, current hw only happens to truncate ESP
but no other GPRs)



[PATCH v2] target/riscv: Use existing lookup tables for MixColumns

2023-07-31 Thread Ard Biesheuvel
The AES MixColumns and InvMixColumns operations are relatively
expensive 4x4 matrix multiplications in GF(2^8), which is why C
implementations usually rely on precomputed lookup tables rather than
performing the calculations on demand.

Given that we already carry those tables in QEMU, we can just grab the
right value in the implementation of the RISC-V AES32 instructions. Note
that the tables in question are permuted according to the respective
Sbox, so we can omit the Sbox lookup as well in this case.

Cc: Richard Henderson 
Cc: Philippe Mathieu-Daudé 
Cc: Zewen Ye 
Cc: Weiwei Li 
Cc: Junqiang Wang 
Signed-off-by: Ard Biesheuvel 
---
v2:
- ignore host endianness and use be32_to_cpu() unconditionally

 crypto/aes.c |  4 +--
 include/crypto/aes.h |  7 
 target/riscv/crypto_helper.c | 34 +++-
 3 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/crypto/aes.c b/crypto/aes.c
index 836d7d5c0bf1b392..df4362ac6022eac2 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -272,7 +272,7 @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
 AES_Td4[x] = Si[x].[01, 01, 01, 01];
 */
 
-static const uint32_t AES_Te0[256] = {
+const uint32_t AES_Te0[256] = {
 0xc66363a5U, 0xf87c7c84U, 0xee99U, 0xf67b7b8dU,
 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
@@ -607,7 +607,7 @@ static const uint32_t AES_Te4[256] = {
 0xb0b0b0b0U, 0x54545454U, 0xU, 0x16161616U,
 };
 
-static const uint32_t AES_Td0[256] = {
+const uint32_t AES_Td0[256] = {
 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 709d4d226bfe158b..381f24c9022d2aa8 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -30,4 +30,11 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
 extern const uint8_t AES_sbox[256];
 extern const uint8_t AES_isbox[256];
 
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Td0[256];
+
 #endif
diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
index 99d85a618843e87e..4d65945429c6dcc4 100644
--- a/target/riscv/crypto_helper.c
+++ b/target/riscv/crypto_helper.c
@@ -25,29 +25,6 @@
 #include "crypto/aes-round.h"
 #include "crypto/sm4.h"
 
-#define AES_XTIME(a) \
-((a << 1) ^ ((a & 0x80) ? 0x1b : 0))
-
-#define AES_GFMUL(a, b) (( \
-(((b) & 0x1) ? (a) : 0) ^ \
-(((b) & 0x2) ? AES_XTIME(a) : 0) ^ \
-(((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \
-(((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF)
-
-static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd)
-{
-uint32_t u;
-
-if (fwd) {
-u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) |
-(AES_GFMUL(x, 2) << 0);
-} else {
-u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) |
-(AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0);
-}
-return u;
-}
-
 #define sext32_xlen(x) (target_ulong)(int32_t)(x)
 
 static inline target_ulong aes32_operation(target_ulong shamt,
@@ -55,23 +32,20 @@ static inline target_ulong aes32_operation(target_ulong 
shamt,
bool enc, bool mix)
 {
 uint8_t si = rs2 >> shamt;
-uint8_t so;
 uint32_t mixed;
 target_ulong res;
 
 if (enc) {
-so = AES_sbox[si];
 if (mix) {
-mixed = aes_mixcolumn_byte(so, true);
+mixed = be32_to_cpu(AES_Te0[si]);
 } else {
-mixed = so;
+mixed = AES_sbox[si];
 }
 } else {
-so = AES_isbox[si];
 if (mix) {
-mixed = aes_mixcolumn_byte(so, false);
+mixed = be32_to_cpu(AES_Td0[si]);
 } else {
-mixed = so;
+mixed = AES_isbox[si];
 }
 }
 mixed = rol32(mixed, shamt);
-- 
2.39.2




RE: [PATCH RFC v1 1/3] vfio/pci: detect the support of dynamic MSI-X allocation

2023-07-31 Thread Liu, Jing2
Hi C.

> On July 31, 2023 3:26 PM, Cédric Le Goater  wrote:
> 
> On 7/31/23 05:57, Liu, Jing2 wrote:
> > Hi C.
> >
> >> On July 28, 2023 4:44 PM, Cédric Le Goater  wrote:
> >>
> >> [ ... ]
> >>
> >>> Sorry I didn't quite understand "info.flags be tested against
> >> VFIO_IRQ_INFO_NORESIZE".
> >>> I saw kernel < 6.4 simply added NORESIZE to info.flags and latest
> >>> kernel adds
> >> if has_dyn_msix.
> >>> Would you please kindly describe more on your point?
> >>
> >> I was trying to find the conditions to detect safely that the kernel
> >> didn't have dynamic MSI-X support. Testing VFIO_IRQ_INFO_NORESIZE
> seems enough.
> >>
> > Oh, I see.
> >
>  In that case, QEMU should report an error and the trace event is
>  not
> >> needed.
> >>>
> >>> I replied an email with new error handling draft code based on my
> >>> understanding, which reports the error and need no trace. Could you
> >>> please
> >> help review if that is what we want?
> >>
> >> yes. It looked good. Please send a v1 !
> >
> > Thanks for reviewing that. I guess you mean v2 for next version 😊
> 
> Well, if you remove the RFC status, I think you should, this would still be a 
> v1.
> 
Oh, got it. Thanks for telling this.

Jing

> Thanks,
> 
> C.



Re: [PATCH] vdpa: set old virtio status at cvq isolation probing end

2023-07-31 Thread Jason Wang
On Mon, Jul 31, 2023 at 4:05 PM Eugenio Perez Martin
 wrote:
>
> On Mon, Jul 31, 2023 at 8:36 AM Jason Wang  wrote:
> >
> > On Wed, Jul 26, 2023 at 2:27 PM Eugenio Perez Martin
> >  wrote:
> > >
> > > On Wed, Jul 26, 2023 at 4:07 AM Jason Wang  wrote:
> > > >
> > > > On Wed, Jul 26, 2023 at 2:21 AM Eugenio Pérez  
> > > > wrote:
> > > > >
> > > > > The device already has a virtio status set by vhost_vdpa_init by the
> > > > > time vhost_vdpa_probe_cvq_isolation is called. vhost_vdpa_init set
> > > > > S_ACKNOWLEDGE and S_DRIVER, so it is invalid to just reset it.
> > > > >
> > > > > It is invalid to start the device after it, but all devices seems to 
> > > > > be
> > > > > fine with it.  Fixing qemu so it follows virtio start procedure.
> > > > >
> > > > > Fixes: 152128d64697 ("vdpa: move CVQ isolation check to 
> > > > > net_init_vhost_vdpa")
> > > > > Reported-by: Dragos Tatulea 
> > > > > Signed-off-by: Eugenio Pérez 
> > > > > ---
> > > > >  net/vhost-vdpa.c | 2 ++
> > > > >  1 file changed, 2 insertions(+)
> > > > >
> > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > index 9795306742..d7e2b714b4 100644
> > > > > --- a/net/vhost-vdpa.c
> > > > > +++ b/net/vhost-vdpa.c
> > > > > @@ -1333,6 +1333,8 @@ static int vhost_vdpa_probe_cvq_isolation(int 
> > > > > device_fd, uint64_t features,
> > > > >  out:
> > > > >  status = 0;
> > > > >  ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > > > > +status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> > > > > +ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > > >
> > > > So if we fail after FEATURES_OK, this basically clears that bit. Spec
> > > > doesn't say it can or not, I wonder if a reset is better?
> > > >
> > >
> > > I don't follow this, the reset is just above the added code, isn't it?
> >
> > I meant for error path:
> >
> > E.g:
> > uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >  VIRTIO_CONFIG_S_DRIVER |
> >  VIRTIO_CONFIG_S_FEATURES_OK;
> > ...
> > r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > 
> > if (cvq_group != -ENOTSUP) {
> > r = cvq_group;
> > goto out;
> > }
> >
> > out:
> > status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> > ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> >
> > We're basically clearing FEATURES_OK?
> >
>
> Yes, it is the state that previous functions (vhost_vdpa_init) set. We
> need to leave it that way, either if the backend supports cvq
> isolation or not, or in the case of an error. Not doing that way makes
> vhost_dev_start (and vhost_vdpa_set_features) set the features before
> setting VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER.
> Otherwise, the guest can (and do) access to config space before
> _S_ACKNOWLEDGE | _S_DRIVER.

I'm not sure if it is supported by the spec or not (I meant clearing
the FEATURES_OK). Or maybe we need a reset here?

Thanks

>
>
> > >
> > > > Btw, spec requires a read of status after setting FEATURES_OK, this
> > > > seems to be missed in the current code.
> > > >
> > >
> > > I'm ok with that, but this patch does not touch that part.
> > >
> > > To fix this properly we should:
> > > - Expose vhost_vdpa_set_dev_features_fd as we did in previous versions
> > > of the series that added vhost_vdpa_probe_cvq_isolation [1].
> > > - Get status after vhost_vdpa_add_status, so both vhost start code and
> > > this follows the standard properly.
> > >
> > > Is it ok to do these on top of this patch?
> >
> > Fine.
> >
> > Thanks
> >
> > >
> > > Thanks!
> > >
> > > [1] 
> > > https://lore.kernel.org/qemu-devel/20230509154435.1410162-4-epere...@redhat.com/
> > >
> > >
> > > > Thanks
> > > >
> > > > >  return r;
> > > > >  }
> > > > >
> > > > > --
> > > > > 2.39.3
> > > > >
> > > >
> > >
> >
>




[RFC PATCH 00/24] plugins: Allow to read registers

2023-07-31 Thread Akihiko Odaki
I and other people in the University of Tokyo, where I research processor
design, found TCG plugins are very useful for processor design exploration.

The feature we find missing is the capability to read registers from
plugins. In this series, I propose to add such a capability by reusing
gdbstub code.

The reuse of gdbstub code ensures the long-term stability of the TCG plugin
interface for register access without incurring a burden to maintain yet
another interface for register access.

This process to add TCG plugin involves four major changes. The first one
is to add GDBFeature structure that represents a GDB feature, which usually
includes registers. GDBFeature can be generated from static XML files or
dynamically generated by architecture-specific code. In fact, this is a
refactoring independent of the feature this series adds, and potentially
it's benefitial even without the plugin feature. The plugin feature will
utilize this new structure to describe registers exposed to plugins.

The second one is to make gdb_read_register/gdb_write_register usable
outside of gdbstub context.

The third one is to actually make registers readable for plugins.

The last one is to allow to implement a QEMU plugin in C++. A plugin that
I'll describe later is written in C++.

The below is a summary of patches:
Patch 01 fixes a bug in execlog plugin.
Patch [02, 13] introduces GDBFeature.
Patch [14, 17] adds information useful for plugins to GDBFeature.
Patch [18, 20] makes registers readable outside of gdbstub context.
Patch [21, 22] adds the feature to read registers from plugins.
Patch [23, 24] makes it possible to write plugins in C++.

The execlog plugin will have new options to demonstrate the new feature.
I also have a plugin that uses this new feature to generate execution traces for
Sniper processor simulator, which is available at:
https://github.com/shioya-lab/sniper/tree/akihikodaki/bb

Akihiko Odaki (24):
  contrib/plugins: Use GRWLock in execlog
  gdbstub: Introduce GDBFeature structure
  gdbstub: Add num_regs member to GDBFeature
  gdbstub: Introduce gdb_find_static_feature()
  target/arm: Move the reference to arm-core.xml
  hw/core/cpu: Replace gdb_core_xml_file with gdb_core_feature
  target/arm: Use GDBFeature for dynamic XML
  target/ppc: Use GDBFeature for dynamic XML
  target/riscv: Use GDBFeature for dynamic XML
  gdbstub: Use GDBFeature for gdb_register_coprocessor
  gdbstub: Use GDBFeature for GDBRegisterState
  gdbstub: Simplify XML lookup
  hw/core/cpu: Remove gdb_get_dynamic_xml member
  gdbstub: Add members to identify registers to GDBFeature
  target/arm: Fill new members of GDBFeature
  target/ppc: Fill new members of GDBFeature
  target/riscv: Fill new members of GDBFeature
  hw/core/cpu: Add a parameter to gdb_read_register/gdb_write_register
  gdbstub: Hide gdb_has_xml
  gdbstub: Expose functions to read registers
  plugins: Allow to read registers
  contrib/plugins: Allow to log registers
  plugins: Support C++
  contrib/plugins: Add cc plugin

 MAINTAINERS   |   2 +-
 docs/devel/tcg-plugins.rst|  18 +++-
 configure |  15 ++-
 meson.build   |   2 +-
 gdbstub/internals.h   |   8 ++
 include/exec/gdbstub.h|  30 +++---
 include/hw/core/cpu.h |  15 ++-
 include/qemu/qemu-plugin.h|  69 +-
 target/alpha/cpu.h|   6 +-
 target/arm/cpu.h  |  37 
 target/arm/internals.h|   2 +-
 target/avr/cpu.h  |   6 +-
 target/cris/cpu.h |   9 +-
 target/hexagon/internal.h |   6 +-
 target/hppa/cpu.h |   6 +-
 target/i386/cpu.h |   6 +-
 target/loongarch/internals.h  |   6 +-
 target/m68k/cpu.h |   6 +-
 target/microblaze/cpu.h   |   6 +-
 target/mips/internal.h|   6 +-
 target/openrisc/cpu.h |   6 +-
 target/ppc/cpu-qom.h  |   3 +-
 target/ppc/cpu.h  |  15 +--
 target/riscv/cpu.h|  10 +-
 target/rx/cpu.h   |   6 +-
 target/s390x/cpu.h|   2 -
 target/s390x/s390x-internal.h |   6 +-
 target/sh4/cpu.h  |   6 +-
 target/sparc/cpu.h|   6 +-
 target/tricore/cpu.h  |   6 +-
 target/xtensa/cpu.h   |   6 +-
 contrib/plugins/execlog.c | 140 ---
 cpu.c |  11 ---
 gdbstub/gdbstub.c | 100 
 hw/core/cpu-common.c  |  16 +++-
 plugins/api.c |  40 
 stubs/gdbstub.c   |   6 +-
 target/alpha/gdbstub.c|   6 +-
 target/arm/cpu.c  |   6 +-
 target/arm/cpu64.c|   4 +-
 target/arm/gdbstub.c  | 172 ++
 target/arm/gdbstub64.c|  55 +++
 target/arm/tcg/cpu32.c|   3 +-
 target/avr/cpu.c  |   4 +-
 target/avr/gdbstub.c  |   6 +-
 target/cris/gdbstub.c |   9 +-
 target/hexagon/cpu

[RFC PATCH 01/24] contrib/plugins: Use GRWLock in execlog

2023-07-31 Thread Akihiko Odaki
execlog had the following comment:
> As we could have multiple threads trying to do this we need to
> serialise the expansion under a lock. Threads accessing already
> created entries can continue without issue even if the ptr array
> gets reallocated during resize.

However, when the ptr array gets reallocated, the other threads may have
a stale reference to the old buffer. This results in use-after-free.

Use GRWLock to properly fix this issue.

Fixes: 3d7caf145e ("contrib/plugins: add execlog to log instruction execution 
and memory access")
Signed-off-by: Akihiko Odaki 
---
 contrib/plugins/execlog.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index 7129d526f8..ce67acf145 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -19,7 +19,7 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = 
QEMU_PLUGIN_VERSION;
 
 /* Store last executed instruction on each vCPU as a GString */
 static GPtrArray *last_exec;
-static GMutex expand_array_lock;
+static GRWLock expand_array_lock;
 
 static GPtrArray *imatches;
 static GArray *amatches;
@@ -28,18 +28,16 @@ static GArray *amatches;
  * Expand last_exec array.
  *
  * As we could have multiple threads trying to do this we need to
- * serialise the expansion under a lock. Threads accessing already
- * created entries can continue without issue even if the ptr array
- * gets reallocated during resize.
+ * serialise the expansion under a lock.
  */
 static void expand_last_exec(int cpu_index)
 {
-g_mutex_lock(&expand_array_lock);
+g_rw_lock_writer_unlock(&expand_array_lock);
 while (cpu_index >= last_exec->len) {
 GString *s = g_string_new(NULL);
 g_ptr_array_add(last_exec, s);
 }
-g_mutex_unlock(&expand_array_lock);
+g_rw_lock_writer_unlock(&expand_array_lock);
 }
 
 /**
@@ -51,8 +49,10 @@ static void vcpu_mem(unsigned int cpu_index, 
qemu_plugin_meminfo_t info,
 GString *s;
 
 /* Find vCPU in array */
+g_rw_lock_reader_lock(&expand_array_lock);
 g_assert(cpu_index < last_exec->len);
 s = g_ptr_array_index(last_exec, cpu_index);
+g_rw_lock_reader_unlock(&expand_array_lock);
 
 /* Indicate type of memory access */
 if (qemu_plugin_mem_is_store(info)) {
@@ -80,10 +80,14 @@ static void vcpu_insn_exec(unsigned int cpu_index, void 
*udata)
 GString *s;
 
 /* Find or create vCPU in array */
+g_rw_lock_reader_lock(&expand_array_lock);
 if (cpu_index >= last_exec->len) {
+g_rw_lock_reader_unlock(&expand_array_lock);
 expand_last_exec(cpu_index);
+g_rw_lock_reader_lock(&expand_array_lock);
 }
 s = g_ptr_array_index(last_exec, cpu_index);
+g_rw_lock_reader_unlock(&expand_array_lock);
 
 /* Print previous instruction in cache */
 if (s->len) {
-- 
2.41.0




[RFC PATCH 05/24] target/arm: Move the reference to arm-core.xml

2023-07-31 Thread Akihiko Odaki
Some subclasses overwrite gdb_core_xml_file member but others don't.
Always initialize the member in the subclasses for consistency.

This especially helps for AArch64; in a following change, the file
specified by gdb_core_xml_file is always looked up even if it's going to
be overwritten later. Looking up arm-core.xml results in an error as
it will not be embedded in the AArch64 build.

Signed-off-by: Akihiko Odaki 
---
 target/arm/cpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 93c28d50e5..d71a162070 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2354,7 +2354,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 cc->sysemu_ops = &arm_sysemu_ops;
 #endif
 cc->gdb_num_core_regs = 26;
-cc->gdb_core_xml_file = "arm-core.xml";
 cc->gdb_arch_name = arm_gdb_arch_name;
 cc->gdb_get_dynamic_xml = arm_gdb_get_dynamic_xml;
 cc->gdb_stop_before_watchpoint = true;
@@ -2376,8 +2375,10 @@ static void arm_cpu_instance_init(Object *obj)
 static void cpu_register_class_init(ObjectClass *oc, void *data)
 {
 ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+CPUClass *cc = CPU_CLASS(acc);
 
 acc->info = data;
+cc->gdb_core_xml_file = "arm-core.xml";
 }
 
 void arm_cpu_register(const ARMCPUInfo *info)
-- 
2.41.0




[RFC PATCH 10/24] gdbstub: Use GDBFeature for gdb_register_coprocessor

2023-07-31 Thread Akihiko Odaki
This is a tree-wide change to introduce GDBFeature parameter to
gdb_register_coprocessor(). The new parameter just replaces num_regs
and xml parameters for now. GDBFeature will be utilized to simplify XML
lookup in a following change.

Signed-off-by: Akihiko Odaki 
---
 include/exec/gdbstub.h |  2 +-
 gdbstub/gdbstub.c  | 13 +++--
 target/arm/gdbstub.c   | 36 
 target/hexagon/cpu.c   |  3 +--
 target/loongarch/gdbstub.c |  2 +-
 target/m68k/helper.c   |  6 +++---
 target/microblaze/cpu.c|  5 +++--
 target/ppc/gdbstub.c   | 11 ++-
 target/riscv/gdbstub.c | 20 
 target/s390x/gdbstub.c | 28 +++-
 10 files changed, 61 insertions(+), 65 deletions(-)

diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 3115dc21c0..9b3da5b257 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -22,7 +22,7 @@ typedef int (*gdb_get_reg_cb)(CPUArchState *env, GByteArray 
*buf, int reg);
 typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg);
 void gdb_register_coprocessor(CPUState *cpu,
   gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg,
-  int num_regs, const char *xml, int g_pos);
+  const GDBFeature *feature, int g_pos);
 
 /**
  * gdbserver_start: start the gdb server
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 6f2e0cb06f..ab75f6686b 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -471,7 +471,7 @@ static int gdb_write_register(CPUState *cpu, uint8_t 
*mem_buf, int reg)
 
 void gdb_register_coprocessor(CPUState *cpu,
   gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg,
-  int num_regs, const char *xml, int g_pos)
+  const GDBFeature *feature, int g_pos)
 {
 GDBRegisterState *s;
 GDBRegisterState **p;
@@ -479,25 +479,26 @@ void gdb_register_coprocessor(CPUState *cpu,
 p = &cpu->gdb_regs;
 while (*p) {
 /* Check for duplicates.  */
-if (strcmp((*p)->xml, xml) == 0)
+if (strcmp((*p)->xml, feature->xmlname) == 0)
 return;
 p = &(*p)->next;
 }
 
 s = g_new0(GDBRegisterState, 1);
 s->base_reg = cpu->gdb_num_regs;
-s->num_regs = num_regs;
+s->num_regs = feature->num_regs;
 s->get_reg = get_reg;
 s->set_reg = set_reg;
-s->xml = xml;
+s->xml = feature->xml;
 
 /* Add to end of list.  */
-cpu->gdb_num_regs += num_regs;
+cpu->gdb_num_regs += feature->num_regs;
 *p = s;
 if (g_pos) {
 if (g_pos != s->base_reg) {
 error_report("Error: Bad gdb register numbering for '%s', "
- "expected %d got %d", xml, g_pos, s->base_reg);
+ "expected %d got %d", feature->xml,
+ g_pos, s->base_reg);
 } else {
 cpu->gdb_num_g_regs = cpu->gdb_num_regs;
 }
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index cd35bac013..ab4ffe6264 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -522,14 +522,15 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
  */
 #ifdef TARGET_AARCH64
 if (isar_feature_aa64_sve(&cpu->isar)) {
-int nreg = arm_gen_dynamic_svereg_feature(cs, 
cs->gdb_num_regs)->num_regs;
+GDBFeature *feature =
+arm_gen_dynamic_svereg_feature(cs, cs->gdb_num_regs);
 gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg,
- aarch64_gdb_set_sve_reg, nreg,
- "sve-registers.xml", 0);
+ aarch64_gdb_set_sve_reg, feature, 0);
 } else {
 gdb_register_coprocessor(cs, aarch64_gdb_get_fpu_reg,
  aarch64_gdb_set_fpu_reg,
- 34, "aarch64-fpu.xml", 0);
+ 
gdb_find_static_feature("aarch64-fpu.xml"),
+ 0);
 }
 /*
  * Note that we report pauth information via the feature name
@@ -540,19 +541,22 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
 if (isar_feature_aa64_pauth(&cpu->isar)) {
 gdb_register_coprocessor(cs, aarch64_gdb_get_pauth_reg,
  aarch64_gdb_set_pauth_reg,
- 4, "aarch64-pauth.xml", 0);
+ 
gdb_find_static_feature("aarch64-pauth.xml"),
+ 0);
 }
 #endif
 } else {
 if (arm_feature(env, ARM_FEATURE_NEON)) {
 gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
- 49, "arm-neon.xml", 0);
+ gdb_find_static

[RFC PATCH 03/24] gdbstub: Add num_regs member to GDBFeature

2023-07-31 Thread Akihiko Odaki
Currently the number of registers exposed to GDB is written as magic
numbers in code. Derive the number of registers GDB actually see from
XML files to replace the magic numbers in code later.

Signed-off-by: Akihiko Odaki 
---
 include/exec/gdbstub.h  |  1 +
 scripts/feature_to_c.py | 46 +++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index bd5bc91dda..22e5add5b1 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -13,6 +13,7 @@
 typedef struct GDBFeature {
 const char *xmlname;
 const char *xml;
+int num_regs;
 } GDBFeature;
 
 
diff --git a/scripts/feature_to_c.py b/scripts/feature_to_c.py
index 5a5b49367b..8eb8c81cf8 100755
--- a/scripts/feature_to_c.py
+++ b/scripts/feature_to_c.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-import os, sys
+import os, sys, xml.etree.ElementTree
 
 def writeliteral(indent, bytes):
 sys.stdout.write(' ' * indent)
@@ -35,10 +35,52 @@ def writeliteral(indent, bytes):
 with open(input, 'rb') as file:
 read = file.read()
 
+parser = xml.etree.ElementTree.XMLPullParser(['start', 'end'])
+parser.feed(read)
+events = parser.read_events()
+event, element = next(events)
+if event != 'start':
+sys.stderr.write(f'unexpected event: {event}\n')
+exit(1)
+if element.tag != 'feature':
+sys.stderr.write(f'unexpected start tag: {element.tag}\n')
+exit(1)
+
+regnum = 0
+regnums = []
+tags = ['feature']
+for event, element in events:
+if event == 'end':
+if element.tag != tags[len(tags) - 1]:
+sys.stderr.write(f'unexpected end tag: {element.tag}\n')
+exit(1)
+
+tags.pop()
+if element.tag == 'feature':
+break
+elif event == 'start':
+if len(tags) < 2 and element.tag == 'reg':
+if 'regnum' in element.attrib:
+regnum = int(element.attrib['regnum'])
+
+regnums.append(regnum)
+regnum += 1
+
+tags.append(element.tag)
+else:
+raise Exception(f'unexpected event: {event}\n')
+
+if len(tags):
+sys.stderr.write('unterminated feature tag\n')
+exit(1)
+
+base_reg = min(regnums)
+num_regs = max(regnums) - base_reg + 1 if len(regnums) else 0
+
 sys.stdout.write('{\n')
 writeliteral(8, bytes(os.path.basename(input), 'utf-8'))
 sys.stdout.write(',\n')
 writeliteral(8, read)
-sys.stdout.write('\n},\n')
+sys.stdout.write(f',\n{num_regs},\n}},\n')
 
 sys.stdout.write('{ NULL }\n};\n')
-- 
2.41.0




[RFC PATCH 04/24] gdbstub: Introduce gdb_find_static_feature()

2023-07-31 Thread Akihiko Odaki
This function is useful to determine the number of registers exposed to
GDB from the XML name.

Signed-off-by: Akihiko Odaki 
---
 include/exec/gdbstub.h |  2 ++
 gdbstub/gdbstub.c  | 13 +
 2 files changed, 15 insertions(+)

diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 22e5add5b1..3115dc21c0 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -34,6 +34,8 @@ void gdb_register_coprocessor(CPUState *cpu,
  */
 int gdbserver_start(const char *port_or_device);
 
+const GDBFeature *gdb_find_static_feature(const char *xmlname);
+
 void gdb_set_stop_cpu(CPUState *cpu);
 
 /**
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index fad70200d8..6d9cef5b95 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -414,6 +414,19 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
 return name ? gdb_features[i].xml : NULL;
 }
 
+const GDBFeature *gdb_find_static_feature(const char *xmlname)
+{
+const GDBFeature *feature;
+
+for (feature = gdb_features; feature->xmlname; feature++) {
+if (!strcmp(feature->xmlname, xmlname)) {
+return feature;
+}
+}
+
+return NULL;
+}
+
 static int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
 {
 CPUClass *cc = CPU_GET_CLASS(cpu);
-- 
2.41.0




[RFC PATCH 06/24] hw/core/cpu: Replace gdb_core_xml_file with gdb_core_feature

2023-07-31 Thread Akihiko Odaki
This is a tree-wide change to replace gdb_core_xml_file, the path to
GDB XML file with gdb_core_feature, the pointer to GDBFeature. This
also replaces the values assigned to gdb_num_core_regs with the
num_regs member of GDBFeature where applicable to remove magic numbers.

A following change will utilize additional information provided by
GDBFeature to simplify XML file lookup.

Signed-off-by: Akihiko Odaki 
---
 include/hw/core/cpu.h   | 5 +++--
 target/s390x/cpu.h  | 2 --
 gdbstub/gdbstub.c   | 6 +++---
 target/arm/cpu.c| 4 ++--
 target/arm/cpu64.c  | 4 ++--
 target/arm/tcg/cpu32.c  | 3 ++-
 target/avr/cpu.c| 4 ++--
 target/hexagon/cpu.c| 2 +-
 target/i386/cpu.c   | 7 +++
 target/loongarch/cpu.c  | 4 ++--
 target/m68k/cpu.c   | 7 ---
 target/microblaze/cpu.c | 4 ++--
 target/ppc/cpu_init.c   | 4 ++--
 target/riscv/cpu.c  | 7 ---
 target/rx/cpu.c | 4 ++--
 target/s390x/cpu.c  | 4 ++--
 16 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index fdcbe87352..84219c1885 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -23,6 +23,7 @@
 #include "hw/qdev-core.h"
 #include "disas/dis-asm.h"
 #include "exec/cpu-common.h"
+#include "exec/gdbstub.h"
 #include "exec/hwaddr.h"
 #include "exec/memattrs.h"
 #include "qapi/qapi-types-run-state.h"
@@ -127,7 +128,7 @@ struct SysemuCPUOps;
  *   breakpoint.  Used by AVR to handle a gdb mis-feature with
  *   its Harvard architecture split code and data.
  * @gdb_num_core_regs: Number of core registers accessible to GDB.
- * @gdb_core_xml_file: File name for core registers GDB XML description.
+ * @gdb_core_feature: GDB core feature description.
  * @gdb_stop_before_watchpoint: Indicates whether GDB expects the CPU to stop
  *   before the insn which triggers a watchpoint rather than after it.
  * @gdb_arch_name: Optional callback that returns the architecture name known
@@ -163,7 +164,7 @@ struct CPUClass {
 int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
 vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
 
-const char *gdb_core_xml_file;
+const GDBFeature *gdb_core_feature;
 gchar * (*gdb_arch_name)(CPUState *cpu);
 const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname);
 
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index eb5b65b7d3..c5bac3230c 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -451,8 +451,6 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState *env, 
vaddr *pc,
 #define S390_R13_REGNUM 15
 #define S390_R14_REGNUM 16
 #define S390_R15_REGNUM 17
-/* Total Core Registers. */
-#define S390_NUM_CORE_REGS 18
 
 static inline void setcc(S390CPU *cpu, uint64_t cc)
 {
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 6d9cef5b95..6f2e0cb06f 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -386,7 +386,7 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
 g_free(arch);
 }
 pstrcat(buf, buf_sz, "gdb_core_xml_file);
+pstrcat(buf, buf_sz, cc->gdb_core_feature->xmlname);
 pstrcat(buf, buf_sz, "\"/>");
 for (r = cpu->gdb_regs; r; r = r->next) {
 pstrcat(buf, buf_sz, "gdb_core_xml_file) {
+if (cc->gdb_core_feature) {
 g_string_append(gdbserver_state.str_buf, ";qXfer:features:read+");
 }
 
@@ -1548,7 +1548,7 @@ static void handle_query_xfer_features(GArray *params, 
void *user_ctx)
 
 process = gdb_get_cpu_process(gdbserver_state.g_cpu);
 cc = CPU_GET_CLASS(gdbserver_state.g_cpu);
-if (!cc->gdb_core_xml_file) {
+if (!cc->gdb_core_feature) {
 gdb_put_packet("");
 return;
 }
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d71a162070..a206ab6b1b 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2353,7 +2353,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 #ifndef CONFIG_USER_ONLY
 cc->sysemu_ops = &arm_sysemu_ops;
 #endif
-cc->gdb_num_core_regs = 26;
 cc->gdb_arch_name = arm_gdb_arch_name;
 cc->gdb_get_dynamic_xml = arm_gdb_get_dynamic_xml;
 cc->gdb_stop_before_watchpoint = true;
@@ -2378,7 +2377,8 @@ static void cpu_register_class_init(ObjectClass *oc, void 
*data)
 CPUClass *cc = CPU_CLASS(acc);
 
 acc->info = data;
-cc->gdb_core_xml_file = "arm-core.xml";
+cc->gdb_core_feature = gdb_find_static_feature("arm-core.xml");
+cc->gdb_num_core_regs = cc->gdb_core_feature->num_regs;
 }
 
 void arm_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 96158093cc..9c2a226159 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -754,8 +754,8 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void 
*data)
 
 cc->gdb_read_register = aarch64_cpu_gdb_read_register;
 cc->gdb_write_register = aarch64_cpu_gdb_write_register;
-cc->gdb_n

[RFC PATCH 08/24] target/ppc: Use GDBFeature for dynamic XML

2023-07-31 Thread Akihiko Odaki
In preparation for a change to use GDBFeature as a parameter of
gdb_register_coprocessor(), convert the internal representation of
dynamic feature from plain XML to GDBFeature.

Signed-off-by: Akihiko Odaki 
---
 target/ppc/cpu-qom.h  |  3 +--
 target/ppc/cpu.h  |  2 +-
 target/ppc/cpu_init.c |  2 +-
 target/ppc/gdbstub.c  | 13 +++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
index be33786bd8..633fb402b5 100644
--- a/target/ppc/cpu-qom.h
+++ b/target/ppc/cpu-qom.h
@@ -186,8 +186,7 @@ struct PowerPCCPUClass {
 int bfd_mach;
 uint32_t l1_dcache_size, l1_icache_size;
 #ifndef CONFIG_USER_ONLY
-unsigned int gdb_num_sprs;
-const char *gdb_spr_xml;
+GDBFeature gdb_spr;
 #endif
 const PPCHash64Options *hash64_opts;
 struct ppc_radix_page_info *radix_page_info;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 25fac9577a..5f251bdffe 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1381,7 +1381,7 @@ int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t 
*buf, int reg);
 int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
 #ifndef CONFIG_USER_ONLY
 hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
-void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu);
+void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu);
 const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name);
 #endif
 int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index eb56226865..938cd2b7e1 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6673,7 +6673,7 @@ static void init_ppc_proc(PowerPCCPU *cpu)
 (*pcc->init_proc)(env);
 
 #if !defined(CONFIG_USER_ONLY)
-ppc_gdb_gen_spr_xml(cpu);
+ppc_gdb_gen_spr_feature(cpu);
 #endif
 
 /* MSR bits & flags consistency checks */
diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
index ca39efdc35..adc647a24e 100644
--- a/target/ppc/gdbstub.c
+++ b/target/ppc/gdbstub.c
@@ -318,7 +318,7 @@ int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t 
*mem_buf, int n)
 }
 
 #ifndef CONFIG_USER_ONLY
-void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu)
+void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu)
 {
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 CPUPPCState *env = &cpu->env;
@@ -346,7 +346,7 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu)
 num_regs++;
 }
 
-if (pcc->gdb_spr_xml) {
+if (pcc->gdb_spr.xml) {
 return;
 }
 
@@ -371,8 +371,9 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu)
 
 g_string_append(xml, "");
 
-pcc->gdb_num_sprs = num_regs;
-pcc->gdb_spr_xml = g_string_free(xml, false);
+pcc->gdb_spr.num_regs = num_regs;
+pcc->gdb_spr.xmlname = "power-spr.xml";
+pcc->gdb_spr.xml = g_string_free(xml, false);
 }
 
 const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name)
@@ -380,7 +381,7 @@ const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const 
char *xml_name)
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
 
 if (strcmp(xml_name, "power-spr.xml") == 0) {
-return pcc->gdb_spr_xml;
+return pcc->gdb_spr.xml;
 }
 return NULL;
 }
@@ -618,6 +619,6 @@ void ppc_gdb_init(CPUState *cs, PowerPCCPUClass *pcc)
 }
 #ifndef CONFIG_USER_ONLY
 gdb_register_coprocessor(cs, gdb_get_spr_reg, gdb_set_spr_reg,
- pcc->gdb_num_sprs, "power-spr.xml", 0);
+ pcc->gdb_spr.num_regs, "power-spr.xml", 0);
 #endif
 }
-- 
2.41.0




[RFC PATCH 12/24] gdbstub: Simplify XML lookup

2023-07-31 Thread Akihiko Odaki
Now we know all instances of GDBFeature that is used in CPU so we can
traverse them to find XML. This removes the need for a CPU-specific
lookup function for dynamic XMLs.

Signed-off-by: Akihiko Odaki 
---
 gdbstub/gdbstub.c | 28 +---
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 182efe7e0f..e5bb2c89ba 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -354,8 +354,7 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
GDBProcess *process)
 {
 size_t len;
-int i;
-const char *name;
+GDBRegisterState *r;
 CPUState *cpu = gdb_get_first_cpu_in_process(process);
 CPUClass *cc = CPU_GET_CLASS(cpu);
 
@@ -364,15 +363,12 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
 len++;
 *newp = p + len;
 
-name = NULL;
 if (strncmp(p, "target.xml", len) == 0) {
 char *buf = process->target_xml;
 const size_t buf_sz = sizeof(process->target_xml);
 
 /* Generate the XML description for this CPU.  */
 if (!buf[0]) {
-GDBRegisterState *r;
-
 pstrcat(buf, buf_sz,
 ""
 ""
@@ -389,28 +385,22 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
 pstrcat(buf, buf_sz, "\"/>");
 for (r = cpu->gdb_regs; r; r = r->next) {
 pstrcat(buf, buf_sz, "feature->xml);
+pstrcat(buf, buf_sz, r->feature->xmlname);
 pstrcat(buf, buf_sz, "\"/>");
 }
 pstrcat(buf, buf_sz, "");
 }
 return buf;
 }
-if (cc->gdb_get_dynamic_xml) {
-char *xmlname = g_strndup(p, len);
-const char *xml = cc->gdb_get_dynamic_xml(cpu, xmlname);
-
-g_free(xmlname);
-if (xml) {
-return xml;
-}
+if (strncmp(p, cc->gdb_core_feature->xmlname, len) == 0) {
+return cc->gdb_core_feature->xml;
 }
-for (i = 0; ; i++) {
-name = gdb_features[i].xmlname;
-if (!name || (strncmp(name, p, len) == 0 && strlen(name) == len))
-break;
+for (r = cpu->gdb_regs; r; r = r->next) {
+if (strncmp(p, r->feature->xmlname, len) == 0) {
+return r->feature->xml;
+}
 }
-return name ? gdb_features[i].xml : NULL;
+return NULL;
 }
 
 const GDBFeature *gdb_find_static_feature(const char *xmlname)
-- 
2.41.0




[RFC PATCH 02/24] gdbstub: Introduce GDBFeature structure

2023-07-31 Thread Akihiko Odaki
Before this change, the information from a XML file was stored in an
array that is not descriptive. Introduce a dedicated structure type to
make it easier to understand and to extend with more fields.

Signed-off-by: Akihiko Odaki 
---
 MAINTAINERS |  2 +-
 meson.build |  2 +-
 include/exec/gdbstub.h  |  9 --
 gdbstub/gdbstub.c   |  4 +--
 stubs/gdbstub.c |  6 ++--
 scripts/feature_to_c.py | 44 ++
 scripts/feature_to_c.sh | 69 -
 7 files changed, 58 insertions(+), 78 deletions(-)
 create mode 100755 scripts/feature_to_c.py
 delete mode 100644 scripts/feature_to_c.sh

diff --git a/MAINTAINERS b/MAINTAINERS
index 12e59b6b27..514ac74101 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2826,7 +2826,7 @@ F: include/exec/gdbstub.h
 F: include/gdbstub/*
 F: gdb-xml/
 F: tests/tcg/multiarch/gdbstub/
-F: scripts/feature_to_c.sh
+F: scripts/feature_to_c.py
 F: scripts/probe-gdb-support.py
 
 Memory API
diff --git a/meson.build b/meson.build
index 98e68ef0b1..5c633f7e01 100644
--- a/meson.build
+++ b/meson.build
@@ -3683,7 +3683,7 @@ common_all = static_library('common',
 dependencies: common_all.dependencies(),
 name_suffix: 'fa')
 
-feature_to_c = find_program('scripts/feature_to_c.sh')
+feature_to_c = find_program('scripts/feature_to_c.py')
 
 if targetos == 'darwin'
   entitlement = find_program('scripts/entitlement.sh')
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 7d743fe1e9..bd5bc91dda 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -10,6 +10,11 @@
 #define GDB_WATCHPOINT_READ  3
 #define GDB_WATCHPOINT_ACCESS4
 
+typedef struct GDBFeature {
+const char *xmlname;
+const char *xml;
+} GDBFeature;
+
 
 /* Get or set a register.  Returns the size of the register.  */
 typedef int (*gdb_get_reg_cb)(CPUArchState *env, GByteArray *buf, int reg);
@@ -38,7 +43,7 @@ void gdb_set_stop_cpu(CPUState *cpu);
  */
 extern bool gdb_has_xml;
 
-/* in gdbstub-xml.c, generated by scripts/feature_to_c.sh */
-extern const char *const xml_builtin[][2];
+/* in gdbstub-xml.c, generated by scripts/feature_to_c.py */
+extern const GDBFeature gdb_features[];
 
 #endif
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 6911b73c07..fad70200d8 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -407,11 +407,11 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
 }
 }
 for (i = 0; ; i++) {
-name = xml_builtin[i][0];
+name = gdb_features[i].xmlname;
 if (!name || (strncmp(name, p, len) == 0 && strlen(name) == len))
 break;
 }
-return name ? xml_builtin[i][1] : NULL;
+return name ? gdb_features[i].xml : NULL;
 }
 
 static int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
diff --git a/stubs/gdbstub.c b/stubs/gdbstub.c
index 2b7aee50d3..324d17f045 100644
--- a/stubs/gdbstub.c
+++ b/stubs/gdbstub.c
@@ -1,6 +1,6 @@
 #include "qemu/osdep.h"
-#include "exec/gdbstub.h"   /* xml_builtin */
+#include "exec/gdbstub.h"   /* gdb_features */
 
-const char *const xml_builtin[][2] = {
-  { NULL, NULL }
+const GDBFeature gdb_features[] = {
+  { NULL }
 };
diff --git a/scripts/feature_to_c.py b/scripts/feature_to_c.py
new file mode 100755
index 00..5a5b49367b
--- /dev/null
+++ b/scripts/feature_to_c.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+import os, sys
+
+def writeliteral(indent, bytes):
+sys.stdout.write(' ' * indent)
+sys.stdout.write('"')
+quoted = True
+
+for c in bytes:
+if not quoted:
+sys.stdout.write('\n')
+sys.stdout.write(' ' * indent)
+sys.stdout.write('"')
+quoted = True
+
+if c == b'"'[0]:
+sys.stdout.write('\\"')
+elif c == b'\\'[0]:
+sys.stdout.write('')
+elif c == b'\n'[0]:
+sys.stdout.write('\\n"')
+quoted = False
+elif c >= 32 and c < 127:
+sys.stdout.write(c.to_bytes(1, 'big').decode())
+else:
+sys.stdout.write(f'\{c:03o}')
+
+if quoted:
+sys.stdout.write('"')
+
+sys.stdout.write('#include "qemu/osdep.h"\n#include "exec/gdbstub.h"\n\nconst 
GDBFeature gdb_features[] = {\n')
+
+for input in sys.argv[1:]:
+with open(input, 'rb') as file:
+read = file.read()
+
+sys.stdout.write('{\n')
+writeliteral(8, bytes(os.path.basename(input), 'utf-8'))
+sys.stdout.write(',\n')
+writeliteral(8, read)
+sys.stdout.write('\n},\n')
+
+sys.stdout.write('{ NULL }\n};\n')
diff --git a/scripts/feature_to_c.sh b/scripts/feature_to_c.sh
deleted file mode 100644
index c1f67c8f6a..00
--- a/scripts/feature_to_c.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/sh
-
-# Convert text files to compilable C arrays.
-#
-# Copyright (C) 2007 Free Software Foundation, Inc.
-#
-# This file is pa

[RFC PATCH 15/24] target/arm: Fill new members of GDBFeature

2023-07-31 Thread Akihiko Odaki
These members will be used to help plugins to identify registers.

Signed-off-by: Akihiko Odaki 
---
 target/arm/gdbstub.c   | 46 +++---
 target/arm/gdbstub64.c | 42 +-
 2 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index 100a6eed15..56d24028f6 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -270,6 +270,7 @@ static void arm_gen_one_feature_sysreg(GString *s,
 g_string_append_printf(s, " regnum=\"%d\"", regnum);
 g_string_append_printf(s, " group=\"cp_regs\"/>");
 dyn_feature->data.cpregs.keys[dyn_feature->desc.num_regs] = ri_key;
+((const char **)dyn_feature->desc.regs)[dyn_feature->desc.num_regs] = 
ri->name;
 dyn_feature->desc.num_regs++;
 }
 
@@ -316,6 +317,8 @@ static GDBFeature *arm_gen_dynamic_sysreg_feature(CPUState 
*cs, int base_reg)
 DynamicGDBFeatureInfo *dyn_feature = &cpu->dyn_sysreg_feature;
 gsize num_regs = g_hash_table_size(cpu->cp_regs);
 
+dyn_feature->desc.name = "org.qemu.gdb.arm.sys.regs";
+dyn_feature->desc.regs = g_new(const char *, num_regs);
 dyn_feature->desc.num_regs = 0;
 dyn_feature->data.cpregs.keys = g_new(uint32_t, num_regs);
 g_string_printf(s, "");
@@ -418,30 +421,34 @@ static int arm_gdb_set_m_systemreg(CPUARMState *env, 
uint8_t *buf, int reg)
 }
 
 static GDBFeature *arm_gen_dynamic_m_systemreg_feature(CPUState *cs,
-   int orig_base_reg)
+   int base_reg)
 {
 ARMCPU *cpu = ARM_CPU(cs);
 CPUARMState *env = &cpu->env;
 GString *s = g_string_new(NULL);
-int base_reg = orig_base_reg;
-int i;
+const char **regs = g_new(const char *, ARRAY_SIZE(m_sysreg_def));
+int i = 0;
+int j;
 
 g_string_printf(s, "");
 g_string_append_printf(s, "");
 g_string_append_printf(s, "\n");
 
-for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) {
-if (arm_feature(env, m_sysreg_def[i].feature)) {
+for (j = 0; j < ARRAY_SIZE(m_sysreg_def); j++) {
+if (arm_feature(env, m_sysreg_def[j].feature)) {
+regs[i] = m_sysreg_def[j].name;
 g_string_append_printf(s,
 "\n",
-m_sysreg_def[i].name, base_reg++);
+m_sysreg_def[j].name, base_reg + i++);
 }
 }
 
 g_string_append_printf(s, "");
+cpu->dyn_m_systemreg_feature.desc.name = "org.gnu.gdb.arm.m-system";
 cpu->dyn_m_systemreg_feature.desc.xmlname = "arm-m-system.xml";
 cpu->dyn_m_systemreg_feature.desc.xml = g_string_free(s, false);
-cpu->dyn_m_systemreg_feature.desc.num_regs = base_reg - orig_base_reg;
+cpu->dyn_m_systemreg_feature.desc.regs = regs;
+cpu->dyn_m_systemreg_feature.desc.num_regs = i;
 
 return &cpu->dyn_m_systemreg_feature.desc;
 }
@@ -462,30 +469,37 @@ static int arm_gdb_set_m_secextreg(CPUARMState *env, 
uint8_t *buf, int reg)
 }
 
 static GDBFeature *arm_gen_dynamic_m_secextreg_feature(CPUState *cs,
-   int orig_base_reg)
+   int base_reg)
 {
 ARMCPU *cpu = ARM_CPU(cs);
 GString *s = g_string_new(NULL);
-int base_reg = orig_base_reg;
-int i;
+const char **regs = g_new(const char *, ARRAY_SIZE(m_sysreg_def) * 2);
+int i = 0;
+int j;
 
 g_string_printf(s, "");
 g_string_append_printf(s, "");
 g_string_append_printf(s, "\n");
 
-for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) {
+for (j = 0; j < ARRAY_SIZE(m_sysreg_def); j++) {
+regs[i] = g_strconcat(m_sysreg_def[j].name, "_ns", NULL);
 g_string_append_printf(s,
-"\n",
-m_sysreg_def[i].name, base_reg++);
+"\n",
+regs[i], base_reg + i);
+i++;
+regs[i] = g_strconcat(m_sysreg_def[j].name, "_s", NULL);
 g_string_append_printf(s,
-"\n",
-m_sysreg_def[i].name, base_reg++);
+"\n",
+regs[i], base_reg + i);
+i++;
 }
 
 g_string_append_printf(s, "");
+cpu->dyn_m_secextreg_feature.desc.name = "org.gnu.gdb.arm.secext";
 cpu->dyn_m_secextreg_feature.desc.xmlname = "arm-m-secext.xml";
 cpu->dyn_m_secextreg_feature.desc.xml = g_string_free(s, false);
-cpu->dyn_m_secextreg_feature.desc.num_regs = base_reg - orig_base_reg;
+cpu->dyn_m_secextreg_feature.desc.regs = regs;
+cpu->dyn_m_secextreg_feature.desc.num_regs = i;
 
 return &cpu->dyn_m_secextreg_feature.desc;
 }
diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c
index 20483ef9bc..c5ed7c0aa3 100644
--- a/target/arm/gdbstub64.c
+++ b/target/arm/gdbstub64.c
@@ -316,15 +316,21 @@ static void output_vector_union_type(GString *s, int 
reg_width,
 g_string_append(s, "");
 }
 
-GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int orig_base_reg)

[RFC PATCH 07/24] target/arm: Use GDBFeature for dynamic XML

2023-07-31 Thread Akihiko Odaki
In preparation for a change to use GDBFeature as a parameter of
gdb_register_coprocessor(), convert the internal representation of
dynamic feature from plain XML to GDBFeature.

Signed-off-by: Akihiko Odaki 
---
 target/arm/cpu.h   | 20 +--
 target/arm/internals.h |  2 +-
 target/arm/gdbstub.c   | 80 +++---
 target/arm/gdbstub64.c | 11 +++---
 4 files changed, 60 insertions(+), 53 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 88e5accda6..d6c2378d05 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -136,23 +136,21 @@ enum {
  */
 
 /**
- * DynamicGDBXMLInfo:
- * @desc: Contains the XML descriptions.
- * @num: Number of the registers in this XML seen by GDB.
+ * DynamicGDBFeatureInfo:
+ * @desc: Contains the feature descriptions.
  * @data: A union with data specific to the set of registers
  *@cpregs_keys: Array that contains the corresponding Key of
  *  a given cpreg with the same order of the cpreg
  *  in the XML description.
  */
-typedef struct DynamicGDBXMLInfo {
-char *desc;
-int num;
+typedef struct DynamicGDBFeatureInfo {
+GDBFeature desc;
 union {
 struct {
 uint32_t *keys;
 } cpregs;
 } data;
-} DynamicGDBXMLInfo;
+} DynamicGDBFeatureInfo;
 
 /* CPU state for each instance of a generic timer (in cp15 c14) */
 typedef struct ARMGenericTimer {
@@ -881,10 +879,10 @@ struct ArchCPU {
 uint64_t *cpreg_vmstate_values;
 int32_t cpreg_vmstate_array_len;
 
-DynamicGDBXMLInfo dyn_sysreg_xml;
-DynamicGDBXMLInfo dyn_svereg_xml;
-DynamicGDBXMLInfo dyn_m_systemreg_xml;
-DynamicGDBXMLInfo dyn_m_secextreg_xml;
+DynamicGDBFeatureInfo dyn_sysreg_feature;
+DynamicGDBFeatureInfo dyn_svereg_feature;
+DynamicGDBFeatureInfo dyn_m_systemreg_feature;
+DynamicGDBFeatureInfo dyn_m_secextreg_feature;
 
 /* Timers used by the generic (architected) timer */
 QEMUTimer *gt_timer[NUM_GTIMERS];
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 0f01bc32a8..8421a755af 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1388,7 +1388,7 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env)
 }
 
 #ifdef TARGET_AARCH64
-int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg);
+GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg);
 int aarch64_gdb_get_sve_reg(CPUARMState *env, GByteArray *buf, int reg);
 int aarch64_gdb_set_sve_reg(CPUARMState *env, uint8_t *buf, int reg);
 int aarch64_gdb_get_fpu_reg(CPUARMState *env, GByteArray *buf, int reg);
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index f421c5d041..cd35bac013 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -25,11 +25,11 @@
 #include "internals.h"
 #include "cpregs.h"
 
-typedef struct RegisterSysregXmlParam {
+typedef struct RegisterSysregFeatureParam {
 CPUState *cs;
 GString *s;
 int n;
-} RegisterSysregXmlParam;
+} RegisterSysregFeatureParam;
 
 /* Old gdb always expect FPA registers.  Newer (xml-aware) gdb only expect
whatever the target description contains.  Due to a historical mishap
@@ -243,7 +243,7 @@ static int arm_gdb_get_sysreg(CPUARMState *env, GByteArray 
*buf, int reg)
 const ARMCPRegInfo *ri;
 uint32_t key;
 
-key = cpu->dyn_sysreg_xml.data.cpregs.keys[reg];
+key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
 ri = get_arm_cp_reginfo(cpu->cp_regs, key);
 if (ri) {
 if (cpreg_field_is_64bit(ri)) {
@@ -260,7 +260,8 @@ static int arm_gdb_set_sysreg(CPUARMState *env, uint8_t 
*buf, int reg)
 return 0;
 }
 
-static void arm_gen_one_xml_sysreg_tag(GString *s, DynamicGDBXMLInfo *dyn_xml,
+static void arm_gen_one_feature_sysreg(GString *s,
+   DynamicGDBFeatureInfo *dyn_feature,
ARMCPRegInfo *ri, uint32_t ri_key,
int bitsize, int regnum)
 {
@@ -268,25 +269,25 @@ static void arm_gen_one_xml_sysreg_tag(GString *s, 
DynamicGDBXMLInfo *dyn_xml,
 g_string_append_printf(s, " bitsize=\"%d\"", bitsize);
 g_string_append_printf(s, " regnum=\"%d\"", regnum);
 g_string_append_printf(s, " group=\"cp_regs\"/>");
-dyn_xml->data.cpregs.keys[dyn_xml->num] = ri_key;
-dyn_xml->num++;
+dyn_feature->data.cpregs.keys[dyn_feature->desc.num_regs] = ri_key;
+dyn_feature->desc.num_regs++;
 }
 
-static void arm_register_sysreg_for_xml(gpointer key, gpointer value,
-gpointer p)
+static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
+gpointer p)
 {
 uint32_t ri_key = (uintptr_t)key;
 ARMCPRegInfo *ri = value;
-RegisterSysregXmlParam *param = (RegisterSysregXmlParam *)p;
+RegisterSysregFeatureParam *param = (RegisterSysregFeatureParam *)p;
 GString *s = param->s;
 ARMCPU *cpu = ARM_

[RFC PATCH 11/24] gdbstub: Use GDBFeature for GDBRegisterState

2023-07-31 Thread Akihiko Odaki
Simplify GDBRegisterState by replacing num_regs and xml members with
one member that points to GDBFeature.

Signed-off-by: Akihiko Odaki 
---
 gdbstub/gdbstub.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index ab75f6686b..182efe7e0f 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -47,10 +47,9 @@
 
 typedef struct GDBRegisterState {
 int base_reg;
-int num_regs;
 gdb_get_reg_cb get_reg;
 gdb_set_reg_cb set_reg;
-const char *xml;
+const GDBFeature *feature;
 struct GDBRegisterState *next;
 } GDBRegisterState;
 
@@ -390,7 +389,7 @@ static const char *get_feature_xml(const char *p, const 
char **newp,
 pstrcat(buf, buf_sz, "\"/>");
 for (r = cpu->gdb_regs; r; r = r->next) {
 pstrcat(buf, buf_sz, "xml);
+pstrcat(buf, buf_sz, r->feature->xml);
 pstrcat(buf, buf_sz, "\"/>");
 }
 pstrcat(buf, buf_sz, "");
@@ -438,7 +437,7 @@ static int gdb_read_register(CPUState *cpu, GByteArray 
*buf, int reg)
 }
 
 for (r = cpu->gdb_regs; r; r = r->next) {
-if (r->base_reg <= reg && reg < r->base_reg + r->num_regs) {
+if (r->base_reg <= reg && reg < r->base_reg + r->feature->num_regs) {
 return r->get_reg(env, buf, reg - r->base_reg);
 }
 }
@@ -456,7 +455,7 @@ static int gdb_write_register(CPUState *cpu, uint8_t 
*mem_buf, int reg)
 }
 
 for (r = cpu->gdb_regs; r; r = r->next) {
-if (r->base_reg <= reg && reg < r->base_reg + r->num_regs) {
+if (r->base_reg <= reg && reg < r->base_reg + r->feature->num_regs) {
 return r->set_reg(env, mem_buf, reg - r->base_reg);
 }
 }
@@ -479,17 +478,16 @@ void gdb_register_coprocessor(CPUState *cpu,
 p = &cpu->gdb_regs;
 while (*p) {
 /* Check for duplicates.  */
-if (strcmp((*p)->xml, feature->xmlname) == 0)
+if ((*p)->feature == feature)
 return;
 p = &(*p)->next;
 }
 
 s = g_new0(GDBRegisterState, 1);
 s->base_reg = cpu->gdb_num_regs;
-s->num_regs = feature->num_regs;
 s->get_reg = get_reg;
 s->set_reg = set_reg;
-s->xml = feature->xml;
+s->feature = feature;
 
 /* Add to end of list.  */
 cpu->gdb_num_regs += feature->num_regs;
-- 
2.41.0




[RFC PATCH 16/24] target/ppc: Fill new members of GDBFeature

2023-07-31 Thread Akihiko Odaki
These members will be used to help plugins to identify registers.

Signed-off-by: Akihiko Odaki 
---
 target/ppc/gdbstub.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
index 19c4935260..ac4ed12371 100644
--- a/target/ppc/gdbstub.c
+++ b/target/ppc/gdbstub.c
@@ -323,7 +323,7 @@ void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu)
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 CPUPPCState *env = &cpu->env;
 GString *xml;
-char *spr_name;
+const char **regs;
 unsigned int num_regs = 0;
 int i;
 
@@ -350,6 +350,7 @@ void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu)
 return;
 }
 
+regs = g_new(const char *, num_regs);
 xml = g_string_new("");
 g_string_append(xml, "");
 g_string_append(xml, "");
@@ -361,9 +362,8 @@ void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu)
 continue;
 }
 
-spr_name = g_ascii_strdown(spr->name, -1);
-g_string_append_printf(xml, "gdb_id] = g_ascii_strdown(spr->name, -1);
+g_string_append_printf(xml, "gdb_id]);
 
 g_string_append_printf(xml, " bitsize=\"%d\"", TARGET_LONG_BITS);
 g_string_append(xml, " group=\"spr\"/>");
@@ -371,6 +371,8 @@ void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu)
 
 g_string_append(xml, "");
 
+pcc->gdb_spr.name = "org.qemu.power.spr";
+pcc->gdb_spr.regs = regs;
 pcc->gdb_spr.num_regs = num_regs;
 pcc->gdb_spr.xmlname = "power-spr.xml";
 pcc->gdb_spr.xml = g_string_free(xml, false);
-- 
2.41.0




[RFC PATCH 22/24] contrib/plugins: Allow to log registers

2023-07-31 Thread Akihiko Odaki
This demonstrates how a register can be read from a plugin.

Signed-off-by: Akihiko Odaki 
---
 docs/devel/tcg-plugins.rst |  10 ++-
 contrib/plugins/execlog.c  | 130 -
 2 files changed, 108 insertions(+), 32 deletions(-)

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 81dcd43a61..c9f8b27590 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -497,6 +497,15 @@ arguments if required::
   $ qemu-system-arm $(QEMU_ARGS) \
 -plugin ./contrib/plugins/libexeclog.so,ifilter=st1w,afilter=0x40001808 -d 
plugin
 
+This plugin can also dump a specified register. The specification of register
+follows `GDB standard target features 
`__.
+
+Specify the name of the feature that contains the register and the name of the
+register with ``rfile`` and ``reg`` options, respectively::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+-plugin ./contrib/plugins/libexeclog.so,rfile=org.gnu.gdb.arm.core,reg=sp 
-d plugin
+
 - contrib/plugins/cache.c
 
 Cache modelling plugin that measures the performance of a given L1 cache
@@ -583,4 +592,3 @@ The following API is generated from the inline 
documentation in
 include the full kernel-doc annotations.
 
 .. kernel-doc:: include/qemu/qemu-plugin.h
-
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index ce67acf145..031ad67fbb 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -15,27 +15,42 @@
 
 #include 
 
+typedef struct CPU {
+/* Store last executed instruction on each vCPU as a GString */
+GString *last_exec;
+GByteArray *reg_buf;
+
+int reg;
+} CPU;
+
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
-/* Store last executed instruction on each vCPU as a GString */
-static GPtrArray *last_exec;
+static CPU *cpus;
+static int num_cpus;
 static GRWLock expand_array_lock;
 
 static GPtrArray *imatches;
 static GArray *amatches;
 
+static char *rfile_name;
+static char *reg_name;
+
 /*
- * Expand last_exec array.
+ * Expand cpu array.
  *
  * As we could have multiple threads trying to do this we need to
  * serialise the expansion under a lock.
  */
-static void expand_last_exec(int cpu_index)
+static void expand_cpu(int cpu_index)
 {
-g_rw_lock_writer_unlock(&expand_array_lock);
-while (cpu_index >= last_exec->len) {
-GString *s = g_string_new(NULL);
-g_ptr_array_add(last_exec, s);
+g_rw_lock_writer_lock(&expand_array_lock);
+if (cpu_index >= num_cpus) {
+cpus = g_realloc_n(cpus, cpu_index + 1, sizeof(*cpus));
+while (cpu_index >= num_cpus) {
+cpus[num_cpus].last_exec = g_string_new(NULL);
+cpus[num_cpus].reg_buf = g_byte_array_new();
+num_cpus++;
+}
 }
 g_rw_lock_writer_unlock(&expand_array_lock);
 }
@@ -50,8 +65,8 @@ static void vcpu_mem(unsigned int cpu_index, 
qemu_plugin_meminfo_t info,
 
 /* Find vCPU in array */
 g_rw_lock_reader_lock(&expand_array_lock);
-g_assert(cpu_index < last_exec->len);
-s = g_ptr_array_index(last_exec, cpu_index);
+g_assert(cpu_index < num_cpus);
+s = cpus[cpu_index].last_exec;
 g_rw_lock_reader_unlock(&expand_array_lock);
 
 /* Indicate type of memory access */
@@ -77,28 +92,35 @@ static void vcpu_mem(unsigned int cpu_index, 
qemu_plugin_meminfo_t info,
  */
 static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
 {
-GString *s;
+CPU cpu;
+int n;
+int i;
 
 /* Find or create vCPU in array */
 g_rw_lock_reader_lock(&expand_array_lock);
-if (cpu_index >= last_exec->len) {
-g_rw_lock_reader_unlock(&expand_array_lock);
-expand_last_exec(cpu_index);
-g_rw_lock_reader_lock(&expand_array_lock);
-}
-s = g_ptr_array_index(last_exec, cpu_index);
+cpu = cpus[cpu_index];
 g_rw_lock_reader_unlock(&expand_array_lock);
 
 /* Print previous instruction in cache */
-if (s->len) {
-qemu_plugin_outs(s->str);
+if (cpu.last_exec->len) {
+qemu_plugin_outs(cpu.last_exec->str);
 qemu_plugin_outs("\n");
 }
 
 /* Store new instruction in cache */
 /* vcpu_mem will add memory access information to last_exec */
-g_string_printf(s, "%u, ", cpu_index);
-g_string_append(s, (char *)udata);
+g_string_printf(cpu.last_exec, "%u, ", cpu_index);
+g_string_append(cpu.last_exec, (char *)udata);
+
+if (cpu.reg >= 0) {
+g_string_append(cpu.last_exec, ", reg,");
+n = qemu_plugin_read_register(cpu.reg_buf, cpu.reg);
+for (i = 0; i < n; i++) {
+g_string_append_printf(cpu.last_exec, " 0x%02X",
+   cpu.reg_buf->data[i]);
+}
+g_byte_array_set_size(cpu.reg_buf, 0);
+}
 }
 
 /**
@@ -167,8 +189,10 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct 
qemu_plugin_tb *tb)
 

[RFC PATCH 24/24] contrib/plugins: Add cc plugin

2023-07-31 Thread Akihiko Odaki
This demonstrates how to write a plugin in C++.

Signed-off-by: Akihiko Odaki 
---
 docs/devel/tcg-plugins.rst |  8 
 configure  | 15 ---
 contrib/plugins/Makefile   |  5 +
 contrib/plugins/cc.cc  | 15 +++
 tests/tcg/Makefile.target  |  3 +++
 5 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 contrib/plugins/cc.cc

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index c9f8b27590..0a11f8036c 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -584,6 +584,14 @@ The plugin has a number of arguments, all of them are 
optional:
   configuration arguments implies ``l2=on``.
   (default: N = 2097152 (2MB), B = 64, A = 16)
 
+- contrib/plugins/cc.cc
+
+cc plugin demonstrates how to write a plugin in C++. It simply outputs
+"hello, world" to the plugin log::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+-plugin ./contrib/plugins/libcc.so -d plugin
+
 API
 ---
 
diff --git a/configure b/configure
index 26ec5e4f54..0065b0dfe0 100755
--- a/configure
+++ b/configure
@@ -293,10 +293,18 @@ else
   cc="${CC-${cross_prefix}gcc}"
 fi
 
-if test -z "${CXX}${cross_prefix}"; then
-  cxx="c++"
+if test -n "${CXX+x}"; then
+  cxx="$CXX"
 else
-  cxx="${CXX-${cross_prefix}g++}"
+  if test -n "${cross_prefix}"; then
+cxx="${cross_prefix}g++"
+  else
+cxx="c++"
+  fi
+
+  if ! has "$cxx"; then
+cxx=
+  fi
 fi
 
 # Preferred ObjC compiler:
@@ -1702,6 +1710,7 @@ echo "MESON=$meson" >> $config_host_mak
 echo "NINJA=$ninja" >> $config_host_mak
 echo "PKG_CONFIG=${pkg_config}" >> $config_host_mak
 echo "CC=$cc" >> $config_host_mak
+echo "CXX=$cxx" >> $config_host_mak
 echo "EXESUF=$EXESUF" >> $config_host_mak
 
 # use included Linux headers
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index b2b9db9f51..93d86b3d07 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -21,6 +21,9 @@ NAMES += lockstep
 NAMES += hwprofile
 NAMES += cache
 NAMES += drcov
+ifneq ($(CXX),)
+NAMES += cc
+endif
 
 SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
 
@@ -31,6 +34,8 @@ CFLAGS += -fPIC -Wall
 CFLAGS += $(if $(CONFIG_DEBUG_TCG), -ggdb -O0)
 CFLAGS += -I$(SRC_PATH)/include/qemu
 
+CXXFLAGS := $(CFLAGS)
+
 all: $(SONAMES)
 
 %.o: %.c
diff --git a/contrib/plugins/cc.cc b/contrib/plugins/cc.cc
new file mode 100644
index 00..e7270d7adc
--- /dev/null
+++ b/contrib/plugins/cc.cc
@@ -0,0 +1,15 @@
+#include 
+
+extern "C" {
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+   const qemu_info_t *info, int argc,
+   char **argv)
+{
+qemu_plugin_outs("hello, world\n");
+return 0;
+}
+
+};
diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
index 462289f47c..3d7837d3b8 100644
--- a/tests/tcg/Makefile.target
+++ b/tests/tcg/Makefile.target
@@ -149,6 +149,9 @@ PLUGIN_SRC=$(SRC_PATH)/tests/plugin
 PLUGIN_LIB=../../plugin
 VPATH+=$(PLUGIN_LIB)
 PLUGINS=$(patsubst %.c, lib%.so, $(notdir $(wildcard $(PLUGIN_SRC)/*.c)))
+ifneq ($(CXX),)
+PLUGINS+=$(patsubst %.cc, lib%.so, $(notdir $(wildcard $(PLUGIN_SRC)/*.cc)))
+endif
 
 # We need to ensure expand the run-plugin-TEST-with-PLUGIN
 # pre-requistes manually here as we can't use stems to handle it. We
-- 
2.41.0




[RFC PATCH 14/24] gdbstub: Add members to identify registers to GDBFeature

2023-07-31 Thread Akihiko Odaki
These members will be used to help plugins to identify registers.
The added members in instances of GDBFeature dynamically generated by
CPUs will be filled in later changes.

Signed-off-by: Akihiko Odaki 
---
 include/exec/gdbstub.h  |  2 ++
 scripts/feature_to_c.py | 14 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 9b3da5b257..6da4af9612 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -13,6 +13,8 @@
 typedef struct GDBFeature {
 const char *xmlname;
 const char *xml;
+const char *name;
+const char * const *regs;
 int num_regs;
 } GDBFeature;
 
diff --git a/scripts/feature_to_c.py b/scripts/feature_to_c.py
index 8eb8c81cf8..11b1bc05c9 100755
--- a/scripts/feature_to_c.py
+++ b/scripts/feature_to_c.py
@@ -46,7 +46,9 @@ def writeliteral(indent, bytes):
 sys.stderr.write(f'unexpected start tag: {element.tag}\n')
 exit(1)
 
+feature_name = element.attrib['name']
 regnum = 0
+regnames = []
 regnums = []
 tags = ['feature']
 for event, element in events:
@@ -63,6 +65,7 @@ def writeliteral(indent, bytes):
 if 'regnum' in element.attrib:
 regnum = int(element.attrib['regnum'])
 
+regnames.append(element.attrib['name'])
 regnums.append(regnum)
 regnum += 1
 
@@ -81,6 +84,15 @@ def writeliteral(indent, bytes):
 writeliteral(8, bytes(os.path.basename(input), 'utf-8'))
 sys.stdout.write(',\n')
 writeliteral(8, read)
-sys.stdout.write(f',\n{num_regs},\n}},\n')
+sys.stdout.write(',\n')
+writeliteral(8, bytes(feature_name, 'utf-8'))
+sys.stdout.write(',\n(const char * const []) {\n')
+
+for index, regname in enumerate(regnames):
+sys.stdout.write(f'[{regnums[index] - base_reg}] =\n')
+writeliteral(16, bytes(regname, 'utf-8'))
+sys.stdout.write(',\n')
+
+sys.stdout.write(f'}},\n{num_regs},\n}},\n')
 
 sys.stdout.write('{ NULL }\n};\n')
-- 
2.41.0




[RFC PATCH 18/24] hw/core/cpu: Add a parameter to gdb_read_register/gdb_write_register

2023-07-31 Thread Akihiko Odaki
gdbstub has a static variable named gdb_has_xml that tells if
workarounds for old GDB versions are required when manipulating
registers for GDB.

Now we are reusing the infrastructure to manipulate registers for
plugins. Plugins will not need these workarounds even when an old GDB
is attached. Converting the static variable to a function parameter
would allow to apply or not to apply workarounds depending on the
context.

This change adds the new parameter and replaces references to
gdb_has_xml with it. New code to pass different values to the functions
will be added later.

Signed-off-by: Akihiko Odaki 
---
 include/hw/core/cpu.h |  6 --
 target/alpha/cpu.h|  6 --
 target/arm/cpu.h  | 13 +
 target/avr/cpu.h  |  6 --
 target/cris/cpu.h |  9 ++---
 target/hexagon/internal.h |  6 --
 target/hppa/cpu.h |  6 --
 target/i386/cpu.h |  6 --
 target/loongarch/internals.h  |  6 --
 target/m68k/cpu.h |  6 --
 target/microblaze/cpu.h   |  6 --
 target/mips/internal.h|  6 --
 target/openrisc/cpu.h |  6 --
 target/ppc/cpu.h  | 12 
 target/riscv/cpu.h|  6 --
 target/rx/cpu.h   |  6 --
 target/s390x/s390x-internal.h |  6 --
 target/sh4/cpu.h  |  6 --
 target/sparc/cpu.h|  6 --
 target/tricore/cpu.h  |  6 --
 target/xtensa/cpu.h   |  6 --
 gdbstub/gdbstub.c |  4 ++--
 hw/core/cpu-common.c  |  6 --
 target/alpha/gdbstub.c|  6 --
 target/arm/gdbstub.c  | 14 --
 target/arm/gdbstub64.c|  6 --
 target/avr/gdbstub.c  |  6 --
 target/cris/gdbstub.c |  9 ++---
 target/hexagon/gdbstub.c  |  6 --
 target/hppa/gdbstub.c |  6 --
 target/i386/gdbstub.c | 10 +-
 target/loongarch/gdbstub.c|  6 --
 target/m68k/gdbstub.c |  6 --
 target/microblaze/gdbstub.c   |  6 --
 target/mips/gdbstub.c |  6 --
 target/nios2/cpu.c|  6 --
 target/openrisc/gdbstub.c |  6 --
 target/ppc/gdbstub.c  | 22 +-
 target/riscv/gdbstub.c|  6 --
 target/rx/gdbstub.c   |  6 --
 target/s390x/gdbstub.c|  6 --
 target/sh4/gdbstub.c  |  6 --
 target/sparc/gdbstub.c|  6 --
 target/tricore/gdbstub.c  |  6 --
 target/xtensa/gdbstub.c   |  6 --
 45 files changed, 205 insertions(+), 110 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 9323d26f84..9428e57e7c 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -157,8 +157,10 @@ struct CPUClass {
 int64_t (*get_arch_id)(CPUState *cpu);
 void (*set_pc)(CPUState *cpu, vaddr value);
 vaddr (*get_pc)(CPUState *cpu);
-int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
-int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
+int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg,
+ bool has_xml);
+int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg,
+  bool has_xml);
 vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
 
 const GDBFeature *gdb_core_feature;
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index 13306665af..46bd9bc37b 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -279,8 +279,10 @@ bool alpha_cpu_exec_interrupt(CPUState *cpu, int int_req);
 hwaddr alpha_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 #endif /* !CONFIG_USER_ONLY */
 void alpha_cpu_dump_state(CPUState *cs, FILE *f, int flags);
-int alpha_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
-int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int alpha_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg,
+bool has_xml);
+int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg,
+ bool has_xml);
 
 #define cpu_list alpha_cpu_list
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 09bf82034d..3dfa29c3aa 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1128,8 +1128,11 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, 
vaddr addr,
  MemTxAttrs *attrs);
 #endif /* !CONFIG_USER_ONLY */
 
-int arm_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
-int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int arm_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg,
+  bool has_xml);
+
+int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg,
+   bool has_xml);
 
 int arm_cpu_write_elf64_note(WriteCor

[RFC PATCH 21/24] plugins: Allow to read registers

2023-07-31 Thread Akihiko Odaki
It is based on GDB protocol to ensure interface stability.

The timing of the vcpu init hook is also changed so that the hook will
get called after GDB features are initialized.

Signed-off-by: Akihiko Odaki 
---
 include/qemu/qemu-plugin.h   | 65 ++--
 cpu.c| 11 --
 hw/core/cpu-common.c | 10 ++
 plugins/api.c| 40 ++
 plugins/qemu-plugins.symbols |  2 ++
 5 files changed, 114 insertions(+), 14 deletions(-)

diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index 50a9957279..214b12bfd6 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -11,6 +11,7 @@
 #ifndef QEMU_QEMU_PLUGIN_H
 #define QEMU_QEMU_PLUGIN_H
 
+#include 
 #include 
 #include 
 #include 
@@ -51,7 +52,7 @@ typedef uint64_t qemu_plugin_id_t;
 
 extern QEMU_PLUGIN_EXPORT int qemu_plugin_version;
 
-#define QEMU_PLUGIN_VERSION 1
+#define QEMU_PLUGIN_VERSION 2
 
 /**
  * struct qemu_info_t - system information for plugins
@@ -218,8 +219,8 @@ struct qemu_plugin_insn;
  * @QEMU_PLUGIN_CB_R_REGS: callback reads the CPU's regs
  * @QEMU_PLUGIN_CB_RW_REGS: callback reads and writes the CPU's regs
  *
- * Note: currently unused, plugins cannot read or change system
- * register state.
+ * Note: currently QEMU_PLUGIN_CB_RW_REGS is unused, plugins cannot change
+ * system register state.
  */
 enum qemu_plugin_cb_flags {
 QEMU_PLUGIN_CB_NO_REGS,
@@ -664,4 +665,62 @@ uint64_t qemu_plugin_end_code(void);
  */
 uint64_t qemu_plugin_entry_code(void);
 
+/**
+ * struct qemu_plugin_register_file_t - register information
+ *
+ * This structure identifies registers. The identifiers included in this
+ * structure are identical with names used in GDB's standard target features
+ * with some extensions. For details, see:
+ * https://sourceware.org/gdb/onlinedocs/gdb/Standard-Target-Features.html
+ *
+ * A register is uniquely identified with the combination of a feature name
+ * and a register name or a register number. It is recommended to derive
+ * register numbers from feature names and register names each time a new vcpu
+ * starts.
+ *
+ * To derive the register number from a feature name and a register name,
+ * first look up qemu_plugin_register_file_t with the feature name, and then
+ * look up the register name in its @regs. The sum of the @base_reg and the
+ * index in the @reg is the register number.
+ *
+ * Note that @regs may have holes; some elements of @regs may be NULL.
+ */
+typedef struct qemu_plugin_register_file_t {
+/** @name: feature name */
+const char *name;
+/** @regs: register names */
+const char * const *regs;
+/** @base_reg: the base identified number */
+int base_reg;
+/** @num_regs: the number of elements in @regs */
+int num_regs;
+} qemu_plugin_register_file_t;
+
+/**
+ * qemu_plugin_get_register_files() - returns register information
+ *
+ * @vcpu_index: the index of the vcpu context
+ * @size: the pointer to the variable to hold the number of returned elements
+ *
+ * Returns an array of qemu_plugin_register_file_t. The user should g_free()
+ * the array once no longer needed.
+ */
+qemu_plugin_register_file_t *
+qemu_plugin_get_register_files(unsigned int vcpu_index, int *size);
+
+/**
+ * qemu_plugin_read_register() - read register
+ *
+ * @buf: the byte array to append the read register content to.
+ * @reg: the register identifier determined with
+ *   qemu_plugin_get_register_files().
+ *
+ * This function is only available in a context that register read access is
+ * explicitly requested.
+ *
+ * Returns the size of the read register. The content of @buf is in target byte
+ * order.
+ */
+int qemu_plugin_read_register(GByteArray *buf, int reg);
+
 #endif /* QEMU_QEMU_PLUGIN_H */
diff --git a/cpu.c b/cpu.c
index 1c948d1161..2552c85249 100644
--- a/cpu.c
+++ b/cpu.c
@@ -42,7 +42,6 @@
 #include "hw/core/accel-cpu.h"
 #include "trace/trace-root.h"
 #include "qemu/accel.h"
-#include "qemu/plugin.h"
 
 uintptr_t qemu_host_page_size;
 intptr_t qemu_host_page_mask;
@@ -148,11 +147,6 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 /* Wait until cpu initialization complete before exposing cpu. */
 cpu_list_add(cpu);
 
-/* Plugin initialization must wait until cpu_index assigned. */
-if (tcg_enabled()) {
-qemu_plugin_vcpu_init_hook(cpu);
-}
-
 #ifdef CONFIG_USER_ONLY
 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
qdev_get_vmsd(DEVICE(cpu))->unmigratable);
@@ -179,11 +173,6 @@ void cpu_exec_unrealizefn(CPUState *cpu)
 }
 #endif
 
-/* Call the plugin hook before clearing cpu->cpu_index in cpu_list_remove 
*/
-if (tcg_enabled()) {
-qemu_plugin_vcpu_exit_hook(cpu);
-}
-
 cpu_list_remove(cpu);
 /*
  * Now that the vCPU has been removed from the RCU list, we can call
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 549f52f46f..e06a70007a 100644
--- a/hw/

[RFC PATCH 13/24] hw/core/cpu: Remove gdb_get_dynamic_xml member

2023-07-31 Thread Akihiko Odaki
This function is no longer used.

Signed-off-by: Akihiko Odaki 
---
 include/hw/core/cpu.h |  4 
 target/arm/cpu.h  |  6 --
 target/ppc/cpu.h  |  1 -
 target/arm/cpu.c  |  1 -
 target/arm/gdbstub.c  | 18 --
 target/ppc/cpu_init.c |  3 ---
 target/ppc/gdbstub.c  | 10 --
 target/riscv/cpu.c| 14 --
 8 files changed, 57 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 84219c1885..9323d26f84 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -133,9 +133,6 @@ struct SysemuCPUOps;
  *   before the insn which triggers a watchpoint rather than after it.
  * @gdb_arch_name: Optional callback that returns the architecture name known
  * to GDB. The caller must free the returned string with g_free.
- * @gdb_get_dynamic_xml: Callback to return dynamically generated XML for the
- *   gdb stub. Returns a pointer to the XML contents for the specified XML file
- *   or NULL if the CPU doesn't have a dynamically generated content for it.
  * @disas_set_info: Setup architecture specific components of disassembly info
  * @adjust_watchpoint_address: Perform a target-specific adjustment to an
  * address before attempting to match it against watchpoints.
@@ -166,7 +163,6 @@ struct CPUClass {
 
 const GDBFeature *gdb_core_feature;
 gchar * (*gdb_arch_name)(CPUState *cpu);
-const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname);
 
 void (*disas_set_info)(CPUState *cpu, disassemble_info *info);
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d6c2378d05..09bf82034d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1131,12 +1131,6 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, 
vaddr addr,
 int arm_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 
-/* Returns the dynamically generated XML for the gdb stub.
- * Returns a pointer to the XML contents for the specified XML file or NULL
- * if the XML name doesn't match the predefined one.
- */
-const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname);
-
 int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
  int cpuid, DumpState *s);
 int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 5f251bdffe..3dc6e545e3 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1382,7 +1382,6 @@ int ppc_cpu_gdb_write_register_apple(CPUState *cpu, 
uint8_t *buf, int reg);
 #ifndef CONFIG_USER_ONLY
 hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 void ppc_gdb_gen_spr_feature(PowerPCCPU *cpu);
-const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name);
 #endif
 int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, DumpState *s);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index a206ab6b1b..f51612070d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2354,7 +2354,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 cc->sysemu_ops = &arm_sysemu_ops;
 #endif
 cc->gdb_arch_name = arm_gdb_arch_name;
-cc->gdb_get_dynamic_xml = arm_gdb_get_dynamic_xml;
 cc->gdb_stop_before_watchpoint = true;
 cc->disas_set_info = arm_disas_set_info;
 
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index ab4ffe6264..100a6eed15 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -492,24 +492,6 @@ static GDBFeature 
*arm_gen_dynamic_m_secextreg_feature(CPUState *cs,
 #endif
 #endif /* CONFIG_TCG */
 
-const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname)
-{
-ARMCPU *cpu = ARM_CPU(cs);
-
-if (strcmp(xmlname, "system-registers.xml") == 0) {
-return cpu->dyn_sysreg_feature.desc.xml;
-} else if (strcmp(xmlname, "sve-registers.xml") == 0) {
-return cpu->dyn_svereg_feature.desc.xml;
-} else if (strcmp(xmlname, "arm-m-system.xml") == 0) {
-return cpu->dyn_m_systemreg_feature.desc.xml;
-#ifndef CONFIG_USER_ONLY
-} else if (strcmp(xmlname, "arm-m-secext.xml") == 0) {
-return cpu->dyn_m_secextreg_feature.desc.xml;
-#endif
-}
-return NULL;
-}
-
 void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
 {
 CPUState *cs = CPU(cpu);
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 938cd2b7e1..a3153c4e9f 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7370,9 +7370,6 @@ static void ppc_cpu_class_init(ObjectClass *oc, void 
*data)
 #endif
 
 cc->gdb_num_core_regs = 71;
-#ifndef CONFIG_USER_ONLY
-cc->gdb_get_dynamic_xml = ppc_gdb_get_dynamic_xml;
-#endif
 #ifdef USE_APPLE_GDB
 cc->gdb_read_register = ppc_cpu_gdb_read_register_apple;
 cc->gdb_write_register = ppc_cpu_gdb_write_register_apple;
diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
index a0da320e66..19c4935260 1006

[RFC PATCH 20/24] gdbstub: Expose functions to read registers

2023-07-31 Thread Akihiko Odaki
gdb_foreach_feature() enumerates features that are useful to identify
registers. gdb_read_register() actually reads registers.

Signed-off-by: Akihiko Odaki 
---
 include/exec/gdbstub.h |  6 ++
 gdbstub/gdbstub.c  | 38 ++
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index cbd1e6ead4..5e5789f7bb 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -38,6 +38,12 @@ int gdbserver_start(const char *port_or_device);
 
 const GDBFeature *gdb_find_static_feature(const char *xmlname);
 
+void gdb_foreach_feature(CPUState *cpu,
+ void (* callback)(void *, const GDBFeature *, int),
+ void *opaque);
+
+int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg, bool has_xml);
+
 void gdb_set_stop_cpu(CPUState *cpu);
 
 /* in gdbstub-xml.c, generated by scripts/feature_to_c.py */
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index caef6aabab..5f76ff7271 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -416,14 +416,32 @@ const GDBFeature *gdb_find_static_feature(const char 
*xmlname)
 return NULL;
 }
 
-static int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
+void gdb_foreach_feature(CPUState *cpu,
+ void (* callback)(void *, const GDBFeature *, int),
+ void *opaque)
+{
+CPUClass *cc = CPU_GET_CLASS(cpu);
+GDBRegisterState *r;
+
+if (!cc->gdb_core_feature) {
+return;
+}
+
+callback(opaque, cc->gdb_core_feature, 0);
+
+for (r = cpu->gdb_regs; r; r = r->next) {
+callback(opaque, r->feature, r->base_reg);
+}
+}
+
+int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg, bool has_xml)
 {
 CPUClass *cc = CPU_GET_CLASS(cpu);
 CPUArchState *env = cpu->env_ptr;
 GDBRegisterState *r;
 
 if (reg < cc->gdb_num_core_regs) {
-return cc->gdb_read_register(cpu, buf, reg, gdb_has_xml);
+return cc->gdb_read_register(cpu, buf, reg, has_xml);
 }
 
 for (r = cpu->gdb_regs; r; r = r->next) {
@@ -434,14 +452,15 @@ static int gdb_read_register(CPUState *cpu, GByteArray 
*buf, int reg)
 return 0;
 }
 
-static int gdb_write_register(CPUState *cpu, uint8_t *mem_buf, int reg)
+static int gdb_write_register(CPUState *cpu, uint8_t *mem_buf, int reg,
+  bool has_xml)
 {
 CPUClass *cc = CPU_GET_CLASS(cpu);
 CPUArchState *env = cpu->env_ptr;
 GDBRegisterState *r;
 
 if (reg < cc->gdb_num_core_regs) {
-return cc->gdb_write_register(cpu, mem_buf, reg, gdb_has_xml);
+return cc->gdb_write_register(cpu, mem_buf, reg, has_xml);
 }
 
 for (r = cpu->gdb_regs; r; r = r->next) {
@@ -1067,7 +1086,7 @@ static void handle_set_reg(GArray *params, void *user_ctx)
 reg_size = strlen(get_param(params, 1)->data) / 2;
 gdb_hextomem(gdbserver_state.mem_buf, get_param(params, 1)->data, 
reg_size);
 gdb_write_register(gdbserver_state.g_cpu, gdbserver_state.mem_buf->data,
-   get_param(params, 0)->val_ull);
+   get_param(params, 0)->val_ull, gdb_has_xml);
 gdb_put_packet("OK");
 }
 
@@ -1087,7 +1106,8 @@ static void handle_get_reg(GArray *params, void *user_ctx)
 
 reg_size = gdb_read_register(gdbserver_state.g_cpu,
  gdbserver_state.mem_buf,
- get_param(params, 0)->val_ull);
+ get_param(params, 0)->val_ull,
+ gdb_has_xml);
 if (!reg_size) {
 gdb_put_packet("E14");
 return;
@@ -1174,7 +1194,8 @@ static void handle_write_all_regs(GArray *params, void 
*user_ctx)
 for (reg_id = 0;
  reg_id < gdbserver_state.g_cpu->gdb_num_g_regs && len > 0;
  reg_id++) {
-reg_size = gdb_write_register(gdbserver_state.g_cpu, registers, 
reg_id);
+reg_size = gdb_write_register(gdbserver_state.g_cpu, registers, reg_id,
+  gdb_has_xml);
 len -= reg_size;
 registers += reg_size;
 }
@@ -1192,7 +1213,8 @@ static void handle_read_all_regs(GArray *params, void 
*user_ctx)
 for (reg_id = 0; reg_id < gdbserver_state.g_cpu->gdb_num_g_regs; reg_id++) 
{
 len += gdb_read_register(gdbserver_state.g_cpu,
  gdbserver_state.mem_buf,
- reg_id);
+ reg_id,
+ gdb_has_xml);
 }
 g_assert(len == gdbserver_state.mem_buf->len);
 
-- 
2.41.0




[RFC PATCH 17/24] target/riscv: Fill new members of GDBFeature

2023-07-31 Thread Akihiko Odaki
These members will be used to help plugins to identify registers.

Signed-off-by: Akihiko Odaki 
---
 target/riscv/gdbstub.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
index 224c69ea99..b3d4d4de3e 100644
--- a/target/riscv/gdbstub.c
+++ b/target/riscv/gdbstub.c
@@ -217,6 +217,7 @@ static GDBFeature *riscv_gen_dynamic_csr_feature(CPUState 
*cs, int base_reg)
 RISCVCPU *cpu = RISCV_CPU(cs);
 CPURISCVState *env = &cpu->env;
 GString *s = g_string_new(NULL);
+const char **regs = g_new(const char *, CSR_TABLE_SIZE);
 riscv_csr_predicate_fn predicate;
 int bitsize = 16 << env->misa_mxl_max;
 int i;
@@ -240,11 +241,10 @@ static GDBFeature *riscv_gen_dynamic_csr_feature(CPUState 
*cs, int base_reg)
 }
 predicate = csr_ops[i].predicate;
 if (predicate && (predicate(env, i) == RISCV_EXCP_NONE)) {
-if (csr_ops[i].name) {
-g_string_append_printf(s, "", base_reg + i);
 }
@@ -252,6 +252,8 @@ static GDBFeature *riscv_gen_dynamic_csr_feature(CPUState 
*cs, int base_reg)
 
 g_string_append_printf(s, "");
 
+cpu->dyn_csr_feature.name = "org.gnu.gdb.riscv.csr";
+cpu->dyn_csr_feature.regs = regs;
 cpu->dyn_csr_feature.num_regs = CSR_TABLE_SIZE;
 cpu->dyn_csr_feature.xmlname = "riscv-csr.xml";
 cpu->dyn_csr_feature.xml = g_string_free(s, false);
@@ -268,10 +270,12 @@ static GDBFeature 
*ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg)
 RISCVCPU *cpu = RISCV_CPU(cs);
 GString *s = g_string_new(NULL);
 g_autoptr(GString) ts = g_string_new("");
+const char **regs;
 int reg_width = cpu->cfg.vlen;
 int i;
 
 cpu->dyn_vreg_feature.num_regs = 32;
+regs = g_new(const char *, cpu->dyn_vreg_feature.num_regs);
 
 g_string_printf(s, "");
 g_string_append_printf(s, "");
@@ -297,15 +301,18 @@ static GDBFeature 
*ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg)
 
 /* Define vector registers */
 for (i = 0; i < cpu->dyn_vreg_feature.num_regs; i++) {
+regs[i] = g_strdup_printf("v%d", i);
 g_string_append_printf(s,
-   "",
-   i, reg_width, base_reg++);
+   regs[i], reg_width, base_reg++);
 }
 
 g_string_append_printf(s, "");
 
+cpu->dyn_vreg_feature.name = "org.gnu.gdb.riscv.vector";
+cpu->dyn_vreg_feature.regs = regs;
 cpu->dyn_vreg_feature.xmlname = "riscv-vector.xml";
 cpu->dyn_vreg_feature.xml = g_string_free(s, false);
 return &cpu->dyn_vreg_feature;
-- 
2.41.0




[RFC PATCH 23/24] plugins: Support C++

2023-07-31 Thread Akihiko Odaki
Make qemu-plugin.h consumable for C++ platform.

Signed-off-by: Akihiko Odaki 
---
 include/qemu/qemu-plugin.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index 214b12bfd6..8637e3d8cf 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -16,6 +16,8 @@
 #include 
 #include 
 
+G_BEGIN_DECLS
+
 /*
  * For best performance, build the plugin with -fvisibility=hidden so that
  * QEMU_PLUGIN_LOCAL is implicit. Then, just mark qemu_plugin_install with
@@ -723,4 +725,6 @@ qemu_plugin_get_register_files(unsigned int vcpu_index, int 
*size);
  */
 int qemu_plugin_read_register(GByteArray *buf, int reg);
 
+G_END_DECLS
+
 #endif /* QEMU_QEMU_PLUGIN_H */
-- 
2.41.0




[RFC PATCH 09/24] target/riscv: Use GDBFeature for dynamic XML

2023-07-31 Thread Akihiko Odaki
In preparation for a change to use GDBFeature as a parameter of
gdb_register_coprocessor(), convert the internal representation of
dynamic feature from plain XML to GDBFeature.

Signed-off-by: Akihiko Odaki 
---
 target/riscv/cpu.h |  4 ++--
 target/riscv/cpu.c |  4 ++--
 target/riscv/gdbstub.c | 25 ++---
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6ea22e0eea..f67751d5b7 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -391,8 +391,8 @@ struct ArchCPU {
 CPUNegativeOffsetState neg;
 CPURISCVState env;
 
-char *dyn_csr_xml;
-char *dyn_vreg_xml;
+GDBFeature dyn_csr_feature;
+GDBFeature dyn_vreg_feature;
 
 /* Configuration Settings */
 RISCVCPUConfig cfg;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 36de35270d..ceca40cdd9 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1962,9 +1962,9 @@ static const char *riscv_gdb_get_dynamic_xml(CPUState 
*cs, const char *xmlname)
 RISCVCPU *cpu = RISCV_CPU(cs);
 
 if (strcmp(xmlname, "riscv-csr.xml") == 0) {
-return cpu->dyn_csr_xml;
+return cpu->dyn_csr_feature.xml;
 } else if (strcmp(xmlname, "riscv-vector.xml") == 0) {
-return cpu->dyn_vreg_xml;
+return cpu->dyn_vreg_feature.xml;
 }
 
 return NULL;
diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
index 524bede865..70c60ad8b1 100644
--- a/target/riscv/gdbstub.c
+++ b/target/riscv/gdbstub.c
@@ -212,7 +212,7 @@ static int riscv_gdb_set_virtual(CPURISCVState *cs, uint8_t 
*mem_buf, int n)
 return 0;
 }
 
-static int riscv_gen_dynamic_csr_xml(CPUState *cs, int base_reg)
+static GDBFeature *riscv_gen_dynamic_csr_feature(CPUState *cs, int base_reg)
 {
 RISCVCPU *cpu = RISCV_CPU(cs);
 CPURISCVState *env = &cpu->env;
@@ -252,24 +252,27 @@ static int riscv_gen_dynamic_csr_xml(CPUState *cs, int 
base_reg)
 
 g_string_append_printf(s, "");
 
-cpu->dyn_csr_xml = g_string_free(s, false);
+cpu->dyn_csr_feature.num_regs = CSR_TABLE_SIZE;
+cpu->dyn_csr_feature.xmlname = "riscv-csr.xml";
+cpu->dyn_csr_feature.xml = g_string_free(s, false);
 
 #if !defined(CONFIG_USER_ONLY)
 env->debugger = false;
 #endif
 
-return CSR_TABLE_SIZE;
+return &cpu->dyn_csr_feature;
 }
 
-static int ricsv_gen_dynamic_vector_xml(CPUState *cs, int base_reg)
+static GDBFeature *ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg)
 {
 RISCVCPU *cpu = RISCV_CPU(cs);
 GString *s = g_string_new(NULL);
 g_autoptr(GString) ts = g_string_new("");
 int reg_width = cpu->cfg.vlen;
-int num_regs = 0;
 int i;
 
+cpu->dyn_vreg_feature.num_regs = 32;
+
 g_string_printf(s, "");
 g_string_append_printf(s, "");
 g_string_append_printf(s, "");
@@ -293,19 +296,19 @@ static int ricsv_gen_dynamic_vector_xml(CPUState *cs, int 
base_reg)
 g_string_append(s, "");
 
 /* Define vector registers */
-for (i = 0; i < 32; i++) {
+for (i = 0; i < cpu->dyn_vreg_feature.num_regs; i++) {
 g_string_append_printf(s,
"",
i, reg_width, base_reg++);
-num_regs++;
 }
 
 g_string_append_printf(s, "");
 
-cpu->dyn_vreg_xml = g_string_free(s, false);
-return num_regs;
+cpu->dyn_vreg_feature.xmlname = "riscv-vector.xml";
+cpu->dyn_vreg_feature.xml = g_string_free(s, false);
+return &cpu->dyn_vreg_feature;
 }
 
 void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
@@ -323,7 +326,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
 int base_reg = cs->gdb_num_regs;
 gdb_register_coprocessor(cs, riscv_gdb_get_vector,
  riscv_gdb_set_vector,
- ricsv_gen_dynamic_vector_xml(cs, base_reg),
+ ricsv_gen_dynamic_vector_feature(cs, 
base_reg)->num_regs,
  "riscv-vector.xml", 0);
 }
 switch (env->misa_mxl_max) {
@@ -345,7 +348,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
 if (cpu->cfg.ext_icsr) {
 int base_reg = cs->gdb_num_regs;
 gdb_register_coprocessor(cs, riscv_gdb_get_csr, riscv_gdb_set_csr,
- riscv_gen_dynamic_csr_xml(cs, base_reg),
+ riscv_gen_dynamic_csr_feature(cs, 
base_reg)->num_regs,
  "riscv-csr.xml", 0);
 }
 }
-- 
2.41.0




[RFC PATCH 19/24] gdbstub: Hide gdb_has_xml

2023-07-31 Thread Akihiko Odaki
gdb_has_xml is no longer referenced by the other components.

Signed-off-by: Akihiko Odaki 
---
 gdbstub/internals.h| 8 
 include/exec/gdbstub.h | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gdbstub/internals.h b/gdbstub/internals.h
index f2b46cce41..92f5ce8cbb 100644
--- a/gdbstub/internals.h
+++ b/gdbstub/internals.h
@@ -234,4 +234,12 @@ void gdb_breakpoint_remove_all(CPUState *cs);
 int gdb_target_memory_rw_debug(CPUState *cs, hwaddr addr,
uint8_t *buf, int len, bool is_write);
 
+/**
+ * gdb_has_xml:
+ * This is an ugly hack to cope with both new and old gdb.
+ * If gdb sends qXfer:features:read then assume we're talking to a newish
+ * gdb that understands target descriptions.
+ */
+extern bool gdb_has_xml;
+
 #endif /* GDBSTUB_INTERNALS_H */
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 6da4af9612..cbd1e6ead4 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -40,14 +40,6 @@ const GDBFeature *gdb_find_static_feature(const char 
*xmlname);
 
 void gdb_set_stop_cpu(CPUState *cpu);
 
-/**
- * gdb_has_xml:
- * This is an ugly hack to cope with both new and old gdb.
- * If gdb sends qXfer:features:read then assume we're talking to a newish
- * gdb that understands target descriptions.
- */
-extern bool gdb_has_xml;
-
 /* in gdbstub-xml.c, generated by scripts/feature_to_c.py */
 extern const GDBFeature gdb_features[];
 
-- 
2.41.0




[PATCH v2 0/2] Accompany -nostdlib with -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it was
configured with --enable-default-ssp option. For such a compiler, it is
necessary to explicitly disable stack protector when linking without
standard libraries.

This is a tree-wide change to add -fno-stack-protector where -nostdlib is
present.

V1 -> V2:
  Dropped changes for pc-bios as they already had the option. (Thomas Huth)
  Make the change for softmmu tests in a central place. (Thomas Huth)

Akihiko Odaki (2):
  tests/migration: Add -fno-stack-protector
  tests/tcg: Add -fno-stack-protector

 tests/tcg/mips/hello-mips.c   | 4 ++--
 tests/migration/s390x/Makefile| 4 ++--
 tests/tcg/Makefile.target | 2 +-
 tests/tcg/aarch64/Makefile.target | 2 +-
 tests/tcg/arm/Makefile.target | 2 +-
 tests/tcg/cris/Makefile.target| 2 +-
 tests/tcg/hexagon/Makefile.target | 2 +-
 tests/tcg/i386/Makefile.target| 2 +-
 tests/tcg/minilib/Makefile.target | 2 +-
 tests/tcg/mips/Makefile.target| 2 +-
 10 files changed, 12 insertions(+), 12 deletions(-)

-- 
2.41.0




Re: [RFC PATCH 5/6] include/qemu/compiler: Fix problem with gcc_struct and Clang

2023-07-31 Thread Thomas Huth

On 28/07/2023 17.13, Peter Maydell wrote:

On Fri, 28 Jul 2023 at 15:28, Thomas Huth  wrote:


Clang on Windows does not seem to know the "gcc_struct" attribute
and emits a warning when we try to use it. Add an additional check
here with __has_attribute() to avoid this problem.

Signed-off-by: Thomas Huth 
---
  include/qemu/compiler.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index a309f90c76..5065b4447c 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -22,7 +22,7 @@
  #define QEMU_EXTERN_C extern
  #endif

-#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
+#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) && 
!defined(__clang__)
  # define QEMU_PACKED __attribute__((gcc_struct, packed))
  #else
  # define QEMU_PACKED __attribute__((packed))


I'm not sure about this. The idea of QEMU_PACKED is that
it's supposed to give you the same struct layout
regardless of compiler. With this change it no longer
does that, and there's no compile-time guard against
using something in a packed struct that has a different
layout on Windows clang vs everything else.

If it was OK to use plain attribute(packed) we wouldn't
need the ifdef at all because we could use it on GCC too.


I still haven't quite grasped whether this attribute just affects structures 
with bitfields in it, or whether it could also affect other structures 
without bitfields.


Looking at https://gcc.gnu.org/onlinedocs/gcc/x86-Type-Attributes.html and 
https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-mms-bitfields , it 
sounds like it only affects structs with bitfields, unless you specify an 
"aligned(x)" attribute, too?


Anyway, using bitfields in structs for exchanging data with the guest is 
just way too error-prone, as you can see in the discussion about that 
VTD_IR_TableEntry in my other patch. We should maybe advise against 
bitfields in our coding style and point people to registerfields.h instead 
for new code? ... so that we use QEMU_PACKED mainly for legacy code. Would 
it then be OK for you, Peter, to go on with this approach?


Or do you see another possibility how we could fix that timeout problem in 
the 64-bit MSYS2 job? Still switching to clang, but compiling with 
--extra-cflags="-Wno-unknown-attributes" maybe?


 Thomas




[PATCH v2 1/2] tests/migration: Add -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Juan Quintela 
Reviewed-by: Thomas Huth 
---
 tests/migration/s390x/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/migration/s390x/Makefile b/tests/migration/s390x/Makefile
index 6393c3e5b9..6671de2efc 100644
--- a/tests/migration/s390x/Makefile
+++ b/tests/migration/s390x/Makefile
@@ -6,8 +6,8 @@ all: a-b-bios.h
 fwdir=../../../pc-bios/s390-ccw
 
 CFLAGS+=-ffreestanding -fno-delete-null-pointer-checks -fPIE -Os \
-   -msoft-float -march=z900 -fno-asynchronous-unwind-tables -Wl,-pie \
-   -Wl,--build-id=none -nostdlib
+   -msoft-float -march=z900 -fno-asynchronous-unwind-tables \
+   -fno-stack-protector -Wl,-pie -Wl,--build-id=none -nostdlib
 
 a-b-bios.h: s390x.elf
echo "$$__note" > header.tmp
-- 
2.41.0




[PATCH v2 2/2] tests/tcg: Add -fno-stack-protector

2023-07-31 Thread Akihiko Odaki
A build of GCC 13.2 will have stack protector enabled by default if it
was configured with --enable-default-ssp option. For such a compiler,
it is necessary to explicitly disable stack protector when linking
without standard libraries.

Signed-off-by: Akihiko Odaki 
---
 tests/tcg/mips/hello-mips.c   | 4 ++--
 tests/tcg/Makefile.target | 2 +-
 tests/tcg/aarch64/Makefile.target | 2 +-
 tests/tcg/arm/Makefile.target | 2 +-
 tests/tcg/cris/Makefile.target| 2 +-
 tests/tcg/hexagon/Makefile.target | 2 +-
 tests/tcg/i386/Makefile.target| 2 +-
 tests/tcg/minilib/Makefile.target | 2 +-
 tests/tcg/mips/Makefile.target| 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/tcg/mips/hello-mips.c b/tests/tcg/mips/hello-mips.c
index 4e1cf501af..0ba5f1bf23 100644
--- a/tests/tcg/mips/hello-mips.c
+++ b/tests/tcg/mips/hello-mips.c
@@ -5,8 +5,8 @@
 * http://www.linux-mips.org/wiki/MIPSABIHistory
 * http://www.linux.com/howtos/Assembly-HOWTO/mips.shtml
 *
-* mipsel-linux-gcc -nostdlib -mno-abicalls -fno-PIC -mabi=32 \
-*  -O2 -static -o hello-mips hello-mips.c
+* mipsel-linux-gcc -nostdlib -mno-abicalls -fno-PIC -fno-stack-protector \
+   -mabi=32 -O2 -static -o hello-mips hello-mips.c
 *
 */
 #define __NR_SYSCALL_BASE  4000
diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
index 3d7837d3b8..c43020d990 100644
--- a/tests/tcg/Makefile.target
+++ b/tests/tcg/Makefile.target
@@ -123,7 +123,7 @@ else
 # For softmmu targets we include a different Makefile fragment as the
 # build options for bare programs are usually pretty different. They
 # are expected to provide their own build recipes.
-EXTRA_CFLAGS += -ffreestanding
+EXTRA_CFLAGS += -ffreestanding -fno-stack-protector
 -include $(SRC_PATH)/tests/tcg/minilib/Makefile.target
 -include $(SRC_PATH)/tests/tcg/multiarch/system/Makefile.softmmu-target
 -include $(SRC_PATH)/tests/tcg/$(TARGET_NAME)/Makefile.softmmu-target
diff --git a/tests/tcg/aarch64/Makefile.target 
b/tests/tcg/aarch64/Makefile.target
index 617f821613..55f8609897 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -49,7 +49,7 @@ endif
 # bti-1 tests the elf notes, so we require special compiler support.
 ifneq ($(CROSS_CC_HAS_ARMV8_BTI),)
 AARCH64_TESTS += bti-1 bti-3
-bti-1 bti-3: CFLAGS += -mbranch-protection=standard
+bti-1 bti-3: CFLAGS += -fno-stack-protector -mbranch-protection=standard
 bti-1 bti-3: LDFLAGS += -nostdlib
 endif
 # bti-2 tests PROT_BTI, so no special compiler support required.
diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target
index 0038cef02c..3473f4619e 100644
--- a/tests/tcg/arm/Makefile.target
+++ b/tests/tcg/arm/Makefile.target
@@ -12,7 +12,7 @@ float_madds: CFLAGS+=-mfpu=neon-vfpv4
 
 # Basic Hello World
 ARM_TESTS = hello-arm
-hello-arm: CFLAGS+=-marm -ffreestanding
+hello-arm: CFLAGS+=-marm -ffreestanding -fno-stack-protector
 hello-arm: LDFLAGS+=-nostdlib
 
 # IWMXT floating point extensions
diff --git a/tests/tcg/cris/Makefile.target b/tests/tcg/cris/Makefile.target
index 43587d2769..713e2a5b6c 100644
--- a/tests/tcg/cris/Makefile.target
+++ b/tests/tcg/cris/Makefile.target
@@ -30,7 +30,7 @@ AS= $(CC) -x assembler-with-cpp
 LD  = $(CC)
 
 # we rely on GCC inline:ing the stuff we tell it to in many places here.
-CFLAGS  = -Winline -Wall -g -O2 -static
+CFLAGS  = -Winline -Wall -g -O2 -static -fno-stack-protector
 NOSTDFLAGS = -nostartfiles -nostdlib
 ASFLAGS += -mcpu=v10 -g -Wa,-I,$(SRC_PATH)/tests/tcg/cris/bare
 CRT_FILES = crt.o sys.o
diff --git a/tests/tcg/hexagon/Makefile.target 
b/tests/tcg/hexagon/Makefile.target
index 87ed2c90b9..f839b2c0d5 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -19,7 +19,7 @@
 EXTRA_RUNS =
 
 CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal
-CFLAGS += -fno-unroll-loops
+CFLAGS += -fno-unroll-loops -fno-stack-protector
 
 HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon
 VPATH += $(HEX_SRC)
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index fdf757c6ce..3dec7c6c42 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -35,7 +35,7 @@ run-test-aes: QEMU_OPTS += -cpu max
 #
 # hello-i386 is a barebones app
 #
-hello-i386: CFLAGS+=-ffreestanding
+hello-i386: CFLAGS+=-ffreestanding -fno-stack-protector
 hello-i386: LDFLAGS+=-nostdlib
 
 # test-386 includes a couple of additional objects that need to be
diff --git a/tests/tcg/minilib/Makefile.target 
b/tests/tcg/minilib/Makefile.target
index c821d2806a..af0bf54be9 100644
--- a/tests/tcg/minilib/Makefile.target
+++ b/tests/tcg/minilib/Makefile.target
@@ -12,7 +12,7 @@ SYSTEM_MINILIB_SRC=$(SRC_PATH)/tests/tcg/minilib
 MINILIB_SRCS=$(wildcard $(SYSTEM_MINILIB_SRC)/*.c)
 MINILIB_OBJS=$(patsubst $(SYSTEM_MINILIB_SRC)/%.c, %.o, $(MINILIB_SRCS))
 
-MINILIB_CFLAGS+=-nostdlib -ggdb -O0
+MINILIB_CFLAGS+=-nostdlib -fno-stack-protector -g

Re: [PATCH] ui/dbus: fix clang compilation issue

2023-07-31 Thread Philippe Mathieu-Daudé

On 26/7/23 17:12, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau 

../ui/dbus-listener.c:236:9: error: expected expression
 Error *err = NULL;

See:
https://gitlab.com/qemu-project/qemu/-/issues/1782#note_1488517427

Signed-off-by: Marc-André Lureau 
---
  ui/dbus-listener.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)


Reviewed-by: Philippe Mathieu-Daudé 





[PATCH] tests/avocado: fix waiting for vm shutdown in replay_linux This patch fixes the race condition in waiting for shutdown of the replay linux test.

2023-07-31 Thread pavel . dovgalyuk
From: Pavel Dovgalyuk 

Signed-off-by: Pavel Dovgalyuk 
Suggested-by: John Snow 
---
 tests/avocado/replay_linux.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/avocado/replay_linux.py b/tests/avocado/replay_linux.py
index a76dd507fc..270ccc1eae 100644
--- a/tests/avocado/replay_linux.py
+++ b/tests/avocado/replay_linux.py
@@ -93,7 +93,7 @@ def launch_and_wait(self, record, args, shift):
 % os.path.getsize(replay_path))
 else:
 vm.event_wait('SHUTDOWN', self.timeout)
-vm.shutdown(True)
+vm.wait()
 logger.info('successfully fihished the replay')
 elapsed = time.time() - start_time
 logger.info('elapsed time %.2f sec' % elapsed)
-- 
2.34.1




Re: linux-user/armeb: Fix __kernel_cmpxchg() for armeb

2023-07-31 Thread Philippe Mathieu-Daudé

On 27/7/23 23:19, Helge Deller wrote:

Words are stored in big endian in the guest memory for armeb.

Commit 7f4f0d9ea870 ("linux-user/arm: Implement __kernel_cmpxchg with
host atomics") switched to use qatomic_cmpxchg() to swap a word with the
memory content, but missed to endianess-swap the oldval and newval
values when emulating an armeb CPU.

The bug can be verified with qemu >= v7.2 on any little-endian host,
when starting the armeb binary of the upx program, which just hangs
without this patch.

Signed-off-by: Helge Deller 
Cc: Richard Henderson 
Cc: Peter Maydell 
Cc: qemu-sta...@nongnu.org
Reported-by: "Markus F.X.J. Oberhumer" 
Reported-by: John Reiser 
Closes: https://github.com/upx/upx/issues/687


Reviewed-by: Philippe Mathieu-Daudé 





Re: linux-user/armeb: Fix __kernel_cmpxchg() for armeb

2023-07-31 Thread Michael Tokarev

31.07.2023 12:26, Philippe Mathieu-Daudé wrote:

On 27/7/23 23:19, Helge Deller wrote:

Words are stored in big endian in the guest memory for armeb.

..

Reviewed-by: Philippe Mathieu-Daudé 


There was a v3 of this patch already, fwiw.

/mjt



Re: [PATCH 0/5] linux-user: brk/mmap fixes

2023-07-31 Thread Michael Tokarev

31.07.2023 11:03, Akihiko Odaki wrote:

linux-user was failing on M2 MacBook Air. Digging into the details, I found
several bugs in brk and mmap so here are fixes.


There's another work in this area by Helge Deller, have you seen it?
("linux-user: Fix and optimize target memory layout", a v5 already).

FWIW,

/mjt



Re: [RFC PATCH 5/6] include/qemu/compiler: Fix problem with gcc_struct and Clang

2023-07-31 Thread Daniel P . Berrangé
On Mon, Jul 31, 2023 at 11:10:58AM +0200, Thomas Huth wrote:
> On 28/07/2023 17.13, Peter Maydell wrote:
> > On Fri, 28 Jul 2023 at 15:28, Thomas Huth  wrote:
> > > 
> > > Clang on Windows does not seem to know the "gcc_struct" attribute
> > > and emits a warning when we try to use it. Add an additional check
> > > here with __has_attribute() to avoid this problem.
> > > 
> > > Signed-off-by: Thomas Huth 
> > > ---
> > >   include/qemu/compiler.h | 2 +-
> > >   1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
> > > index a309f90c76..5065b4447c 100644
> > > --- a/include/qemu/compiler.h
> > > +++ b/include/qemu/compiler.h
> > > @@ -22,7 +22,7 @@
> > >   #define QEMU_EXTERN_C extern
> > >   #endif
> > > 
> > > -#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
> > > +#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) && 
> > > !defined(__clang__)
> > >   # define QEMU_PACKED __attribute__((gcc_struct, packed))
> > >   #else
> > >   # define QEMU_PACKED __attribute__((packed))
> > 
> > I'm not sure about this. The idea of QEMU_PACKED is that
> > it's supposed to give you the same struct layout
> > regardless of compiler. With this change it no longer
> > does that, and there's no compile-time guard against
> > using something in a packed struct that has a different
> > layout on Windows clang vs everything else.
> > 
> > If it was OK to use plain attribute(packed) we wouldn't
> > need the ifdef at all because we could use it on GCC too.
> 
> I still haven't quite grasped whether this attribute just affects structures
> with bitfields in it, or whether it could also affect other structures
> without bitfields.
> 
> Looking at https://gcc.gnu.org/onlinedocs/gcc/x86-Type-Attributes.html and
> https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-mms-bitfields , it
> sounds like it only affects structs with bitfields, unless you specify an
> "aligned(x)" attribute, too?
> 
> Anyway, using bitfields in structs for exchanging data with the guest is
> just way too error-prone, as you can see in the discussion about that
> VTD_IR_TableEntry in my other patch. We should maybe advise against
> bitfields in our coding style and point people to registerfields.h instead
> for new code? ... so that we use QEMU_PACKED mainly for legacy code. Would
> it then be OK for you, Peter, to go on with this approach?
> 
> Or do you see another possibility how we could fix that timeout problem in
> the 64-bit MSYS2 job? Still switching to clang, but compiling with
> --extra-cflags="-Wno-unknown-attributes" maybe?

I was surprised to see that we're not using ccache in gitlab CI. It wouldn't
help the from-clean compile time, but thereafter it ought to help. I'm doing
some tests with that to see the impact.

Another option might be to try precompiled headers, which meson supports
quite nicely / transparently. Might especially help on Windows where the
entire world is declared in windows.h


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] Fix some typos in documentation and comments

2023-07-31 Thread Peter Maydell
On Sun, 30 Jul 2023 at 19:55, Stefan Weil via  wrote:
>
> Signed-off-by: Stefan Weil 
> ---

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH] target/riscv: Use accelerated helper for AES64KS1I

2023-07-31 Thread Ard Biesheuvel
Use the accelerated SubBytes/ShiftRows/AddRoundKey AES helper to
implement the first half of the key schedule derivation. This does not
actually involve shifting rows, so clone the same uint32_t 4 times into
the AES vector to counter that.

Cc: Richard Henderson 
Cc: Philippe Mathieu-Daudé 
Signed-off-by: Ard Biesheuvel 
---
 target/riscv/crypto_helper.c | 17 +
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
index 4d65945429c6dcc4..257c5c4863fb160f 100644
--- a/target/riscv/crypto_helper.c
+++ b/target/riscv/crypto_helper.c
@@ -148,24 +148,17 @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, 
target_ulong rnum)
 
 uint8_t enc_rnum = rnum;
 uint32_t temp = (RS1 >> 32) & 0x;
-uint8_t rcon_ = 0;
-target_ulong result;
+AESState t, rc = {};
 
 if (enc_rnum != 0xA) {
 temp = ror32(temp, 8); /* Rotate right by 8 */
-rcon_ = round_consts[enc_rnum];
+rc.w[0] = rc.w[1] = rc.w[2] = rc.w[3] = round_consts[enc_rnum];
 }
 
-temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) |
-   ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) |
-   ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) |
-   ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0);
+t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp;
+aesenc_SB_SR_AK(&t, &t, &rc, false);
 
-temp ^= rcon_;
-
-result = ((uint64_t)temp << 32) | temp;
-
-return result;
+return t.d[0];
 }
 
 target_ulong HELPER(aes64im)(target_ulong rs1)
-- 
2.39.2




Re: [PATCH] vdpa: set old virtio status at cvq isolation probing end

2023-07-31 Thread Eugenio Perez Martin
On Mon, Jul 31, 2023 at 10:42 AM Jason Wang  wrote:
>
> On Mon, Jul 31, 2023 at 4:05 PM Eugenio Perez Martin
>  wrote:
> >
> > On Mon, Jul 31, 2023 at 8:36 AM Jason Wang  wrote:
> > >
> > > On Wed, Jul 26, 2023 at 2:27 PM Eugenio Perez Martin
> > >  wrote:
> > > >
> > > > On Wed, Jul 26, 2023 at 4:07 AM Jason Wang  wrote:
> > > > >
> > > > > On Wed, Jul 26, 2023 at 2:21 AM Eugenio Pérez  
> > > > > wrote:
> > > > > >
> > > > > > The device already has a virtio status set by vhost_vdpa_init by the
> > > > > > time vhost_vdpa_probe_cvq_isolation is called. vhost_vdpa_init set
> > > > > > S_ACKNOWLEDGE and S_DRIVER, so it is invalid to just reset it.
> > > > > >
> > > > > > It is invalid to start the device after it, but all devices seems 
> > > > > > to be
> > > > > > fine with it.  Fixing qemu so it follows virtio start procedure.
> > > > > >
> > > > > > Fixes: 152128d64697 ("vdpa: move CVQ isolation check to 
> > > > > > net_init_vhost_vdpa")
> > > > > > Reported-by: Dragos Tatulea 
> > > > > > Signed-off-by: Eugenio Pérez 
> > > > > > ---
> > > > > >  net/vhost-vdpa.c | 2 ++
> > > > > >  1 file changed, 2 insertions(+)
> > > > > >
> > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > index 9795306742..d7e2b714b4 100644
> > > > > > --- a/net/vhost-vdpa.c
> > > > > > +++ b/net/vhost-vdpa.c
> > > > > > @@ -1333,6 +1333,8 @@ static int vhost_vdpa_probe_cvq_isolation(int 
> > > > > > device_fd, uint64_t features,
> > > > > >  out:
> > > > > >  status = 0;
> > > > > >  ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > > > > > +status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> > > > > > +ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > > > >
> > > > > So if we fail after FEATURES_OK, this basically clears that bit. Spec
> > > > > doesn't say it can or not, I wonder if a reset is better?
> > > > >
> > > >
> > > > I don't follow this, the reset is just above the added code, isn't it?
> > >
> > > I meant for error path:
> > >
> > > E.g:
> > > uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
> > >  VIRTIO_CONFIG_S_DRIVER |
> > >  VIRTIO_CONFIG_S_FEATURES_OK;
> > > ...
> > > r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > > 
> > > if (cvq_group != -ENOTSUP) {
> > > r = cvq_group;
> > > goto out;
> > > }
> > >
> > > out:
> > > status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
> > > ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
> > >
> > > We're basically clearing FEATURES_OK?
> > >
> >
> > Yes, it is the state that previous functions (vhost_vdpa_init) set. We
> > need to leave it that way, either if the backend supports cvq
> > isolation or not, or in the case of an error. Not doing that way makes
> > vhost_dev_start (and vhost_vdpa_set_features) set the features before
> > setting VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER.
> > Otherwise, the guest can (and do) access to config space before
> > _S_ACKNOWLEDGE | _S_DRIVER.
>
> I'm not sure if it is supported by the spec or not (I meant clearing
> the FEATURES_OK). Or maybe we need a reset here?
>

Sorry, I'm still missing it :). The reset just above in all fail
paths. They go to "out" label, and the first ioctl reset the device,
the second set the VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER.

> Thanks
>
> >
> >
> > > >
> > > > > Btw, spec requires a read of status after setting FEATURES_OK, this
> > > > > seems to be missed in the current code.
> > > > >
> > > >
> > > > I'm ok with that, but this patch does not touch that part.
> > > >
> > > > To fix this properly we should:
> > > > - Expose vhost_vdpa_set_dev_features_fd as we did in previous versions
> > > > of the series that added vhost_vdpa_probe_cvq_isolation [1].
> > > > - Get status after vhost_vdpa_add_status, so both vhost start code and
> > > > this follows the standard properly.
> > > >
> > > > Is it ok to do these on top of this patch?
> > >
> > > Fine.
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks!
> > > >
> > > > [1] 
> > > > https://lore.kernel.org/qemu-devel/20230509154435.1410162-4-epere...@redhat.com/
> > > >
> > > >
> > > > > Thanks
> > > > >
> > > > > >  return r;
> > > > > >  }
> > > > > >
> > > > > > --
> > > > > > 2.39.3
> > > > > >
> > > > >
> > > >
> > >
> >
>




Re: [PATCH v3] linux-user/armeb: Fix __kernel_cmpxchg() for armeb

2023-07-31 Thread Philippe Mathieu-Daudé

On 28/7/23 21:23, Helge Deller wrote:

Commit 7f4f0d9ea870 ("linux-user/arm: Implement __kernel_cmpxchg with host
atomics") switched to use qatomic_cmpxchg() to swap a word with the memory
content, but missed to endianess-swap the oldval and newval values when
emulating an armeb CPU, which expects words to be stored in big endian in
the guest memory.

The bug can be verified with qemu >= v7.0 on any little-endian host, when
starting the armeb binary of the upx program, which just hangs without
this patch.

Signed-off-by: Helge Deller 
Cc: Richard Henderson 
Cc: Peter Maydell 
Cc: qemu-sta...@nongnu.org
Reported-by: "Markus F.X.J. Oberhumer" 
Reported-by: John Reiser 
Closes: https://github.com/upx/upx/issues/687

--
v3:
- use tswap64() in arm_kernel_cmpxchg64_helper() [review from Richard]
- mention that bug exists since qemu v7.0 [info from Markus]
v2:
- add tswap32() in arm_kernel_cmpxchg64_helper() [bisected by John]


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 1/3] qapi: Moved architecture agnostic data types to `machine`

2023-07-31 Thread Philippe Mathieu-Daudé

On 30/7/23 08:40, Dinah Baum wrote:

Signed-off-by: Dinah Baum 
---
  qapi/machine-target.json | 78 +---
  qapi/machine.json| 77 +++
  2 files changed, 78 insertions(+), 77 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH] target/hppa: Move iaoq registers and thus reduce generated code size

2023-07-31 Thread Philippe Mathieu-Daudé

On 30/7/23 18:30, Helge Deller wrote:

On hppa the Instruction Address Offset Queue (IAOQ) registers specifies
the next to-be-executed instructions addresses. Each generated TB writes those
registers at least once, so those registers are used heavily in generated
code.

Looking at the generated assembly, for a x86-64 host this code
to write the address $0x7ffe826f into iaoq_f is generated:
0x7f73e8000184:  c7 85 d4 01 00 00 6f 82  movl $0x7ffe826f, 0x1d4(%rbp)
0x7f73e800018c:  fe 7f
0x7f73e800018e:  c7 85 d8 01 00 00 73 82  movl $0x7ffe8273, 0x1d8(%rbp)
0x7f73e8000196:  fe 7f

With the trivial change, by moving the variables iaoq_f and iaoq_b to
the top of struct CPUArchState, the offset to %rbp is reduced (from
0x1d4 to 0), which allows the x86-64 tcg to generate 3 bytes less of
generated code per move instruction:
0x7fc1e800018c:  c7 45 00 6f 82 fe 7f movl $0x7ffe826f, (%rbp)
0x7fc1e8000193:  c7 45 04 73 82 fe 7f movl $0x7ffe8273, 4(%rbp)

Overall this is a reduction of generated code (not a reduction of
number of instructions).
A test run with checks the generated code size by running "/bin/ls"
with qemu-user shows that the code size shrinks from 1616767 to 1569273
bytes, which is ~97% of the former size.

Signed-off-by: Helge Deller 


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH] Fix some typos in documentation and comments

2023-07-31 Thread Philippe Mathieu-Daudé

On 30/7/23 20:03, Stefan Weil wrote:

Signed-off-by: Stefan Weil 
---

This patch was triggered by a spelling check for the generated
QEMU documentation using codespell. It does not try to fix all
typos which still exist in the QEMU code, but has a focus on
those required to fix the documentation. Nevertheless some code
comments with the same typos were fixed, too.

I think the patch is trivial, so maybe it can still be included
in the upcoming release, but that's not strictly necessary.

Stefan

  docs/about/deprecated.rst| 2 +-
  docs/devel/qom.rst   | 2 +-
  docs/system/devices/nvme.rst | 2 +-
  hw/core/loader.c | 4 ++--
  include/exec/memory.h| 2 +-
  ui/vnc-enc-tight.c   | 2 +-
  6 files changed, 7 insertions(+), 7 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [RFC PATCH 5/6] include/qemu/compiler: Fix problem with gcc_struct and Clang

2023-07-31 Thread Peter Maydell
On Mon, 31 Jul 2023 at 10:11, Thomas Huth  wrote:
> Or do you see another possibility how we could fix that timeout problem in
> the 64-bit MSYS2 job? Still switching to clang, but compiling with
> --extra-cflags="-Wno-unknown-attributes" maybe?

Is there any way we can separate out the "take 25 minutes to
install a pile of packages" part from the "actually compile and
test QEMU" part ?

thanks
-- PMM



Re: [PATCH v5 0/3] linux-user: Fix and optimize target memory layout

2023-07-31 Thread Joel Stanley
On Fri, 28 Jul 2023 at 18:58, Helge Deller  wrote:
>
> While trying to fix a bug which prevents running a static
> armhf binary with linux-user, I noticed a whole bunch of
> memory layout issues on various platforms. Most noteably
> the free heap space was very limited in the current setup.
> A large heap is important for example, if you want to
> use qemu-user for building Linux packages where gcc requires
> lots of space (e.g. using qemu-user as buildd for debian
> packages).
>
> Those findings led to this patch series, which
> - fixes qemu-arm to run static armhf binaries

Applying this on top of master and trying to run a simple armhf binary
on a ppc64le host fails:

qemu$ ./build/qemu-arm -d guest_errors,page,strace ~/hello-armhf
host mmap_min_addr=0x1
pgb_find_hole: base @ 1 for 4294967296 bytes
pgb_static: base @ 1 for 4294967295 bytes
pgb_reserved_va: base @ 0x1 for 4294967296 bytes
Locating guest address space @ 0x1
page layout changed following mmap
startend  size prot
0001-0006 0005 ---
0006-00066000 6000 ---
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0006 0005 r-x
0006-00066000 6000 ---
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0006 0005 r-x
0006-00064000 4000 rw-
00064000-00066000 2000 ---
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0006 0005 r-x
0006-00064000 4000 rw-
00064000-00066000 2000 rw-
f300-f381 0081 rw-
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0006 0005 r-x
0006-00064000 4000 rw-
00064000-00066000 2000 rw-
f300-f301 0001 ---
f301-f3811000 00801000 rw-
- 0001 r-x
guest_base  0x1
page layout changed following binary load
startend  size prot
0001-0006 0005 r-x
0006-00064000 4000 rw-
00064000-00066000 2000 rw-
f300-f301 0001 ---
f301-f381 0080 rw-
f381-f3811000 1000 r-x
- 0001 r-x
start_brk   0x
end_code0x0005f9c8
start_code  0x0001
start_data  0x00060414
end_data0x0006327c
start_stack 0xf380f420
brk 0x00066000
entry   0x00010341
argv_start  0xf380f424
env_start   0xf380f42c
auxv_start  0xf380f4a8
95718 brk(NULL) = 0x00066000
95718 brk(0x00066874) = 0x00066874
95718 set_tid_address(0x66068) = 95718
95718 set_robust_list(0x6606c,12) = -1 errno=38 (Function not implemented)
95718 Unknown syscall 398
95718 ugetrlimit(3,-209652764,328608,404128,401408,1) = 0
95718 readlinkat(AT_FDCWD,"/proc/self/exe",0xf380e390,4096) = 22
95718 getrandom(0x65940,4,1) = 4
95718 brk(NULL) = 0x00066874
95718 brk(0x00087874)page layout changed following mmap
startend  size prot
0001-0006 0005 r-x
0006-00064000 4000 rw-
00064000-00066000 2000 rw-
0007-0009 0002 rw-
f300-f301 0001 ---
f301-f381 0080 rw-
f381-f3811000 1000 r-x
- 0001 r-x
 = 0x00087874
95718 brk(0x00088000) = 0x00088000
95718 mprotect(0x0006,8192,PROT_READ) = 0
95718 
statx(1,"",AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_STATX_SYNC_AS_STAT,STATX_BASIC_STATS,0xf380f078)
= 0
95718 write(1,0x66b08,14) = -1 errno=14 (Bad address)
95718 exit_group(0)

A working arm binary by comparison:

qemu$ ./build/qemu-arm -d guest_errors,page,strace ~/hello
host mmap_min_addr=0x1
pgb_find_hole: base @ 1 for 4294967296 bytes
pgb_static: base @ 1 for 4294967295 bytes
pgb_reserved_va: base @ 0x1 for 4294967296 bytes
Locating guest address space @ 0x1
page layout changed following mmap
startend  size prot
0001-0009 0008 ---
0009-0009b000 b000 ---
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0009 0008 r-x
0009-0009b000 b000 ---
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0009 0008 r-x
0009-000a 0001 rw-
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0009 0008 r-x
0009-000a 0001 rw-
f300-f381 0081 rw-
- 0001 r-x
page layout changed following mmap
startend  size prot
0001-0009 0008 r-x
0009-000a 0001 rw-
f300-f301 0001 ---
f301-f3811000 00801000 rw-
- 0001 r-x
guest_base  0x1
page layout changed following binary load
startend  size prot
0001-0009 0008 r-x
0009-000a 0001 rw-
f300-f301 0001 ---
f301-f381 0080 rw-
f381-f3811000 1000 r-x
- 0001 r-x
start_br

Re: [PATCH 0/7] Enable vdpa net migration with features depending on CVQ

2023-07-31 Thread Eugenio Perez Martin
On Mon, Jul 31, 2023 at 8:41 AM Jason Wang  wrote:
>
> On Sat, Jul 29, 2023 at 1:20 AM Eugenio Pérez  wrote:
> >
> > At this moment the migration of net features that depends on CVQ is not
> > possible, as there is no reliable way to restore the device state like mac
> > address, number of enabled queues, etc to the destination.  This is mainly
> > caused because the device must only read CVQ, and process all the commands
> > before resuming the dataplane.
> >
> > This series lift that requirement, sending the VHOST_VDPA_SET_VRING_ENABLE
> > ioctl for dataplane vqs only after the device has processed all commands.
>
> I think it's better to explain (that is what I don't understand) why
> we can not simply reorder vhost_net_start_one() in vhost_net_start()?
>
> for (i = 0; i < nvhosts; i++) {
> if (i < data_queue_pairs) {
> peer = qemu_get_peer(ncs, i);
> } else {
> peer = qemu_get_peer(ncs, n->max_queue_pairs);
> }
>
> if (peer->vring_enable) {
> /* restore vring enable state */
> r = vhost_set_vring_enable(peer, peer->vring_enable);
>
> if (r < 0) {
> goto err_start;
> }
> }
>
> =>  r = vhost_net_start_one(get_vhost_net(peer), dev);
> if (r < 0) {
> goto err_start;
> }
> }
>
> Can we simply start cvq first here?
>

Well the current order is:
* set dev features (conditioned by
* Configure all vq addresses
* Configure all vq size
...
* Enable cvq
* DRIVER_OK
* Enable all the rest of the queues.

If we just start CVQ first, we need to modify vhost_vdpa_set_features
as minimum. A lot of code that depends on vdev->vq_index{,_end} may be
affected.

Also, I'm not sure if all the devices will support configure address,
vq size, etc after DRIVER_OK.

> Thanks
>
> > ---
> > From FRC:
> > * Enable vqs early in case CVQ cannot be shadowed.
> >
> > Eugenio Pérez (7):
> >   vdpa: export vhost_vdpa_set_vring_ready
> >   vdpa: add should_enable op
> >   vdpa: use virtio_ops->should_enable at vhost_vdpa_set_vrings_ready
> >   vdpa: add stub vhost_vdpa_should_enable
> >   vdpa: delay enable of data vqs
> >   vdpa: enable cvq svq if data vq are shadowed
> >   vdpa: remove net cvq migration blocker
> >
> >  include/hw/virtio/vhost-vdpa.h |  9 +
> >  hw/virtio/vhost-vdpa.c | 33 
> >  net/vhost-vdpa.c   | 69 ++
> >  hw/virtio/trace-events |  2 +-
> >  4 files changed, 89 insertions(+), 24 deletions(-)
> >
> > --
> > 2.39.3
> >
> >
>




Re: [PATCH 0/5] linux-user: brk/mmap fixes

2023-07-31 Thread Joel Stanley
On Mon, 31 Jul 2023 at 09:31, Michael Tokarev  wrote:
>
> 31.07.2023 11:03, Akihiko Odaki wrote:
> > linux-user was failing on M2 MacBook Air. Digging into the details, I found
> > several bugs in brk and mmap so here are fixes.
>
> There's another work in this area by Helge Deller, have you seen it?
> ("linux-user: Fix and optimize target memory layout", a v5 already).

Applying this series fixes the qemu-arm running the static armhf
binary on my ppc64le host that I reported here[1].

Tested-by: Joel Stanley 

The changes conflict with Helge's patches, so it would be good to work
out which of your changes should be combined with his.

Cheers,

Joel

[1] 
https://lore.kernel.org/qemu-devel/CACPK8XeyqcEDyyL3Jw2WYWs_gGdtTCf2=ly04cmgkshsmdj...@mail.gmail.com/



Re: [PATCH] elf2dmp: Don't abandon when Prcb is set to 0

2023-07-31 Thread Peter Maydell
On Sun, 30 Jul 2023 at 20:52, Viktor Prutyanov
 wrote:
>
>
> >> On 2023/06/12 12:42, Viktor Prutyanov wrote:
> >>
>  Prcb may be set to 0 for some CPUs if the dump was taken before they
>  start. The dump may still contain valuable information for started CPUs
>  so don't abandon conversion in such a case.
> 
>  Signed-off-by: Akihiko Odaki 
>  ---
>  contrib/elf2dmp/main.c | 5 +
>  1 file changed, 5 insertions(+)
> 
>  diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
>  index d77b8f98f7..91c58e4424 100644
>  --- a/contrib/elf2dmp/main.c
>  +++ b/contrib/elf2dmp/main.c
>  @@ -312,6 +312,11 @@ static int fill_context(KDDEBUGGER_DATA64 *kdbg,
>  return 1;
>  }
> 
>  + if (!Prcb) {
>  + eprintf("Context for CPU #%d is missing\n", i);
>  + continue;
>  + }
>  +
>  if (va_space_rw(vs, Prcb + kdbg->OffsetPrcbContext,
>  &Context, sizeof(Context), 0)) {
>  eprintf("Failed to read CPU #%d ContextFrame location\n", i);
> 
>  --
>  2.40.1
> >>>
> >>> Hi Akihiko,
> >>>
> >>> How this fix can be tested?
> >>
> >> It is a bit difficult to test it as you need to interrupt the very early
> >> stage of boot. I applied the following change to TCG so that it stops
> >> immediately after the first processor configures Prcb.
> >>
> >> diff --git a/target/i386/tcg/sysemu/misc_helper.c
> >> b/target/i386/tcg/sysemu/misc_helper.c
> >> index e1528b7f80..f68eba9cac 100644
> >> --- a/target/i386/tcg/sysemu/misc_helper.c
> >> +++ b/target/i386/tcg/sysemu/misc_helper.c
> >> @@ -25,6 +25,9 @@
> >> #include "exec/address-spaces.h"
> >> #include "exec/exec-all.h"
> >> #include "tcg/helper-tcg.h"
> >> +#include "exec/gdbstub.h"
> >> +#include "hw/core/cpu.h"
> >> +#include "sysemu/runstate.h"
> >>
> >> void helper_outb(CPUX86State *env, uint32_t port, uint32_t data)
> >> {
> >> @@ -217,7 +220,10 @@ void helper_wrmsr(CPUX86State *env)
> >> env->segs[R_FS].base = val;
> >> break;
> >> case MSR_GSBASE:
> >> + printf("%s: %" PRIx64 "\n", __func__, val);
> >> env->segs[R_GS].base = val;
> >> + gdb_set_stop_cpu(current_cpu);
> >> + vm_stop(RUN_STATE_PAUSED);
> >> break;
> >> case MSR_KERNELGSBASE:
> >> env->kernelgsbase = val;
> >>
> >>> NumberProcessors field is still set to qemu_elf.state_nr, how does WinDbg 
> >>> react to this?
> >>
> >> If Prcb for processor 1 is missing, WinDbg outputs: KiProcessorBlock[1]
> >> is null.
> >> You can still debug the started processors with no issue.
> >>
> >> Regards,
> >> Akihiko Odaki
> >>
> >>> Viktor
> >
> > Reviewed-by: Viktor Prutyanov 
>
> Hi Peter,
>
> Could you please put Akihiko's patch into your branch?

Sure. I've applied this to target-arm.next and should
be doing a pullreq for it later today or tomorrow.

-- PMM



Re: [PATCH 0/5] linux-user: brk/mmap fixes

2023-07-31 Thread Akihiko Odaki

On 2023/07/31 19:17, Joel Stanley wrote:

On Mon, 31 Jul 2023 at 09:31, Michael Tokarev  wrote:


31.07.2023 11:03, Akihiko Odaki wrote:

linux-user was failing on M2 MacBook Air. Digging into the details, I found
several bugs in brk and mmap so here are fixes.


There's another work in this area by Helge Deller, have you seen it?
("linux-user: Fix and optimize target memory layout", a v5 already).


Applying this series fixes the qemu-arm running the static armhf
binary on my ppc64le host that I reported here[1].

Tested-by: Joel Stanley 


Thanks for testing.



The changes conflict with Helge's patches, so it would be good to work
out which of your changes should be combined with his.


I suggest Helge to rebase his change to my series. The below is some 
detailed explanation:


It is almost orthogonal to my series, but patch 2 will conflict with my 
series since it changes how the initial brk is calculated.


Unfortunately I think patch 2 has a bug. Without my patch, do_brk() 
assumes the heap is aligned with the host page size, but the patch does 
not consider the case that the host and target page sizes are different.


My patch makes the heap aligned with the target page size so it will fix 
the bug.


Regards,
Akihiko Odaki



Cheers,

Joel

[1] 
https://lore.kernel.org/qemu-devel/CACPK8XeyqcEDyyL3Jw2WYWs_gGdtTCf2=ly04cmgkshsmdj...@mail.gmail.com/




[PATCH for-8.2 v2 1/6] vfio/migration: Move from STOP_COPY to STOP in vfio_save_cleanup()

2023-07-31 Thread Avihai Horon
Changing the device state from STOP_COPY to STOP can take time as the
device may need to free resources and do other operations as part of the
transition. Currently, this is done in vfio_save_complete_precopy() and
therefore it is counted in the migration downtime.

To avoid this, change the device state from STOP_COPY to STOP in
vfio_save_cleanup(), which is called after migration has completed and
thus is not part of migration downtime.

Signed-off-by: Avihai Horon 
---
 hw/vfio/migration.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 2674f4bc47..8acd182a8b 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -383,6 +383,19 @@ static void vfio_save_cleanup(void *opaque)
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
 
+/*
+ * Changing device state from STOP_COPY to STOP can take time. Do it here,
+ * after migration has completed, so it won't increase downtime.
+ */
+if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
+/*
+ * If setting the device in STOP state fails, the device should be
+ * reset. To do so, use ERROR state as a recover state.
+ */
+vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
+ VFIO_DEVICE_STATE_ERROR);
+}
+
 g_free(migration->data_buffer);
 migration->data_buffer = NULL;
 migration->precopy_init_size = 0;
@@ -508,12 +521,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 return ret;
 }
 
-/*
- * If setting the device in STOP state fails, the device should be reset.
- * To do so, use ERROR state as a recover state.
- */
-ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
-   VFIO_DEVICE_STATE_ERROR);
 trace_vfio_save_complete_precopy(vbasedev->name, ret);
 
 return ret;
-- 
2.26.3




Re: [PATCH v2 4/4] vhost-user-scsi: support reconnect to backend

2023-07-31 Thread Li Feng


> 2023年7月31日 06:14,Raphael Norwitz  写道:
> 
> I don’t think we should be changing any vhost-scsi-common code here. I’d 
> rather implement wrappers around vhost_user_scsi_start/stop() around 
> vhost_user_scsi_common_start/stop() and check started_vu there.
> 
> Otherwise I think this is looking good. 
> 
> Glad to see you caught the vhost_user_scsi_handle_ouptut and implemented it 
> like vhost-user-blk. Can it go in a separate change?

I will fix it in v3.

> 
>> On Jul 25, 2023, at 6:42 AM, Li Feng  wrote:
>> 
>> If the backend crashes and restarts, the device is broken.
>> This patch adds reconnect for vhost-user-scsi.
>> 
>> Tested with spdk backend.
>> 
>> Signed-off-by: Li Feng 
>> ---
>> hw/scsi/vhost-scsi-common.c   |   6 +
>> hw/scsi/vhost-user-scsi.c | 220 +++---
>> include/hw/virtio/vhost-scsi-common.h |   3 +
>> include/hw/virtio/vhost-user-scsi.h   |   3 +
>> 4 files changed, 211 insertions(+), 21 deletions(-)
>> 
>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
>> index 664adb15b4..3fde477eee 100644
>> --- a/hw/scsi/vhost-scsi-common.c
>> +++ b/hw/scsi/vhost-scsi-common.c
>> @@ -81,6 +81,7 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
>>error_report("Error start vhost dev");
>>goto err_guest_notifiers;
>>}
>> +vsc->started_vu = true;
>> 
>>/* guest_notifier_mask/pending not used yet, so just unmask
>> * everything here.  virtio-pci will do the right thing by
>> @@ -106,6 +107,11 @@ void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>>VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>>int ret = 0;
>> 
>> +if (!vsc->started_vu) {
>> +return;
>> +}
>> +vsc->started_vu = false;
>> +
>>vhost_dev_stop(&vsc->dev, vdev, true);
>> 
>>if (k->set_guest_notifiers) {
>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>> index ee99b19e7a..bd32dcf999 100644
>> --- a/hw/scsi/vhost-user-scsi.c
>> +++ b/hw/scsi/vhost-user-scsi.c
>> @@ -46,20 +46,25 @@ enum VhostUserProtocolFeature {
>> static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>> +DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
>>VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> -bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
>> +VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +bool should_start = virtio_device_should_start(vdev, status);
>> +int ret;
>> 
>> -if (vhost_dev_is_started(&vsc->dev) == start) {
>> +if (!s->connected) {
>>return;
>>}
>> 
>> -if (start) {
>> -int ret;
>> +if (vhost_dev_is_started(&vsc->dev) == should_start) {
>> +return;
>> +}
>> 
>> +if (should_start) {
>>ret = vhost_scsi_common_start(vsc);
>>if (ret < 0) {
>>error_report("unable to start vhost-user-scsi: %s", 
>> strerror(-ret));
>> -exit(1);
>> +qemu_chr_fe_disconnect(&vs->conf.chardev);
>>}
>>} else {
>>vhost_scsi_common_stop(vsc);
>> @@ -85,8 +90,160 @@ static void vhost_user_scsi_reset(VirtIODevice *vdev)
>>}
>> }
>> 
>> -static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>> +static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>> {
>> +VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>> +DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
>> +VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +
>> +Error *local_err = NULL;
>> +int i, ret;
>> +
>> +if (!vdev->start_on_kick) {
>> +return;
>> +}
>> +
>> +if (!s->connected) {
>> +return;
>> +}
>> +
>> +if (vhost_dev_is_started(&vsc->dev)) {
>> +return;
>> +}
>> +
>> +/*
>> + * Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
>> + * vhost here instead of waiting for .set_status().
>> + */
>> +ret = vhost_scsi_common_start(vsc);
>> +if (ret < 0) {
>> +error_reportf_err(local_err, "vhost-user-blk: vhost start failed: 
>> ");
>> +qemu_chr_fe_disconnect(&vs->conf.chardev);
>> +return;
>> +}
>> +
>> +/* Kick right away to begin processing requests already in vring */
>> +for (i = 0; i < vsc->dev.nvqs; i++) {
>> +VirtQueue *kick_vq = virtio_get_queue(vdev, i);
>> +
>> +if (!virtio_queue_get_desc_addr(vdev, i)) {
>> +continue;
>> +}
>> +event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
>> +}
>> +}
>> +
>> +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp)
>> +{
>> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
>> +VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +int ret 

[PATCH for-8.2 v2 5/6] vfio/migration: Add P2P support for VFIO migration

2023-07-31 Thread Avihai Horon
VFIO migration uAPI defines an optional intermediate P2P quiescent
state. While in the P2P quiescent state, P2P DMA transactions cannot be
initiated by the device, but the device can respond to incoming ones.
Additionally, all outstanding P2P transactions are guaranteed to have
been completed by the time the device enters this state.

The purpose of this state is to support migration of multiple devices
that might do P2P transactions between themselves.

Add support for P2P migration by transitioning all the devices to the
P2P quiescent state before stopping or starting the devices. Use the new
VMChangeStateHandler prepare_cb to achieve that behavior.

This will allow migration of multiple VFIO devices if all of them
support P2P migration.

Signed-off-by: Avihai Horon 
---
 docs/devel/vfio-migration.rst | 93 +--
 hw/vfio/common.c  |  6 ++-
 hw/vfio/migration.c   | 47 --
 hw/vfio/trace-events  |  1 +
 4 files changed, 106 insertions(+), 41 deletions(-)

diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
index b433cb5bb2..605fe60e96 100644
--- a/docs/devel/vfio-migration.rst
+++ b/docs/devel/vfio-migration.rst
@@ -23,9 +23,21 @@ and recommends that the initial bytes are sent and loaded in 
the destination
 before stopping the source VM. Enabling this migration capability will
 guarantee that and thus, can potentially reduce downtime even further.
 
-Note that currently VFIO migration is supported only for a single device. This
-is due to VFIO migration's lack of P2P support. However, P2P support is planned
-to be added later on.
+To support migration of multiple devices that might do P2P transactions between
+themselves, VFIO migration uAPI defines an intermediate P2P quiescent state.
+While in the P2P quiescent state, P2P DMA transactions cannot be initiated by
+the device, but the device can respond to incoming ones. Additionally, all
+outstanding P2P transactions are guaranteed to have been completed by the time
+the device enters this state.
+
+All the devices that support P2P migration are first transitioned to the P2P
+quiescent state and only then are they stopped or started. This makes migration
+safe P2P-wise, since starting and stopping the devices is not done atomically
+for all the devices together.
+
+Thus, multiple VFIO devices migration is allowed only if all the devices
+support P2P migration. Single VFIO device migration is allowed regardless of
+P2P migration support.
 
 A detailed description of the UAPI for VFIO device migration can be found in
 the comment for the ``vfio_device_mig_state`` structure in the header file
@@ -132,54 +144,63 @@ will be blocked.
 Flow of state changes during Live migration
 ===
 
-Below is the flow of state change during live migration.
+Below is the state change flow during live migration for a VFIO device that
+supports both precopy and P2P migration. The flow for devices that don't
+support it is similar, except that the relevant states for precopy and P2P are
+skipped.
 The values in the parentheses represent the VM state, the migration state, and
 the VFIO device state, respectively.
-The text in the square brackets represents the flow if the VFIO device supports
-pre-copy.
 
 Live migration save path
 
 
 ::
 
-QEMU normal running state
-(RUNNING, _NONE, _RUNNING)
-  |
+   QEMU normal running state
+   (RUNNING, _NONE, _RUNNING)
+  |
  migrate_init spawns migration_thread
-Migration thread then calls each device's .save_setup()
-  (RUNNING, _SETUP, _RUNNING [_PRE_COPY])
-  |
-  (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY])
-  If device is active, get pending_bytes by 
.state_pending_{estimate,exact}()
-  If total pending_bytes >= threshold_size, call .save_live_iterate()
-  [Data of VFIO device for pre-copy phase is copied]
-Iterate till total pending bytes converge and are less than threshold
-  |
-  On migration completion, vCPU stops and calls .save_live_complete_precopy for
-  each active device. The VFIO device is then transitioned into _STOP_COPY 
state
-  (FINISH_MIGRATE, _DEVICE, _STOP_COPY)
-  |
- For the VFIO device, iterate in .save_live_complete_precopy until
- pending data is 0
-   (FINISH_MIGRATE, _DEVICE, _STOP)
-  |
- (FINISH_MIGRATE, _COMPLETED, _STOP)
- Migraton thread schedules cleanup bottom half and exits
+Migration thread then calls each device's .save_setup()
+  (RU

[PATCH for-8.2 v2 3/6] qdev: Add qdev_add_vm_change_state_handler_full()

2023-07-31 Thread Avihai Horon
Add qdev_add_vm_change_state_handler_full() variant that allows setting
a prepare callback in addition to the main callback.

This will facilitate adding P2P support for VFIO migration in the
following patches.

Signed-off-by: Avihai Horon 
Signed-off-by: Joao Martins 
---
 include/sysemu/runstate.h |  3 +++
 hw/core/vm-change-state-handler.c | 14 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index 764a0fc6a4..08afb97695 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -23,6 +23,9 @@ 
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
 VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
  VMChangeStateHandler *cb,
  void *opaque);
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
+DeviceState *dev, VMChangeStateHandler *cb,
+VMChangeStateHandler *prepare_cb, void *opaque);
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
 /**
  * vm_state_notify: Notify the state of the VM
diff --git a/hw/core/vm-change-state-handler.c 
b/hw/core/vm-change-state-handler.c
index 1f3630986d..8e2639224e 100644
--- a/hw/core/vm-change-state-handler.c
+++ b/hw/core/vm-change-state-handler.c
@@ -55,8 +55,20 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
 VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
  VMChangeStateHandler *cb,
  void *opaque)
+{
+return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque);
+}
+
+/*
+ * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb
+ * argument too.
+ */
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
+DeviceState *dev, VMChangeStateHandler *cb,
+VMChangeStateHandler *prepare_cb, void *opaque)
 {
 int depth = qdev_get_dev_tree_depth(dev);
 
-return qemu_add_vm_change_state_handler_prio(cb, opaque, depth);
+return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque,
+  depth);
 }
-- 
2.26.3




[PATCH for-8.2 v2 2/6] sysemu: Add prepare callback to struct VMChangeStateEntry

2023-07-31 Thread Avihai Horon
Add prepare callback to struct VMChangeStateEntry.

The prepare callback is optional and can be set by the new function
qemu_add_vm_change_state_handler_prio_full() that allows setting this
callback in addition to the main callback.

The prepare callbacks and main callbacks are called in two separate
phases: First all prepare callbacks are called and only then all main
callbacks are called.

The purpose of the new prepare callback is to allow all devices to run a
preliminary task before calling the devices' main callbacks.

This will facilitate adding P2P support for VFIO migration where all
VFIO devices need to be put in an intermediate P2P quiescent state
before being stopped or started by the main callback.

Signed-off-by: Avihai Horon 
---
 include/sysemu/runstate.h |  4 
 softmmu/runstate.c| 40 +++
 2 files changed, 44 insertions(+)

diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index 7beb29c2e2..764a0fc6a4 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -16,6 +16,10 @@ VMChangeStateEntry 
*qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
  void *opaque);
 VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
 VMChangeStateHandler *cb, void *opaque, int priority);
+VMChangeStateEntry *
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
+   VMChangeStateHandler *prepare_cb,
+   void *opaque, int priority);
 VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
  VMChangeStateHandler *cb,
  void *opaque);
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index f3bd862818..1652ed0439 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -271,6 +271,7 @@ void qemu_system_vmstop_request(RunState state)
 }
 struct VMChangeStateEntry {
 VMChangeStateHandler *cb;
+VMChangeStateHandler *prepare_cb;
 void *opaque;
 QTAILQ_ENTRY(VMChangeStateEntry) entries;
 int priority;
@@ -293,12 +294,39 @@ static QTAILQ_HEAD(, VMChangeStateEntry) 
vm_change_state_head =
  */
 VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
 VMChangeStateHandler *cb, void *opaque, int priority)
+{
+return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
+  priority);
+}
+
+/**
+ * qemu_add_vm_change_state_handler_prio_full:
+ * @cb: the main callback to invoke
+ * @prepare_cb: a callback to invoke before the main callback
+ * @opaque: user data passed to the callbacks
+ * @priority: low priorities execute first when the vm runs and the reverse is
+ *true when the vm stops
+ *
+ * Register a main callback function and an optional prepare callback function
+ * that are invoked when the vm starts or stops running. The main callback and
+ * the prepare callback are called in two separate phases: First all prepare
+ * callbacks are called and only then all main callbacks are called. As its
+ * name suggests, the prepare callback can be used to do some preparatory work
+ * before invoking the main callback.
+ *
+ * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
+ */
+VMChangeStateEntry *
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
+   VMChangeStateHandler *prepare_cb,
+   void *opaque, int priority)
 {
 VMChangeStateEntry *e;
 VMChangeStateEntry *other;
 
 e = g_malloc0(sizeof(*e));
 e->cb = cb;
+e->prepare_cb = prepare_cb;
 e->opaque = opaque;
 e->priority = priority;
 
@@ -333,10 +361,22 @@ void vm_state_notify(bool running, RunState state)
 trace_vm_state_notify(running, state, RunState_str(state));
 
 if (running) {
+QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
+if (e->prepare_cb) {
+e->prepare_cb(e->opaque, running, state);
+}
+}
+
 QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
 e->cb(e->opaque, running, state);
 }
 } else {
+QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
+if (e->prepare_cb) {
+e->prepare_cb(e->opaque, running, state);
+}
+}
+
 QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
 e->cb(e->opaque, running, state);
 }
-- 
2.26.3




[PATCH for-8.2 v2 0/6] vfio/migration: Add P2P support for VFIO migration

2023-07-31 Thread Avihai Horon
Hi all,

The first patch in this series adds a small optimization to VFIO
migration by moving the STOP_COPY->STOP transition to
vfio_save_cleanup(). Testing with a ConnectX-7 VFIO device showed that
this can reduce downtime by up to 6%.

The rest of the series adds P2P support for VFIO migration.

VFIO migration uAPI defines an optional intermediate P2P quiescent
state. While in the P2P quiescent state, P2P DMA transactions cannot be
initiated by the device, but the device can respond to incoming ones.
Additionally, all outstanding P2P transactions are guaranteed to have
been completed by the time the device enters this state.

The purpose of this state is to support migration of multiple devices
that might do P2P transactions between themselves.

To implement P2P migration support, all the devices must be transitioned
to the P2P quiescent state before being stopped or started.

This behavior is achieved by adding an optional prepare callback to
VMChangeStateEntry. These callbacks are invoked before the main VM state
change callbacks, transitioning all the VFIO devices to the P2P state,
and only then are the main callbacks invoked, which stop or start the
devices.

This will allow migration of multiple VFIO devices if all of them
support P2P migration.

Thanks.

Changes from v1 [1]:
* Rebased on latest master branch.
* Renamed pre_change_cb to prepare_cb and adjusted relevant code
  accordingly. (Cedric)
* Split VFIO vmstate change handler to two separate handlers, one for
  prepare callback and another for main callback. (Cedric)
* Renamed vfio_should_block_multiple_devices_migration() to
  vfio_multiple_devices_migration_is_supported() and reverted its logic
  accordingly. (Cedric)
* Rephrased "that are doing P2P" to "that might do P2P" in docs and
  commit message. (Jason)
* Added Cedric R-b tag to patch #4.

[1]
https://lore.kernel.org/qemu-devel/20230716081541.27900-1-avih...@nvidia.com/

Avihai Horon (5):
  vfio/migration: Move from STOP_COPY to STOP in vfio_save_cleanup()
  sysemu: Add prepare callback to struct VMChangeStateEntry
  qdev: Add qdev_add_vm_change_state_handler_full()
  vfio/migration: Add P2P support for VFIO migration
  vfio/migration: Allow migration of multiple P2P supporting devices

Joao Martins (1):
  vfio/migration: Refactor PRE_COPY and RUNNING state checks

 docs/devel/vfio-migration.rst | 93 +++
 include/hw/vfio/vfio-common.h |  2 +
 include/sysemu/runstate.h |  7 +++
 hw/core/vm-change-state-handler.c | 14 -
 hw/vfio/common.c  | 50 +
 hw/vfio/migration.c   | 76 -
 softmmu/runstate.c| 40 +
 hw/vfio/trace-events  |  1 +
 8 files changed, 219 insertions(+), 64 deletions(-)

-- 
2.26.3




[PATCH for-8.2 v2 4/6] vfio/migration: Refactor PRE_COPY and RUNNING state checks

2023-07-31 Thread Avihai Horon
From: Joao Martins 

Move the PRE_COPY and RUNNING state checks to helper functions.

This is in preparation for adding P2P VFIO migration support, where
these helpers will also test for PRE_COPY_P2P and RUNNING_P2P states.

Signed-off-by: Joao Martins 
Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |  2 ++
 hw/vfio/common.c  | 22 ++
 hw/vfio/migration.c   | 10 --
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..e9b8954595 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -230,6 +230,8 @@ void vfio_unblock_multiple_devices_migration(void);
 bool vfio_viommu_preset(VFIODevice *vbasedev);
 int64_t vfio_mig_bytes_transferred(void);
 void vfio_reset_bytes_transferred(void);
+bool vfio_device_state_is_running(VFIODevice *vbasedev);
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
 
 #ifdef CONFIG_LINUX
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..16cf79a76c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -437,6 +437,20 @@ static void vfio_set_migration_error(int err)
 }
 }
 
+bool vfio_device_state_is_running(VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+
+return migration->device_state == VFIO_DEVICE_STATE_RUNNING;
+}
+
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+
+return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
+}
+
 static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
 {
 VFIOGroup *group;
@@ -457,8 +471,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer 
*container)
 }
 
 if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
-(migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
- migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) {
+(vfio_device_state_is_running(vbasedev) ||
+ vfio_device_state_is_precopy(vbasedev))) {
 return false;
 }
 }
@@ -503,8 +517,8 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container)
 return false;
 }
 
-if (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
-migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
+if (vfio_device_state_is_running(vbasedev) ||
+vfio_device_state_is_precopy(vbasedev)) {
 continue;
 } else {
 return false;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 8acd182a8b..48f9c23cbe 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -411,7 +411,7 @@ static void vfio_state_pending_estimate(void *opaque, 
uint64_t *must_precopy,
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
 
-if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) {
+if (!vfio_device_state_is_precopy(vbasedev)) {
 return;
 }
 
@@ -444,7 +444,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t 
*must_precopy,
 vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
 *must_precopy += stop_copy_size;
 
-if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
+if (vfio_device_state_is_precopy(vbasedev)) {
 vfio_query_precopy_size(migration);
 
 *must_precopy +=
@@ -459,9 +459,8 @@ static void vfio_state_pending_exact(void *opaque, uint64_t 
*must_precopy,
 static bool vfio_is_active_iterate(void *opaque)
 {
 VFIODevice *vbasedev = opaque;
-VFIOMigration *migration = vbasedev->migration;
 
-return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
+return vfio_device_state_is_precopy(vbasedev);
 }
 
 static int vfio_save_iterate(QEMUFile *f, void *opaque)
@@ -656,7 +655,6 @@ static const SaveVMHandlers savevm_vfio_handlers = {
 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
 {
 VFIODevice *vbasedev = opaque;
-VFIOMigration *migration = vbasedev->migration;
 enum vfio_device_mig_state new_state;
 int ret;
 
@@ -664,7 +662,7 @@ static void vfio_vmstate_change(void *opaque, bool running, 
RunState state)
 new_state = VFIO_DEVICE_STATE_RUNNING;
 } else {
 new_state =
-(migration->device_state == VFIO_DEVICE_STATE_PRE_COPY &&
+(vfio_device_state_is_precopy(vbasedev) &&
  (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) 
?
 VFIO_DEVICE_STATE_STOP_COPY :
 VFIO_DEVICE_STATE_STOP;
-- 
2.26.3




[PATCH for-8.2 v2 6/6] vfio/migration: Allow migration of multiple P2P supporting devices

2023-07-31 Thread Avihai Horon
Now that P2P support has been added to VFIO migration, allow migration
of multiple devices if all of them support P2P migration.

Single device migration is allowed regardless of P2P migration support.

Signed-off-by: Avihai Horon 
Signed-off-by: Joao Martins 
---
 hw/vfio/common.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7c3d636025..8a8d074e18 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -363,21 +363,31 @@ bool vfio_mig_active(void)
 
 static Error *multiple_devices_migration_blocker;
 
-static unsigned int vfio_migratable_device_num(void)
+/*
+ * Multiple devices migration is allowed only if all devices support P2P
+ * migration. Single device migration is allowed regardless of P2P migration
+ * support.
+ */
+static bool vfio_multiple_devices_migration_is_supported(void)
 {
 VFIOGroup *group;
 VFIODevice *vbasedev;
 unsigned int device_num = 0;
+bool all_support_p2p = true;
 
 QLIST_FOREACH(group, &vfio_group_list, next) {
 QLIST_FOREACH(vbasedev, &group->device_list, next) {
 if (vbasedev->migration) {
 device_num++;
+
+if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) {
+all_support_p2p = false;
+}
 }
 }
 }
 
-return device_num;
+return all_support_p2p || device_num <= 1;
 }
 
 int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
@@ -385,19 +395,19 @@ int vfio_block_multiple_devices_migration(VFIODevice 
*vbasedev, Error **errp)
 int ret;
 
 if (multiple_devices_migration_blocker ||
-vfio_migratable_device_num() <= 1) {
+vfio_multiple_devices_migration_is_supported()) {
 return 0;
 }
 
 if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
-error_setg(errp, "Migration is currently not supported with multiple "
- "VFIO devices");
+error_setg(errp, "Multiple VFIO devices migration is supported only if 
"
+ "all of them support P2P migration");
 return -EINVAL;
 }
 
 error_setg(&multiple_devices_migration_blocker,
-   "Migration is currently not supported with multiple "
-   "VFIO devices");
+   "Multiple VFIO devices migration is supported only if all of "
+   "them support P2P migration");
 ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
 if (ret < 0) {
 error_free(multiple_devices_migration_blocker);
@@ -410,7 +420,7 @@ int vfio_block_multiple_devices_migration(VFIODevice 
*vbasedev, Error **errp)
 void vfio_unblock_multiple_devices_migration(void)
 {
 if (!multiple_devices_migration_blocker ||
-vfio_migratable_device_num() > 1) {
+!vfio_multiple_devices_migration_is_supported()) {
 return;
 }
 
-- 
2.26.3




Re: assert fails in s390x TCG

2023-07-31 Thread Claudio Fontana
On 7/21/23 11:08, Claudio Fontana wrote:
> 
> Hello Cornelia, Richard,
> 
> I had some strange behavior in an s390x TCG VM that I am debugging,
> 
> and configured latest upstream QEMU with --enable-debug --enable-debug-tcg
> 
> and I am running the qemu binary with -d unimp,guest_errors .
> 
> I get:
> 
> /usr/bin/qemu-system-s390x -nodefaults -no-reboot -nographic -vga none -cpu 
> qemu -d unimp,guest_errors -object rng-random,filename=/dev/random,id=rng0 
> -device virtio-rng-ccw,rng=rng0 -runas qemu -net none -kernel 
> /var/tmp/boot/kernel -initrd /var/tmp/boot/initrd -append 
> root=/dev/disk/by-id/virtio-0 rootfstype=ext3 
> rootflags=data=writeback,nobarrier,commit=150,noatime elevator=noop 
> nmi_watchdog=0 rw oops=panic panic=1 quiet elevator=noop console=hvc0 
> init=build -m 2048 -drive 
> file=/var/tmp/img,format=raw,if=none,id=disk,cache=unsafe -device 
> virtio-blk-ccw,drive=disk,serial=0 -drive 
> file=/var/tmp/swap,format=raw,if=none,id=swap,cache=unsafe -device 
> virtio-blk-ccw,drive=swap,serial=1 -device virtio-serial-ccw -device 
> virtconsole,chardev=virtiocon0 -chardev stdio,id=virtiocon0 -chardev 
> socket,id=monitor,server=on,wait=off,path=/var/tmp/img.qemu/monitor -mon 
> chardev=monitor,mode=readline -smp 8
> 
> unimplemented opcode 0xb9ab
> unimplemented opcode 0xb2af
> 
> ERROR:../accel/tcg/tb-maint.c:348:page_unlock__debug: assertion failed: 
> (page_is_locked(pd))
> Bail out! ERROR:../accel/tcg/tb-maint.c:348:page_unlock__debug: assertion 
> failed: (page_is_locked(pd))
> 
> Thread 3 "qemu-system-s39" received signal SIGABRT, Aborted.
> [Switching to Thread 0x753516c0 (LWP 215975)]
> (gdb) bt
> #0  0x7730dabc in __pthread_kill_implementation () at /lib64/libc.so.6
> #1  0x772bc266 in raise () at /lib64/libc.so.6
> #2  0x772a4897 in abort () at /lib64/libc.so.6
> #3  0x776f0eee in  () at /lib64/libglib-2.0.so.0
> #4  0x7775649a in g_assertion_message_expr () at 
> /lib64/libglib-2.0.so.0
> #5  0x55b96134 in page_unlock__debug (pd=0x7ffee8680440) at 
> ../accel/tcg/tb-maint.c:348
> #6  0x55b962a9 in page_unlock (pd=0x7ffee8680440) at 
> ../accel/tcg/tb-maint.c:397
> #7  0x55b96580 in tb_unlock_pages (tb=0x7fffefffeb00) at 
> ../accel/tcg/tb-maint.c:483
> #8  0x55b94698 in cpu_exec_longjmp_cleanup (cpu=0x56566a30) at 
> ../accel/tcg/cpu-exec.c:556
> #9  0x55b954e0 in cpu_exec_setjmp (cpu=0x56566a30, 
> sc=0x75350540) at ../accel/tcg/cpu-exec.c:1054
> #10 0x55b9557a in cpu_exec (cpu=0x56566a30) at 
> ../accel/tcg/cpu-exec.c:1083
> #11 0x55bb9af6 in tcg_cpus_exec (cpu=0x56566a30) at 
> ../accel/tcg/tcg-accel-ops.c:75
> #12 0x55bba1ae in mttcg_cpu_thread_fn (arg=0x56566a30) at 
> ../accel/tcg/tcg-accel-ops-mttcg.c:95
> #13 0x55dc0af3 in qemu_thread_start (args=0x565ba150) at 
> ../util/qemu-thread-posix.c:541
> #14 0x7730bc64 in start_thread () at /lib64/libc.so.6
> #15 0x77393550 in clone3 () at /lib64/libc.so.6
> 
> (gdb) frame 5
> #5  0x55b96134 in page_unlock__debug (pd=0x7ffee8680440) at 
> ../accel/tcg/tb-maint.c:348
> 348 g_assert(page_is_locked(pd));
> (gdb) list 348
> 343 static void page_unlock__debug(const PageDesc *pd)
> 344 {
> 345 bool removed;
> 346
> 347 ht_pages_locked_debug_init();
> 348 g_assert(page_is_locked(pd));
> 349 removed = g_hash_table_remove(ht_pages_locked_debug, pd);
> 350 g_assert(removed);
> 351 }
> 352
> 
> (gdb) info threads
>   Id   Target IdFrame 
>   1Thread 0x763bef40 (LWP 215971) "qemu-system-s39" 
> 0x77385596 in ppoll () from /lib64/libc.so.6
>   2Thread 0x763bb6c0 (LWP 215974) "qemu-system-s39" 
> 0x7738b41d in syscall () from /lib64/libc.so.6
> * 3Thread 0x753516c0 (LWP 215975) "qemu-system-s39" 
> 0x7730dabc in __pthread_kill_implementation () from /lib64/libc.so.6
>   4Thread 0x74b506c0 (LWP 215976) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/libc.so.6
>   5Thread 0x7ffeefdff6c0 (LWP 215977) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/libc.so.6
>   6Thread 0x7ffeef5fe6c0 (LWP 215978) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/libc.so.6
>   7Thread 0x7ffeeedfd6c0 (LWP 215979) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/libc.so.6
>   8Thread 0x7ffeee5fc6c0 (LWP 215980) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/libc.so.6
>   9Thread 0x7ffeeddfb6c0 (LWP 215981) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/libc.so.6
>   10   Thread 0x7ffeed5fa6c0 (LWP 215982) "qemu-system-s39" 
> 0x7730820e in __futex_abstimed_wait_common () from /lib64/

Re: [PATCH] vhost-user-scsi: support reconnect to backend

2023-07-31 Thread Li Feng


> 2023年7月31日 06:09,Raphael Norwitz  写道:
> 
> 
> 
>> On Jul 28, 2023, at 3:48 AM, Li Feng  wrote:
>> 
>> Thanks for your reply.
>> 
>>> 2023年7月28日 上午5:21,Raphael Norwitz  写道:
>>> 
>>> 
>>> 
 On Jul 25, 2023, at 6:19 AM, Li Feng  wrote:
 
 Thanks for your comments.
 
> 2023年7月25日 上午1:21,Raphael Norwitz  写道:
> 
> Very excited to see this. High level looks good modulo a few small things.
> 
> My major concern is around existing vhost-user-scsi backends which don’t 
> support VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD. IMO we should hide the 
> reconnect behavior behind a VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD check. 
> We may want to do the same for vhost-user-blk.
> 
> The question is then what happens if the check is false. IIUC without an 
> inflight FD, if a device processes requests out of order, it’s not safe 
> to continue execution on reconnect, as there’s no way for the backend to 
> know how to replay IO. Should we permanently wedge the device or have 
> QEMU fail out? May be nice to have a toggle for this.
 
 Based on what MST said, is there anything else I need to do?
>>> 
>>> I don’t think so.
>>> 
> 
>> On Jul 21, 2023, at 6:51 AM, Li Feng  wrote:
>> 
>> If the backend crashes and restarts, the device is broken.
>> This patch adds reconnect for vhost-user-scsi.
>> 
>> Tested with spdk backend.
>> 
>> Signed-off-by: Li Feng 
>> ---
>> hw/block/vhost-user-blk.c   |   2 -
>> hw/scsi/vhost-scsi-common.c |  27 ++---
>> hw/scsi/vhost-user-scsi.c   | 163 +---
>> include/hw/virtio/vhost-user-scsi.h |   3 +
>> include/hw/virtio/vhost.h   |   2 +
>> 5 files changed, 165 insertions(+), 32 deletions(-)
>> 
>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>> index eecf3f7a81..f250c740b5 100644
>> --- a/hw/block/vhost-user-blk.c
>> +++ b/hw/block/vhost-user-blk.c
>> @@ -32,8 +32,6 @@
>> #include "sysemu/sysemu.h"
>> #include "sysemu/runstate.h"
>> 
>> -#define REALIZE_CONNECTION_RETRIES 3
>> -
>> static const int user_feature_bits[] = {
>>  VIRTIO_BLK_F_SIZE_MAX,
>>  VIRTIO_BLK_F_SEG_MAX,
>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
> 
> Why can’t all the vhost-scsi-common stuff be moved to a separate change?
 
 I will move this code to separate patch.
> 
> Especially the stuff introduced for vhost-user-blk in 
> 1b0063b3048af65dfaae6422a572c87db8575a92 should be moved out.
 OK.
 
> 
>> index a06f01af26..08801886b8 100644
>> --- a/hw/scsi/vhost-scsi-common.c
>> +++ b/hw/scsi/vhost-scsi-common.c
>> @@ -52,16 +52,22 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
>> 
>>  vsc->dev.acked_features = vdev->guest_features;
>> 
>> -assert(vsc->inflight == NULL);
>> -vsc->inflight = g_new0(struct vhost_inflight, 1);
>> -ret = vhost_dev_get_inflight(&vsc->dev,
>> - vs->conf.virtqueue_size,
>> - vsc->inflight);
>> +ret = vhost_dev_prepare_inflight(&vsc->dev, vdev);
>>  if (ret < 0) {
>> -error_report("Error get inflight: %d", -ret);
>> +error_report("Error setting inflight format: %d", -ret);
>>  goto err_guest_notifiers;
>>  }
>> 
>> +if (!vsc->inflight->addr) {
>> +ret = vhost_dev_get_inflight(&vsc->dev,
>> +vs->conf.virtqueue_size,
>> +vsc->inflight);
>> +if (ret < 0) {
>> +error_report("Error get inflight: %d", -ret);
>> +goto err_guest_notifiers;
>> +}
>> +}
>> +
>>  ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight);
>>  if (ret < 0) {
>>  error_report("Error set inflight: %d", -ret);
>> @@ -85,9 +91,6 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
>>  return ret;
>> 
>> err_guest_notifiers:
>> -g_free(vsc->inflight);
>> -vsc->inflight = NULL;
>> -
>>  k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>> err_host_notifiers:
>>  vhost_dev_disable_notifiers(&vsc->dev, vdev);
>> @@ -111,12 +114,6 @@ void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>>  }
>>  assert(ret >= 0);
>> 
> 
> In the vhost-scsi (kernel backend) path, what will cleanup vsc->inflight 
> now?
 OK, we should check the vsc->inflight if it is null, the vhost-scsi 
 doesn’t allocate the
 inflight object memory.
>>> 
>>> Are you saying vhost-scsi never allocates inflight so we don’t need to 
>>> check for it?
>> We have checked the vsc->inflight, and only if allocated, we send the 
>> get/set_inflight_fd.
>> This works with vhost-user-scsi/vhost-scsi

Re: [PATCH 4/7] spapr: Fix record-replay machine reset consuming too many events

2023-07-31 Thread Pavel Dovgalyuk

Acked-by: Pavel Dovgalyuk 

On 26.07.2023 21:35, Nicholas Piggin wrote:

spapr_machine_reset gets a random number to populate the device-tree
rng seed with. When loading a snapshot for record-replay, the machine
is reset again, and that tries to consume the random event record
again, crashing due to inconsistent record

Fix this by saving the seed to populate the device tree with, and
skipping the rng on snapshot load.

Cc: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 
---
  hw/ppc/spapr.c | 12 +---
  include/hw/ppc/spapr.h |  1 +
  2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7d84244f03..ecfbdb0030 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1022,7 +1022,6 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
void *fdt, bool reset)
  {
  MachineState *machine = MACHINE(spapr);
  SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
-uint8_t rng_seed[32];
  int chosen;
  
  _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));

@@ -1100,8 +1099,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
void *fdt, bool reset)
  spapr_dt_ov5_platform_support(spapr, fdt, chosen);
  }
  
-qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));

-_FDT(fdt_setprop(fdt, chosen, "rng-seed", rng_seed, sizeof(rng_seed)));
+_FDT(fdt_setprop(fdt, chosen, "rng-seed", spapr->fdt_rng_seed, 32));
  
  _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5"));

  }
@@ -1654,6 +1652,14 @@ static void spapr_machine_reset(MachineState *machine, 
ShutdownCause reason)
  void *fdt;
  int rc;
  
+if (reason != SHUTDOWN_CAUSE_SNAPSHOT_LOAD) {

+/*
+ * Record-replay snapshot load must not consume random, this was
+ * already replayed from initial machine reset.
+ */
+qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
+}
+
  pef_kvm_reset(machine->cgs, &error_fatal);
  spapr_caps_apply(spapr);
  
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h

index f47e8419a5..f4bd204d86 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -204,6 +204,7 @@ struct SpaprMachineState {
  uint32_t fdt_size;
  uint32_t fdt_initial_size;
  void *fdt_blob;
+uint8_t fdt_rng_seed[32];
  long kernel_size;
  bool kernel_le;
  uint64_t kernel_addr;





Re: [PATCH v2 2/4] vhost-user-common: send get_inflight_fd once

2023-07-31 Thread Li Feng


> 2023年7月31日 06:13,Raphael Norwitz  写道:
> 
>> 
>> On Jul 28, 2023, at 3:49 AM, Li Feng  wrote:
>> 
>> 
>> 
>>> 2023年7月28日 下午2:04,Michael S. Tsirkin  写道:
>>> 
>>> On Tue, Jul 25, 2023 at 06:42:45PM +0800, Li Feng wrote:
 Get_inflight_fd is sent only once. When reconnecting to the backend,
 qemu sent set_inflight_fd to the backend.
>>> 
>>> I don't understand what you are trying to say here.
>>> Should be:
>>> Currently ABCD. This is wrong/unnecessary because EFG. This patch HIJ.
>> 
>> Thanks, I will reorganize the commit message in v3.
>>> 
 Signed-off-by: Li Feng 
 ---
 hw/scsi/vhost-scsi-common.c | 37 ++---
 1 file changed, 18 insertions(+), 19 deletions(-)
 
 diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
 index a06f01af26..664adb15b4 100644
 --- a/hw/scsi/vhost-scsi-common.c
 +++ b/hw/scsi/vhost-scsi-common.c
 @@ -52,20 +52,28 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
 
vsc->dev.acked_features = vdev->guest_features;
 
 -assert(vsc->inflight == NULL);
 -vsc->inflight = g_new0(struct vhost_inflight, 1);
 -ret = vhost_dev_get_inflight(&vsc->dev,
 - vs->conf.virtqueue_size,
 - vsc->inflight);
 +ret = vhost_dev_prepare_inflight(&vsc->dev, vdev);
if (ret < 0) {
 -error_report("Error get inflight: %d", -ret);
 +error_report("Error setting inflight format: %d", -ret);
goto err_guest_notifiers;
}
 
 -ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight);
 -if (ret < 0) {
 -error_report("Error set inflight: %d", -ret);
 -goto err_guest_notifiers;
 +if (vsc->inflight) {
 +if (!vsc->inflight->addr) {
 +ret = vhost_dev_get_inflight(&vsc->dev,
 +vs->conf.virtqueue_size,
 +vsc->inflight);
 +if (ret < 0) {
 +error_report("Error get inflight: %d", -ret);
>>> 
>>> As long as you are fixing this - should be "getting inflight”.
>> I will fix it in v3.
>>> 
 +goto err_guest_notifiers;
 +}
 +}
 +
> 
> Looks like you reworked this a bit so to avoid a potential crash if 
> vsc->inflight is NULL
> 
> Should we fix it for vhost-user-blk too?
> 
This check is mainly for the vhost-scsi code, that doesn’t need allocate the 
inflight memory.

The vhost-user-blk doesn’t need this check, because there isn't a vhost-blk 
device that reuse the code.

 +ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight);
 +if (ret < 0) {
 +error_report("Error set inflight: %d", -ret);
 +goto err_guest_notifiers;
 +}
}
 
ret = vhost_dev_start(&vsc->dev, vdev, true);
 @@ -85,9 +93,6 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
return ret;
 
 err_guest_notifiers:
 -g_free(vsc->inflight);
 -vsc->inflight = NULL;
 -
k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
 err_host_notifiers:
vhost_dev_disable_notifiers(&vsc->dev, vdev);
 @@ -111,12 +116,6 @@ void vhost_scsi_common_stop(VHostSCSICommon *vsc)
}
assert(ret >= 0);
 
> 
> As I said before, I think this introduces a leak.
I have answered in the previous mail.

> 
 -if (vsc->inflight) {
 -vhost_dev_free_inflight(vsc->inflight);
 -g_free(vsc->inflight);
 -vsc->inflight = NULL;
 -}
 -
vhost_dev_disable_notifiers(&vsc->dev, vdev);
 }
 
 -- 
 2.41.0



Re: [PATCH 5/7] tests/avocado: boot ppc64 pseries replay-record test to Linux VFS mount

2023-07-31 Thread Pavel Dovgalyuk

Acked-by: Pavel Dovgalyuk 

On 26.07.2023 21:35, Nicholas Piggin wrote:

This the ppc64 record-replay test is able to replay the full kernel boot
so try enabling it.

Cc: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 
---
  tests/avocado/replay_kernel.py | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 79c607b0e7..a18610542e 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -255,8 +255,7 @@ def test_ppc64_pseries(self):
  kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
  
  kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0'

-# icount is not good enough for PPC64 for complete boot yet
-console_pattern = 'Kernel command line: %s' % kernel_command_line
+console_pattern = 'VFS: Cannot open root device'
  self.run_rr(kernel_path, kernel_command_line, console_pattern)
  
  def test_ppc64_powernv(self):





Re: [PATCH 3/5] linux-user: Use MAP_FIXED_NOREPLACE for do_brk()

2023-07-31 Thread Peter Maydell
On Mon, 31 Jul 2023 at 09:04, Akihiko Odaki  wrote:
>
> MAP_FIXED_NOREPLACE can ensure the mapped address is fixed without
> concerning that the new mapping overwrites something else.

MAP_FIXED_NOREPLACE only came in with Linux 4.17. So
I think we still need to handle the "mapped address
is not the one we asked for" error condition, because
it can happen on older host kernels that ignore the
MAP_FIXED_NOREPLACE flag.

thanks
-- PMM



Re: [RFC PATCH 5/6] include/qemu/compiler: Fix problem with gcc_struct and Clang

2023-07-31 Thread Thomas Huth

On 31/07/2023 11.32, Daniel P. Berrangé wrote:

On Mon, Jul 31, 2023 at 11:10:58AM +0200, Thomas Huth wrote:

...

Or do you see another possibility how we could fix that timeout problem in
the 64-bit MSYS2 job? Still switching to clang, but compiling with
--extra-cflags="-Wno-unknown-attributes" maybe?


I was surprised to see that we're not using ccache in gitlab CI. It wouldn't
help the from-clean compile time, but thereafter it ought to help. I'm doing
some tests with that to see the impact.


I tried that two years ago, but the results were rather disappointing:

 https://lists.nongnu.org/archive/html/qemu-devel/2021-04/msg02189.html

... but that was only with the Linux runners. Maybe it helps with the MSYS 
runners?


 Thomas




Re: [RFC PATCH 5/6] include/qemu/compiler: Fix problem with gcc_struct and Clang

2023-07-31 Thread Thomas Huth

On 31/07/2023 12.05, Peter Maydell wrote:

On Mon, 31 Jul 2023 at 10:11, Thomas Huth  wrote:

Or do you see another possibility how we could fix that timeout problem in
the 64-bit MSYS2 job? Still switching to clang, but compiling with
--extra-cflags="-Wno-unknown-attributes" maybe?


Is there any way we can separate out the "take 25 minutes to
install a pile of packages" part from the "actually compile and
test QEMU" part ?


At least I don't have a clue ... we'd need someone who knows how to deal 
with that Windows stuff...


 Thomas





Re: [PATCH 6/7] tests/avocado: reverse-debugging cope with re-executing breakpoints

2023-07-31 Thread Pavel Dovgalyuk

Reviewed-by: Pavel Dovgalyuk 

On 26.07.2023 21:35, Nicholas Piggin wrote:

The reverse-debugging test creates a trace, then replays it and:

1. Steps the first 10 instructions and records their addresses.
2. Steps backward and verifies their addresses match.
3. Runs to (near) the end of the trace.
4. Sets breakpoints on the first 10 instructions.
5. Continues backward and verifies execution stops at the last
breakpoint.

Step 5 breaks if any of the other 9 breakpoints are re-executed in the
trace after the 10th instruction is run, because those will be
unexpectedly hit when reverse continuing. This situation does arise
with the ppc pseries machine, the SLOF bios branches to its own entry
point.

Permit this breakpoint re-execution by switching steps 4 and 5, so that
the trace will be run to the end *or* the next breakpoint hit.
Reversing from there to the 10th intsruction will not hit another
breakpoint, by definition.

Another step is added between steps 2 and 3, which steps forward over
the first 10 instructions and verifies their addresses, to support this.

Cc: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 
---
  tests/avocado/reverse_debugging.py | 25 +
  1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/tests/avocado/reverse_debugging.py 
b/tests/avocado/reverse_debugging.py
index 680c314cfc..7d1a478df1 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -150,16 +150,33 @@ def reverse_debugging(self, shift=7, args=None):
  self.check_pc(g, addr)
  logger.info('found position %x' % addr)
  
-logger.info('seeking to the end (icount %s)' % (last_icount - 1))

-vm.qmp('replay-break', icount=last_icount - 1)
-# continue - will return after pausing
-g.cmd(b'c', b'T02thread:01;')
+# visit the recorded instruction in forward order
+logger.info('stepping forward')
+for addr in steps:
+self.check_pc(g, addr)
+self.gdb_step(g)
+logger.info('found position %x' % addr)
  
+# set breakpoints for the instructions just stepped over

  logger.info('setting breakpoints')
  for addr in steps:
  # hardware breakpoint at addr with len=1
  g.cmd(b'Z1,%x,1' % addr, b'OK')
  
+# this may hit a breakpoint if first instructions are executed

+# again
+logger.info('continuing execution')
+vm.qmp('replay-break', icount=last_icount - 1)
+# continue - will return after pausing
+# This could stop at the end and get a T02 return, or by
+# re-executing one of the breakpoints and get a T05 return.
+g.cmd(b'c')
+if self.vm_get_icount(vm) == last_icount - 1:
+logger.info('reached the end (icount %s)' % (last_icount - 1))
+else:
+logger.info('hit a breakpoint again at %x (icount %s)' %
+(self.get_pc(g), self.vm_get_icount(vm)))
+
  logger.info('running reverse continue to reach %x' % steps[-1])
  # reverse continue - will return after stopping at the breakpoint
  g.cmd(b'bc', b'T05thread:01;')





Re: [PATCH 7/7] tests/avocado: ppc64 reverse debugging tests for pseries and powernv

2023-07-31 Thread Pavel Dovgalyuk

Reviewed-by: Pavel Dovgalyuk 

On 26.07.2023 21:35, Nicholas Piggin wrote:

These machines run reverse-debugging well enough to pass basic tests.
Wire them up.

Cc: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 
---
  tests/avocado/reverse_debugging.py | 29 +
  1 file changed, 29 insertions(+)

diff --git a/tests/avocado/reverse_debugging.py 
b/tests/avocado/reverse_debugging.py
index 7d1a478df1..fc47874eda 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -233,3 +233,32 @@ def test_aarch64_virt(self):
  
  self.reverse_debugging(

  args=('-kernel', kernel_path))
+
+class ReverseDebugging_ppc64(ReverseDebugging):
+"""
+:avocado: tags=accel:tcg
+"""
+
+REG_PC = 0x40
+
+# unidentified gitlab timeout problem
+@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+def test_ppc64_pseries(self):
+"""
+:avocado: tags=arch:ppc64
+:avocado: tags=machine:pseries
+"""
+# SLOF branches back to its entry point, which causes this test
+# to take the 'hit a breakpoint again' path. That's not a problem,
+# just slightly different than the other machines.
+self.endian_is_le = False
+self.reverse_debugging()
+
+@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+def test_ppc64_powernv(self):
+"""
+:avocado: tags=arch:ppc64
+:avocado: tags=machine:powernv
+"""
+self.endian_is_le = False
+self.reverse_debugging()





[PATCH v3 1/5] vhost: fix the fd leak

2023-07-31 Thread Li Feng
When the vhost-user reconnect to the backend, the notifer should be
cleanup. Otherwise, the fd resource will be exhausted.

Fixes: f9a09ca3ea ("vhost: add support for configure interrupt")

Signed-off-by: Li Feng 
Reviewed-by: Raphael Norwitz 
---
 hw/virtio/vhost.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index abf0d03c8d..e2f6ffb446 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -2044,6 +2044,8 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice 
*vdev, bool vrings)
 event_notifier_test_and_clear(
 &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
 event_notifier_test_and_clear(&vdev->config_notifier);
+event_notifier_cleanup(
+&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
 
 trace_vhost_dev_stop(hdev, vdev->name, vrings);
 
-- 
2.41.0




[PATCH v3 5/5] vhost-user-scsi: start vhost when guest kicks

2023-07-31 Thread Li Feng
Let's keep the same behavior as vhost-user-blk.

Some old guests kick virtqueue before setting VIRTIO_CONFIG_S_DRIVER_OK.

Signed-off-by: Li Feng 
---
 hw/scsi/vhost-user-scsi.c | 48 +++
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 5bf012461b..a7fa8e8df2 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -113,8 +113,48 @@ static void vhost_user_scsi_reset(VirtIODevice *vdev)
 }
 }
 
-static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
+VHostUserSCSI *s = (VHostUserSCSI *)vdev;
+DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+
+Error *local_err = NULL;
+int i, ret;
+
+if (!vdev->start_on_kick) {
+return;
+}
+
+if (!s->connected) {
+return;
+}
+
+if (vhost_dev_is_started(&vsc->dev)) {
+return;
+}
+
+/*
+ * Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
+ * vhost here instead of waiting for .set_status().
+ */
+ret = vhost_user_scsi_start(s);
+if (ret < 0) {
+error_reportf_err(local_err, "vhost-user-scsi: vhost start failed: ");
+qemu_chr_fe_disconnect(&vs->conf.chardev);
+return;
+}
+
+/* Kick right away to begin processing requests already in vring */
+for (i = 0; i < vsc->dev.nvqs; i++) {
+VirtQueue *kick_vq = virtio_get_queue(vdev, i);
+
+if (!virtio_queue_get_desc_addr(vdev, i)) {
+continue;
+}
+event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
+}
 }
 
 static int vhost_user_scsi_connect(DeviceState *dev, Error **errp)
@@ -243,9 +283,9 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-virtio_scsi_common_realize(dev, vhost_dummy_handle_output,
-   vhost_dummy_handle_output,
-   vhost_dummy_handle_output, &err);
+virtio_scsi_common_realize(dev, vhost_user_scsi_handle_output,
+   vhost_user_scsi_handle_output,
+   vhost_user_scsi_handle_output, &err);
 if (err != NULL) {
 error_propagate(errp, err);
 return;
-- 
2.41.0




[PATCH v3 0/5] Implement reconnect for vhost-user-scsi

2023-07-31 Thread Li Feng
This patchset adds reconnect support for vhost-user-scsi. At the same
times, fix vhost fd leak and refactor some code.

Changes for v3:
- Split the vhost_user_scsi_handle_output to a separate patch;
- Move the started_vu from vhost scsi common header to vhost-user-scsi header;
- Fix a log print error;

Changes for v2:
- Split the v1 patch to small separate patchset;
- New patch for fixing fd leak, which has sent to reviewers in another
  mail;
- Implement the `vhost_user_scsi_handle_output`;
- Add the started_vu safe check;
- Fix error handler;
- Check the inflight before set/get inflight fd.

Li Feng (5):
  vhost: fix the fd leak
  vhost-user-common: send get_inflight_fd once
  vhost: move and rename the conn retry times
  vhost-user-scsi: support reconnect to backend
  vhost-user-scsi: start vhost when guest kicks

 hw/block/vhost-user-blk.c   |   4 +-
 hw/scsi/vhost-scsi-common.c |  37 ++---
 hw/scsi/vhost-user-scsi.c   | 247 +---
 hw/virtio/vhost-user-gpio.c |   3 +-
 hw/virtio/vhost.c   |   2 +
 include/hw/virtio/vhost-user-scsi.h |   4 +
 include/hw/virtio/vhost.h   |   2 +
 7 files changed, 252 insertions(+), 47 deletions(-)

-- 
2.41.0




[PATCH v3 4/5] vhost-user-scsi: support reconnect to backend

2023-07-31 Thread Li Feng
If the backend crashes and restarts, the device is broken.
This patch adds reconnect for vhost-user-scsi.

Tested with spdk backend.

Signed-off-by: Li Feng 
---
 hw/scsi/vhost-user-scsi.c   | 199 +---
 include/hw/virtio/vhost-user-scsi.h |   4 +
 2 files changed, 184 insertions(+), 19 deletions(-)

diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index ee99b19e7a..5bf012461b 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -43,26 +43,54 @@ enum VhostUserProtocolFeature {
 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
 };
 
+static int vhost_user_scsi_start(VHostUserSCSI *s)
+{
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+int ret;
+
+ret = vhost_scsi_common_start(vsc);
+s->started_vu = (ret < 0 ? false : true);
+
+return ret;
+}
+
+static void vhost_user_scsi_stop(VHostUserSCSI *s)
+{
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+
+if (!s->started_vu) {
+return;
+}
+s->started_vu = false;
+
+vhost_scsi_common_stop(vsc);
+}
+
 static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
 {
 VHostUserSCSI *s = (VHostUserSCSI *)vdev;
+DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
 VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
-bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
+VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+bool should_start = virtio_device_should_start(vdev, status);
+int ret;
 
-if (vhost_dev_is_started(&vsc->dev) == start) {
+if (!s->connected) {
 return;
 }
 
-if (start) {
-int ret;
+if (vhost_dev_is_started(&vsc->dev) == should_start) {
+return;
+}
 
-ret = vhost_scsi_common_start(vsc);
+if (should_start) {
+ret = vhost_user_scsi_start(s);
 if (ret < 0) {
 error_report("unable to start vhost-user-scsi: %s", 
strerror(-ret));
-exit(1);
+qemu_chr_fe_disconnect(&vs->conf.chardev);
 }
 } else {
-vhost_scsi_common_stop(vsc);
+vhost_user_scsi_stop(s);
 }
 }
 
@@ -89,14 +117,126 @@ static void vhost_dummy_handle_output(VirtIODevice *vdev, 
VirtQueue *vq)
 {
 }
 
+static int vhost_user_scsi_connect(DeviceState *dev, Error **errp)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+int ret = 0;
+
+if (s->connected) {
+return 0;
+}
+s->connected = true;
+
+vsc->dev.num_queues = vs->conf.num_queues;
+vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
+vsc->dev.vqs = s->vhost_vqs;
+vsc->dev.vq_index = 0;
+vsc->dev.backend_features = 0;
+
+ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
+ errp);
+if (ret < 0) {
+return ret;
+}
+
+/* restore vhost state */
+if (virtio_device_started(vdev, vdev->status)) {
+ret = vhost_user_scsi_start(s);
+if (ret < 0) {
+return ret;
+}
+}
+
+return 0;
+}
+
+static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event);
+
+static void vhost_user_scsi_disconnect(DeviceState *dev)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+
+if (!s->connected) {
+return;
+}
+s->connected = false;
+
+vhost_user_scsi_stop(s);
+
+vhost_dev_cleanup(&vsc->dev);
+
+/* Re-instate the event handler for new connections */
+qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
+ vhost_user_scsi_event, NULL, dev, NULL, true);
+}
+
+static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
+{
+DeviceState *dev = opaque;
+VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
+VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+Error *local_err = NULL;
+
+switch (event) {
+case CHR_EVENT_OPENED:
+if (vhost_user_scsi_connect(dev, &local_err) < 0) {
+error_report_err(local_err);
+qemu_chr_fe_disconnect(&vs->conf.chardev);
+return;
+}
+break;
+case CHR_EVENT_CLOSED:
+/* defer close until later to avoid circular close */
+vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
+   vhost_user_scsi_disconnect);
+break;
+case CHR_EVENT_BREAK:
+case CHR_EVENT_MUX_IN:
+case CHR_EVENT_MUX_OUT:
+/* Ignore */
+break;
+}
+}
+
+static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp)
+{
+DeviceState *dev = &s->parent_obj.parent_obj.parent_

  1   2   3   >