RE: [PATCH v3] arm/kvm: Enable support for KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE

2023-09-05 Thread Shameerali Kolothum Thodi via


> -----Original Message-----
> From: Gavin Shan [mailto:gs...@redhat.com]
> Sent: 31 August 2023 02:43
> To: Shameerali Kolothum Thodi ;
> qemu-devel@nongnu.org; qemu-...@nongnu.org
> Cc: peter.mayd...@linaro.org; ricar...@google.com; Jonathan Cameron
> ; k...@vger.kernel.org; Linuxarm
> 
> Subject: Re: [PATCH v3] arm/kvm: Enable support for
> KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
> 
> Hi Shameer,
> 
> On 8/30/23 21:48, Shameer Kolothum wrote:
> > Now that we have Eager Page Split support added for ARM in the kernel,
> > enable it in Qemu. This adds,
> >   -eager-split-size to -accel sub-options to set the eager page split chunk
> size.
> >   -enable KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE.
> >
> > The chunk size specifies how many pages to break at a time, using a
> > single allocation. The bigger the chunk size, the more pages need to be
> > allocated ahead of time.
> >
> > Signed-off-by: Shameer Kolothum
> 
> > ---
> > v2:
> https://lore.kernel.org/qemu-devel/20230815092709.1290-1-shameerali.kol
> othum.th...@huawei.com/
> > -Addressed comments from Gavin(Thanks).
> > RFC v1:
> https://lore.kernel.org/qemu-devel/20230725150002.621-1-shameerali.kolo
> thum.th...@huawei.com/
> >-Updated qemu-options.hx with description
> >-Addressed review comments from Peter and Gavin(Thanks).
> > ---
> >   accel/kvm/kvm-all.c  |  1 +
> >   include/sysemu/kvm_int.h |  1 +
> >   qemu-options.hx  | 15 +
> >   target/arm/kvm.c | 68
> 
> >   4 files changed, 85 insertions(+)
> >
> 
> One more question below. Please check if it's worth addressing in v4, which is
> needed to resolve other comments. Otherwise, it looks fine to me.
> 
> Reviewed-by: Gavin Shan 

Thanks. I will send out a v4 with the above tag and the below suggestion to 
get rid of the kvm_arm_eager_split_size_valid().

Shameer.

> 
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index 2ba7521695..ff1578bb32 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -3763,6 +3763,7 @@ static void kvm_accel_instance_init(Object *obj)
> >   /* KVM dirty ring is by default off */
> >   s->kvm_dirty_ring_size = 0;
> >   s->kvm_dirty_ring_with_bitmap = false;
> > +s->kvm_eager_split_size = 0;
> >   s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN;
> >   s->notify_window = 0;
> >   s->xen_version = 0;
> > diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> > index 511b42bde5..a5b9122cb8 100644
> > --- a/include/sysemu/kvm_int.h
> > +++ b/include/sysemu/kvm_int.h
> > @@ -116,6 +116,7 @@ struct KVMState
> >   uint64_t kvm_dirty_ring_bytes;  /* Size of the per-vcpu dirty ring
> */
> >   uint32_t kvm_dirty_ring_size;   /* Number of dirty GFNs per ring
> */
> >   bool kvm_dirty_ring_with_bitmap;
> > +uint64_t kvm_eager_split_size;  /* Eager Page Splitting chunk size */
> >   struct KVMDirtyRingReaper reaper;
> >   NotifyVmexitOption notify_vmexit;
> >   uint32_t notify_window;
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 29b98c3d4c..2e70704ee8 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -186,6 +186,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
> >   "split-wx=on|off (enable TCG split w^x
> mapping)\n"
> >   "tb-size=n (TCG translation block cache size)\n"
> >   "dirty-ring-size=n (KVM dirty ring GFN count,
> default 0)\n"
> > +"eager-split-size=n (KVM Eager Page Split chunk
> size, default 0, disabled. ARM only)\n"
> >   "
> notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM
> exit and set notify window, x86 only)\n"
> >   "thread=single|multi (enable multi-threaded
> TCG)\n", QEMU_ARCH_ALL)
> >   SRST
> > @@ -244,6 +245,20 @@ SRST
> >   is disabled (dirty-ring-size=0).  When enabled, KVM will
> instead
> >   record dirty pages in a bitmap.
> >
> > +``eager-split-size=n``
> > +KVM implements dirty page logging at the PAGE_SIZE granularity
> and
> > +enabling dirty-logging on a huge-page requires breaking it into
> > +PAGE_SIZE pages in the first place. KVM on ARM does this
> splitting
> > +lazily by default. There are performance benefits in doing
> huge-page
> > +split eagerly, especially in situations where TLBI costs associated
> > +with break-before-make sequences are considerable and also if
> guest
> > +workloads are read intensive. The size here specifies how many
> pages
> > +to break at a time and needs to be a valid block size which is
> > +1GB/2MB/4KB, 32MB/16KB and 512MB/64KB for
> 4KB/16KB/64KB PAGE_SIZE
> > +respectively. Be wary of specifying a higher size as it will have 
> > an
> > +impact on the memory. By default, this feature is disabled
> > +(eager-split-size=0).
> > +
> >   ``notify-vmexit=run|internal-error|dis

[PATCH v3] migration/calc-dirty-rate: millisecond-granularity period

2023-09-05 Thread Andrei Gudkov via
This patch allows to measure dirty page rate for
sub-second intervals of time. An optional argument is
introduced -- calc-time-unit. For example:
{"execute": "calc-dirty-rate", "arguments":
  {"calc-time": 500, "calc-time-unit": "millisecond"} }

Millisecond granularity makes it possible to predict whether
migration will succeed or not. To do this, calculate the dirty
rate with calc-time set to the max allowed downtime (e.g. 300ms),
convert the measured rate into the volume of dirtied memory,
and divide by the network throughput. If the resulting transfer
time is lower than the max allowed downtime, then migration will converge.
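
As a rough illustration of that calculation (a minimal sketch with
hypothetical names, not part of this patch):

    #include <stdbool.h>

    /* Decide whether migration can converge for a given max downtime. */
    static bool migration_will_converge(double dirty_rate_mibps,
                                        double max_downtime_ms,
                                        double net_throughput_mibps)
    {
        /* memory dirtied during one downtime-sized window, in MiB */
        double dirtied_mib = dirty_rate_mibps * (max_downtime_ms / 1000.0);
        /* time needed to transfer that volume over the network, in ms */
        double transfer_ms = dirtied_mib / net_throughput_mibps * 1000.0;

        return transfer_ms <= max_downtime_ms;
    }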

Measurement results for single thread randomly writing to
a 1/4/24GiB memory region:

+----------------+------------------------------------------------+
|   calc-time    |                dirty rate MiB/s                |
| (milliseconds) +----------------+---------------+--------------+
|                |  theoretical   | page-sampling | dirty-bitmap |
|                | (at 3M wr/sec) |               |              |
+----------------+----------------+---------------+--------------+
|                              1GiB                              |
+----------------+----------------+---------------+--------------+
|            100 |           6996 |          7100 |         3192 |
|            200 |           4606 |          4660 |         2655 |
|            300 |           3305 |          3280 |         2371 |
|            400 |           2534 |          2525 |         2154 |
|            500 |           2041 |          2044 |         1871 |
|            750 |           1365 |          1341 |         1358 |
|           1000 |           1024 |          1052 |         1025 |
|           1500 |            683 |           678 |          684 |
|           2000 |            512 |           507 |          513 |
+----------------+----------------+---------------+--------------+
|                              4GiB                              |
+----------------+----------------+---------------+--------------+
|            100 |          10232 |          8880 |         4070 |
|            200 |           8954 |          8049 |         3195 |
|            300 |           7889 |          7193 |         2881 |
|            400 |           6996 |          6530 |         2700 |
|            500 |           6245 |          5772 |         2312 |
|            750 |           4829 |          4586 |         2465 |
|           1000 |           3865 |          3780 |         2178 |
|           1500 |           2694 |          2633 |         2004 |
|           2000 |           2041 |          2031 |         1789 |
+----------------+----------------+---------------+--------------+
|                             24GiB                              |
+----------------+----------------+---------------+--------------+
|            100 |          11495 |          8640 |         5597 |
|            200 |          11226 |          8616 |         3527 |
|            300 |          10965 |          8386 |         2355 |
|            400 |          10713 |          8370 |         2179 |
|            500 |          10469 |          8196 |         2098 |
|            750 |           9890 |          7885 |         2556 |
|           1000 |           9354 |          7506 |         2084 |
|           1500 |           8397 |          6944 |         2075 |
|           2000 |           7574 |          6402 |         2062 |
+----------------+----------------+---------------+--------------+

Theoretical values are computed according to the following formula:
size * (1 - (1-(4096/size))^(time*wps)) / (time * 2^20),
where size is in bytes, time is in seconds, and wps is number of
writes per second.
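
For reference, a direct transcription of that formula into C (an editorial
sketch, not code from this patch):

    #include <math.h>

    /* size in bytes, time in seconds, wps = writes per second; result in MiB/s */
    static double theoretical_dirty_rate(double size, double time, double wps)
    {
        return size * (1.0 - pow(1.0 - 4096.0 / size, time * wps))
               / (time * 1048576.0);
    }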

Signed-off-by: Andrei Gudkov 
---
 qapi/migration.json   |  58 ++-
 migration/dirtyrate.h |  12 +++--
 migration/dirtyrate.c | 107 +-
 3 files changed, 128 insertions(+), 49 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 8843e74b59..1717aa4bbd 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1836,6 +1836,21 @@
 { 'enum': 'DirtyRateMeasureMode',
   'data': ['page-sampling', 'dirty-ring', 'dirty-bitmap'] }
 
+##
+# @TimeUnit:
+#
+# Specifies unit in which time-related value is specified.
+#
+# @second: value is in seconds
+#
+# @millisecond: value is in milliseconds
+#
+# Since 8.2
+#
+##
+{ 'enum': 'TimeUnit',
+  'data': ['second', 'millisecond'] }
+
 ##
 # @DirtyRateInfo:
 #
@@ -1848,8 +1863,10 @@
 #
 # @start-time: start time in units of second for calculation
 #
-# @calc-time: time period for which dirty page rate was measured
-# (in seconds)
+# @calc-time: time period for which dirty page rate was measured,
+# expressed and rounded down to @calc-time-unit.
+#
+# @calc-time-unit: time unit of @calc-time  (Since 8.2)
 #
 # @sample-pages: number of sampled pages per GiB of guest memory.
 # Valid only in page-sampling mode (Since 6.1)
@@ -1866,6 +1883,7 @@
  

Re: [PATCH v8 10/12] virtio-sound: implement audio output (TX)

2023-09-05 Thread Volker Rümelin

Am 04.09.23 um 23:34 schrieb Volker Rümelin:

Am 04.09.23 um 12:34 schrieb Manos Pitsidianakis:
On Mon, 04 Sep 2023 13:26, Philippe Mathieu-Daudé  
wrote:



+/*
+ * AUD_* output callback.
+ *
+ * @data: VirtIOSoundPCMStream stream
+ * @available: number of bytes that can be written with AUD_write()
+ */
+static void virtio_snd_pcm_out_cb(void *data, int available)
+{
+    VirtIOSoundPCMStream *stream = data;
+    VirtIOSoundPCMBlock *block;
+    VirtIOSoundPCMBlock *next;
+    size_t size;
+
+    WITH_QEMU_LOCK_GUARD(&stream->queue_mutex) {
+    QSIMPLEQ_FOREACH_SAFE(block, &stream->queue, entry, next) {
+    for (;;) {
+    size = MIN(block->size, available);
+    size = AUD_write(stream->voice.out,
+    block->data + block->offset,
+    size);


If AUD_write() returns 0, is this an infinite loop?


Hm, since we have available > 0 bytes this wouldn't theoretically
happen, but I see there are code paths that return 0 on
bugs/failures. I will add the check.


Before QEMU 8.0.0 it was possible that AUD_write() couldn't write the 
last audio frame and sometimes 'available' was just miscalculated. 
Since commit e1e6a6fcc9 ("audio: handle leftover audio frame from 
upsampling") AUD_write() writes all 'available' bytes.




I thought about this again. The error check is necessary.


With best regards,
Volker




+    block->size -= size;
+    block->offset += size;
+    if (!block->size) {
+    virtqueue_push(block->vq,
+    block->elem,
+    sizeof(block->elem));
+ virtio_notify(VIRTIO_DEVICE(stream->s),
+    block->vq);
+ QSIMPLEQ_REMOVE_HEAD(&stream->queue, entry);
+    g_free(block);
+    available -= size;
+    break;
+    }
+
+    available -= size;
+    if (!available) {
+    break;
+    }
+    }
+    if (!available) {
+    break;
+    }
+    }
+    }
+}
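
/*
 * Editorial note, not part of the patch quoted above: the guard discussed
 * in this thread could look roughly like this, breaking out of the loop
 * when AUD_write() makes no progress:
 *
 *     size = AUD_write(stream->voice.out,
 *                      block->data + block->offset, size);
 *     if (!size) {
 *         break;
 *     }
 *
 * The exact form and placement in the final version may differ.
 */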
+
+/*
+ * Flush all buffer data from this stream's queue into the 
driver's virtual

+ * queue.
+ *
+ * @stream: VirtIOSoundPCMStream *stream
+ */
+static void virtio_snd_pcm_flush(VirtIOSoundPCMStream *stream)
+{
+    VirtIOSoundPCMBlock *block;
+    VirtIOSoundPCMBlock *next;
+
+    WITH_QEMU_LOCK_GUARD(&stream->queue_mutex) {
+    QSIMPLEQ_FOREACH_SAFE(block, &stream->queue, entry, next) {
+    AUD_write(stream->voice.out, block->data + 
block->offset, block->size);









Re: [PATCH v2 1/4] softmmu: Support concurrent bounce buffers

2023-09-05 Thread Mattias Nissler
On Fri, Sep 1, 2023 at 3:41 PM Markus Armbruster  wrote:
>
> Stefan Hajnoczi  writes:
>
> > On Wed, Aug 23, 2023 at 04:54:06PM -0400, Peter Xu wrote:
> >> On Wed, Aug 23, 2023 at 10:08:08PM +0200, Mattias Nissler wrote:
> >> > On Wed, Aug 23, 2023 at 7:35 PM Peter Xu  wrote:
> >> > > On Wed, Aug 23, 2023 at 02:29:02AM -0700, Mattias Nissler wrote:
> >> > > > diff --git a/softmmu/vl.c b/softmmu/vl.c
> >> > > > index b0b96f67fa..dbe52f5ea1 100644
> >> > > > --- a/softmmu/vl.c
> >> > > > +++ b/softmmu/vl.c
> >> > > > @@ -3469,6 +3469,12 @@ void qemu_init(int argc, char **argv)
> >> > > >  exit(1);
> >> > > >  #endif
> >> > > >  break;
> >> > > > +case QEMU_OPTION_max_bounce_buffer_size:
> >> > > > +if (qemu_strtosz(optarg, NULL, 
> >> > > > &max_bounce_buffer_size) < 0) {
> >> > > > +error_report("invalid -max-bounce-buffer-size 
> >> > > > value");
> >> > > > +exit(1);
> >> > > > +}
> >> > > > +break;
> >> > >
> >> > > PS: I had a vague memory that we do not recommend adding more qemu 
> >> > > cmdline
> >> > > options, but I don't know enough on the plan to say anything real.
> >> >
> >> > I am aware of that, and I'm really not happy with the command line
> >> > option myself. Consider the command line flag a straw man I put in to
> >> > see whether any reviewers have better ideas :)
> >> >
> >> > More seriously, I actually did look around to see whether I can add
> >> > the parameter to one of the existing option groupings somewhere, but
> >> > neither do I have a suitable QOM object that I can attach the
> >> > parameter to, nor did I find any global option groups that fits: this
> >> > is not really memory configuration, and it's not really CPU
> >> > configuration, it's more related to shared device model
> >> > infrastructure... If you have a good idea for a home for this, I'm all
> >> > ears.
> >>
> >> No good & simple suggestion here, sorry.  We can keep the option there
> >> until someone jumps in, then the better alternative could also come along.
> >>
> >> After all I expect if we can choose a sensible enough default value, this
> >> new option shouldn't be used by anyone for real.
> >
> > QEMU commits to stability in its external interfaces. Once the
> > command-line option is added, it needs to be supported in the future or
> > go through the deprecation process. I think we should agree on the
> > command-line option now.
> >
> > Two ways to avoid the issue:
> > 1. Drop the command-line option until someone needs it.
>
> Avoiding unneeded configuration knobs is always good.
>
> > 2. Make it an experimental option (with an "x-" prefix).
>
> Fine if actual experiments are planned.
>
> Also fine if it's a development or debugging aid.

To a certain extent it is: I've been playing with different device
models and bumping the parameter until their DMA requests stopped
failing.

>
> > The closest to a proper solution that I found was adding it as a
> > -machine property. What bothers me is that if QEMU supports
> > multi-machine emulation in a single process some day, then the property
> > doesn't make sense since it's global rather than specific to a machine.
> >
> > CCing Markus Armbruster for ideas.
>
> I'm afraid I'm lacking context.  Glancing at the patch...
>
> ``-max-bounce-buffer-size size``
> Set the limit in bytes for DMA bounce buffer allocations.
>
> DMA bounce buffers are used when device models request memory-mapped 
> access
> to memory regions that can't be directly mapped by the qemu process, 
> so the
> memory must be read or written to a temporary local buffer for the device
> model to work with. This is the case e.g. for I/O memory regions, and 
> when
> running in multi-process mode without shared access to memory.
>
> Whether bounce buffering is necessary depends heavily on the device 
> model
> implementation. Some devices use explicit DMA read and write 
> operations
> which do not require bounce buffers. Some devices, notably storage, 
> will
> retry a failed DMA map request after bounce buffer space becomes 
> available
> again. Most other devices will bail when encountering map request 
> failures,
> which will typically appear to the guest as a hardware error.
>
> Suitable bounce buffer size values depend on the workload and guest
> configuration. A few kilobytes up to a few megabytes are common sizes
> encountered in practice.
>
> Sounds quite device-specific.  Why isn't this configured per device?

It would be nice to use a property on the device that originates the
DMA operation to configure this. However, I don't see how to do this
in a reasonable way without bigger changes: A typical call path is
pci_dma_map -> dma_memory_map -> address_space_map. While pci_dma_map
has a PCIDevice*, address_space_map only receives the Add

Re: [PATCH 00/13] VIRTIO-IOMMU/VFIO: Don't assume 64b IOVA space

2023-09-05 Thread YangHang Liu
I have run the following two tests, but both failed:
[1] start a VM with virtio-iommu + 2 ice PFs only via qemu-kvm 8.1.5
Test result : the qemu-kvm keeps throwing the error:  VFIO_MAP_DMA
failed: File exists. vfio_dma_map(0x56443d20fbe0, 0xe000, 0x1000,
0x7fb545709000) = -17 (File exists)
[2] start a VM with virtio-iommu + 2 ice PFs via libvirt-9.5 + qemu-kvm 8.1.5
Test result: the qemu-kvm core dumps with
ERROR:../qom/object.c:1198:object_unref: assertion failed: (obj->ref >
0). Bail out! ERROR:../qom/object.c:1198:object_unref: assertion
failed: (obj->ref > 0)

After removing the 2 PFs from the VM, both tests passed.

Tested-by: Yanghang Liu 

Best Regards,
YangHang Liu


On Mon, Sep 4, 2023 at 4:08 PM Eric Auger  wrote:
>
> On x86, when assigning VFIO-PCI devices protected with virtio-iommu
> we encounter the case where the guest tries to map IOVAs beyond 48b
> whereas the physical VTD IOMMU only supports 48b. This ends up with
> VFIO_MAP_DMA failures at qemu level because at kernel level,
> vfio_iommu_iova_dma_valid() check returns false on vfio_map_do_map().
>
> This is due to the fact the virtio-iommu currently unconditionally
> exposes an IOVA range of 64b through its config input range fields.
>
> This series removes this assumption by retrieving the usable IOVA
> regions through the VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE UAPI when
> a VFIO device is attached. This info is communicated to the
> virtio-iommu memory region, transformed into the inversed info, ie.
> the host reserved IOVA regions. Then the latter are combined with the
> reserved IOVA regions set through the virtio-iommu reserved-regions
> property. That way, the guest virtio-iommu driver, unchanged, is
> able to probe the whole set of reserved regions and prevent any IOVA
> belonging to those ranges from being used, achieving the original goal.
>
> Best Regards
>
> Eric
>
> This series can be found at:
> https://github.com/eauger/qemu/tree/virtio-iommu_geometry_v1
>
> Eric Auger (13):
>   memory: Let ReservedRegion use Range
>   memory: Introduce memory_region_iommu_set_iova_ranges
>   vfio: Collect container iova range info
>   virtio-iommu: Rename reserved_regions into prop_resv_regions
>   virtio-iommu: Introduce per IOMMUDevice reserved regions
>   range: Introduce range_inverse_array()
>   virtio-iommu: Implement set_iova_ranges() callback
>   range: Make range_compare() public
>   util/reserved-region: Add new ReservedRegion helpers
>   virtio-iommu: Consolidate host reserved regions and property set ones
>   test: Add some tests for range and resv-mem helpers
>   virtio-iommu: Resize memory region according to the max iova info
>   vfio: Remove 64-bit IOVA address space assumption
>
>  include/exec/memory.h|  30 -
>  include/hw/vfio/vfio-common.h|   2 +
>  include/hw/virtio/virtio-iommu.h |   7 +-
>  include/qemu/range.h |   9 ++
>  include/qemu/reserved-region.h   |  32 +
>  hw/core/qdev-properties-system.c |   9 +-
>  hw/vfio/common.c |  70 ---
>  hw/virtio/virtio-iommu-pci.c |   8 +-
>  hw/virtio/virtio-iommu.c |  85 +++--
>  softmmu/memory.c |  15 +++
>  tests/unit/test-resv-mem.c   | 198 +++
>  util/range.c |  41 ++-
>  util/reserved-region.c   |  94 +++
>  hw/virtio/trace-events   |   1 +
>  tests/unit/meson.build   |   1 +
>  util/meson.build |   1 +
>  16 files changed, 562 insertions(+), 41 deletions(-)
>  create mode 100644 include/qemu/reserved-region.h
>  create mode 100644 tests/unit/test-resv-mem.c
>  create mode 100644 util/reserved-region.c
>
> --
> 2.41.0
>
>
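
An editorial aside on the inversion step described in the cover letter above:
turning usable IOVA ranges into host-reserved regions can be sketched as
below. This is only an illustration, assuming sorted, non-overlapping input
ranges; it is not the series' actual range_inverse_array() implementation.

    #include <stdint.h>

    typedef struct { uint64_t low, high; } IovaRange;   /* inclusive bounds */

    /* Compute the complement of 'usable' within [0, UINT64_MAX]. */
    static int invert_ranges(const IovaRange *usable, int n,
                             IovaRange *resv, int max_resv)
    {
        uint64_t next = 0;
        int out = 0;

        for (int i = 0; i < n && out < max_resv; i++) {
            if (usable[i].low > next) {
                resv[out++] = (IovaRange){ next, usable[i].low - 1 };
            }
            next = usable[i].high + 1;
            if (next == 0) {        /* last usable range ends at UINT64_MAX */
                return out;
            }
        }
        if (out < max_resv) {
            resv[out++] = (IovaRange){ next, UINT64_MAX };
        }
        return out;
    }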




[PATCH v5 0/3] hw/{i2c,nvme}: mctp endpoint, nvme management interface model

2023-09-05 Thread Klaus Jensen
This adds a generic MCTP endpoint model that other devices may derive
from.

Also included is a very basic implementation of an NVMe-MI device,
supporting only a small subset of the required commands.

Since this all relies on i2c target mode, this can currently only be
used with an SoC that includes the Aspeed I2C controller.

The easiest way to get up and running with this is to grab my buildroot
overlay[1] (aspeed_ast2600evb_nmi_defconfig). It includes a modified dts
as well as a couple of required packages.

QEMU can then be launched along these lines:

  qemu-system-arm \
-nographic \
-M ast2600-evb \
-kernel output/images/zImage \
-initrd output/images/rootfs.cpio \
-dtb output/images/aspeed-ast2600-evb-nmi.dtb \
-nic user,hostfwd=tcp::-:22 \
-device nmi-i2c,address=0x3a \
-serial mon:stdio

From within the booted system,

  mctp addr add 8 dev mctpi2c15
  mctp link set mctpi2c15 up
  mctp route add 9 via mctpi2c15
  mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
  mi-mctp 1 9 info

Comments are very welcome!

  [1]: https://github.com/birkelund/hwtests/tree/main/br2-external

Signed-off-by: Klaus Jensen 
---
Changes in v5:
- Added a nmi_scratch_append() that asserts available space in the
  scratch buffer. This is a similar defensive strategy as used in
  hw/i2c/mctp.c
- Various small fixups in response to review (Jonathan)
- Link to v4: 
https://lore.kernel.org/r/20230823-nmi-i2c-v4-0-2b0f86e5b...@samsung.com

---
Klaus Jensen (3):
  hw/i2c: add smbus pec utility function
  hw/i2c: add mctp core
  hw/nvme: add nvme management interface model

 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/smbus_master.c |  26 +++
 hw/i2c/trace-events   |  13 ++
 hw/nvme/Kconfig   |   4 +
 hw/nvme/meson.build   |   1 +
 hw/nvme/nmi-i2c.c | 424 +
 hw/nvme/trace-events  |   6 +
 include/hw/i2c/mctp.h | 125 
 include/hw/i2c/smbus_master.h |   2 +
 include/net/mctp.h|  35 
 14 files changed, 1081 insertions(+)
---
base-commit: 17780edd81d27fcfdb7a802efc870a99788bd2fc
change-id: 20230822-nmi-i2c-d804ed5be7e6

Best regards,
-- 
Klaus Jensen 




Re: [RFC PATCH 1/3] Python: Drop support for Python 3.7

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 08:00, Paolo Bonzini wrote:



Il lun 4 set 2023, 12:34 Philippe Mathieu-Daudé > ha scritto:


 > Since it is safe to under our supported platform policy, bump our

Is 'under' a verb? This sentence is not obvious to me.


No, just drop "to".


Argh, now it is clearly obvious... :)




[PATCH v5 1/3] hw/i2c: add smbus pec utility function

2023-09-05 Thread Klaus Jensen
From: Klaus Jensen 

Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
message.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 hw/i2c/smbus_master.c | 26 ++
 include/hw/i2c/smbus_master.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
index 6a53c34e70b7..01a8e4700222 100644
--- a/hw/i2c/smbus_master.c
+++ b/hw/i2c/smbus_master.c
@@ -15,6 +15,32 @@
 #include "hw/i2c/i2c.h"
 #include "hw/i2c/smbus_master.h"
 
+static uint8_t crc8(uint16_t data)
+{
+int i;
+
+for (i = 0; i < 8; i++) {
+if (data & 0x8000) {
+data ^= 0x1070U << 3;
+}
+
+data <<= 1;
+}
+
+return (uint8_t)(data >> 8);
+}
+
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
+{
+int i;
+
+for (i = 0; i < len; i++) {
+crc = crc8((crc ^ buf[i]) << 8);
+}
+
+return crc;
+}
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read)
 {
diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
index bb13bc423c22..d90f81767d86 100644
--- a/include/hw/i2c/smbus_master.h
+++ b/include/hw/i2c/smbus_master.h
@@ -27,6 +27,8 @@
 
 #include "hw/i2c/i2c.h"
 
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len);
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
 int smbus_receive_byte(I2CBus *bus, uint8_t addr);

-- 
2.42.0




[PATCH v5 3/3] hw/nvme: add nvme management interface model

2023-09-05 Thread Klaus Jensen
From: Klaus Jensen 

Add the 'nmi-i2c' device that emulates an NVMe Management Interface
controller.

Initial support is very basic (Read NMI DS, Configuration Get).

This is based on previously posted code by Padmakar Kalghatgi, Arun
Kumar Agasar and Saurav Kumar.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/Kconfig  |   4 +
 hw/nvme/meson.build  |   1 +
 hw/nvme/nmi-i2c.c| 424 +++
 hw/nvme/trace-events |   6 +
 4 files changed, 435 insertions(+)

diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
index 8ac90942e55e..1d89a4f4ecea 100644
--- a/hw/nvme/Kconfig
+++ b/hw/nvme/Kconfig
@@ -2,3 +2,7 @@ config NVME_PCI
 bool
 default y if PCI_DEVICES
 depends on PCI
+
+config NVME_NMI_I2C
+bool
+default y if I2C_MCTP
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7bc85f31c149 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1,2 @@
 system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_NMI_I2C', if_true: files('nmi-i2c.c'))
diff --git a/hw/nvme/nmi-i2c.c b/hw/nvme/nmi-i2c.c
new file mode 100644
index ..6e0ba7bd2a37
--- /dev/null
+++ b/hw/nvme/nmi-i2c.c
@@ -0,0 +1,424 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ *
+ * SPDX-FileContributor: Padmakar Kalghatgi 
+ * SPDX-FileContributor: Arun Kumar Agasar 
+ * SPDX-FileContributor: Saurav Kumar 
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include "hw/registerfields.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+#include "trace.h"
+
+/* NVM Express Management Interface 1.2c, Section 3.1 */
+#define NMI_MAX_MESSAGE_LENGTH 4224
+
+#define TYPE_NMI_I2C_DEVICE "nmi-i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(NMIDevice, NMI_I2C_DEVICE)
+
+typedef struct NMIDevice {
+MCTPI2CEndpoint mctp;
+
+uint8_t buffer[NMI_MAX_MESSAGE_LENGTH];
+uint8_t scratch[NMI_MAX_MESSAGE_LENGTH];
+
+size_t  len;
+int64_t pos;
+} NMIDevice;
+
+FIELD(NMI_MCTPD, MT, 0, 7)
+FIELD(NMI_MCTPD, IC, 7, 1)
+
+#define NMI_MCTPD_MT_NMI 0x4
+#define NMI_MCTPD_IC_ENABLED 0x1
+
+FIELD(NMI_NMP, ROR, 7, 1)
+FIELD(NMI_NMP, NMIMT, 3, 4)
+
+#define NMI_NMP_NMIMT_NVME_MI 0x1
+#define NMI_NMP_NMIMT_NVME_ADMIN 0x2
+
+typedef struct NMIMessage {
+uint8_t mctpd;
+uint8_t nmp;
+uint8_t rsvd2[2];
+uint8_t payload[]; /* includes the Message Integrity Check */
+} NMIMessage;
+
+typedef struct NMIRequest {
+   uint8_t opc;
+   uint8_t rsvd1[3];
+   uint32_t dw0;
+   uint32_t dw1;
+   uint32_t mic;
+} NMIRequest;
+
+FIELD(NMI_CMD_READ_NMI_DS_DW0, DTYP, 24, 8)
+
+typedef enum NMIReadDSType {
+NMI_CMD_READ_NMI_DS_SUBSYSTEM   = 0x0,
+NMI_CMD_READ_NMI_DS_PORTS   = 0x1,
+NMI_CMD_READ_NMI_DS_CTRL_LIST   = 0x2,
+NMI_CMD_READ_NMI_DS_CTRL_INFO   = 0x3,
+NMI_CMD_READ_NMI_DS_OPT_CMD_SUPPORT = 0x4,
+NMI_CMD_READ_NMI_DS_MEB_CMD_SUPPORT = 0x5,
+} NMIReadDSType;
+
+#define NMI_STATUS_INVALID_PARAMETER 0x4
+
+static void nmi_scratch_append(NMIDevice *nmi, const void *buf, size_t count)
+{
+assert(nmi->pos + count <= NMI_MAX_MESSAGE_LENGTH);
+
+memcpy(nmi->scratch + nmi->pos, buf, count);
+nmi->pos += count;
+}
+
+static void nmi_set_parameter_error(NMIDevice *nmi, uint8_t bit, uint16_t byte)
+{
+/* NVM Express Management Interface 1.2c, Figure 30 */
+struct resp {
+uint8_t  status;
+uint8_t  bit;
+uint16_t byte;
+};
+
+struct resp buf = {
+.status = NMI_STATUS_INVALID_PARAMETER,
+.bit = bit & 0x3,
+.byte = byte,
+};
+
+nmi_scratch_append(nmi, &buf, sizeof(buf));
+}
+
+static void nmi_set_error(NMIDevice *nmi, uint8_t status)
+{
+const uint8_t buf[4] = {status,};
+
+nmi_scratch_append(nmi, buf, sizeof(buf));
+}
+
+static void nmi_handle_mi_read_nmi_ds(NMIDevice *nmi, NMIRequest *request)
+{
+I2CSlave *i2c = I2C_SLAVE(nmi);
+
+uint32_t dw0 = le32_to_cpu(request->dw0);
+uint8_t dtyp = FIELD_EX32(dw0, NMI_CMD_READ_NMI_DS_DW0, DTYP);
+const uint8_t *buf;
+size_t len;
+
+trace_nmi_handle_mi_read_nmi_ds(dtyp);
+
+static const uint8_t nmi_ds_subsystem[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x00,   /* number of ports */
+0x01,   /* major version */
+0x01,   /* minor version */
+};
+
+/*
+ * Cannot be static (or const) since we need to patch in the i2c
+ * address.
+ */
+const uint8_t nmi_ds_ports[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x02,   /* port type (smbus) */
+0x00,   /* reserved */
+0x40, 0x00, /* maximum mctp transission u

[PATCH v5 2/3] hw/i2c: add mctp core

2023-09-05 Thread Klaus Jensen
From: Klaus Jensen 

Add an abstract MCTP over I2C endpoint model. This implements MCTP
control message handling as well as handling the actual I2C transport
(packetization).

Devices are intended to derive from this and implement the class
methods.

Parts of this implementation is inspired by code[1] previously posted by
Jonathan Cameron.

Squashed a fix[2] from Matt Johnston.

  [1]: 
https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
  [2]: 
https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/

Tested-by: Jonathan Cameron 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/trace-events   |  13 ++
 include/hw/i2c/mctp.h | 125 +++
 include/net/mctp.h|  35 
 8 files changed, 618 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6111b6b4d928..8ca71167607d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,13 @@ F: tests/qtest/adm1272-test.c
 F: tests/qtest/max34451-test.c
 F: tests/qtest/isl_pmbus_vr-test.c
 
+MCTP I2C Transport
+M: Klaus Jensen 
+S: Maintained
+F: hw/i2c/mctp.c
+F: include/hw/i2c/mctp.h
+F: include/net/mctp.h
+
 Firmware schema specifications
 M: Philippe Mathieu-Daudé 
 R: Daniel P. Berrange 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e6834844051..5bcb1e0e8a6f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -541,6 +541,7 @@ config ASPEED_SOC
 select DS1338
 select FTGMAC100
 select I2C
+select I2C_MCTP
 select DPS310
 select PCA9552
 select SERIAL
diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig
index 14886b35dac2..2b2a50b83d1e 100644
--- a/hw/i2c/Kconfig
+++ b/hw/i2c/Kconfig
@@ -6,6 +6,10 @@ config I2C_DEVICES
 # to any board's i2c bus
 bool
 
+config I2C_MCTP
+bool
+select I2C
+
 config SMBUS
 bool
 select I2C
diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
new file mode 100644
index ..8d8e74567745
--- /dev/null
+++ b/hw/i2c/mctp.c
@@ -0,0 +1,432 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus_master.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+
+#include "trace.h"
+
+/* DSP0237 1.2.0, Figure 1 */
+typedef struct MCTPI2CPacketHeader {
+uint8_t dest;
+#define MCTP_I2C_COMMAND_CODE 0xf
+uint8_t command_code;
+uint8_t byte_count;
+uint8_t source;
+} MCTPI2CPacketHeader;
+
+typedef struct MCTPI2CPacket {
+MCTPI2CPacketHeader i2c;
+MCTPPacket  mctp;
+} MCTPI2CPacket;
+
+#define i2c_mctp_payload_offset offsetof(MCTPI2CPacket, mctp.payload)
+#define i2c_mctp_payload(buf) (buf + i2c_mctp_payload_offset)
+
+/* DSP0236 1.3.0, Figure 20 */
+typedef struct MCTPControlMessage {
+#define MCTP_MESSAGE_TYPE_CONTROL 0x0
+uint8_t type;
+#define MCTP_CONTROL_FLAGS_RQ   (1 << 7)
+#define MCTP_CONTROL_FLAGS_D(1 << 6)
+uint8_t flags;
+uint8_t command_code;
+uint8_t data[];
+} MCTPControlMessage;
+
+enum MCTPControlCommandCodes {
+MCTP_CONTROL_SET_EID= 0x01,
+MCTP_CONTROL_GET_EID= 0x02,
+MCTP_CONTROL_GET_VERSION= 0x04,
+MCTP_CONTROL_GET_MESSAGE_TYPE_SUPPORT   = 0x05,
+};
+
+#define MCTP_CONTROL_ERROR_UNSUPPORTED_CMD 0x5
+
+#define i2c_mctp_control_data_offset \
+(i2c_mctp_payload_offset + offsetof(MCTPControlMessage, data))
+#define i2c_mctp_control_data(buf) (buf + i2c_mctp_control_data_offset)
+
+/**
+ * The byte count field in the SMBUS Block Write contains the number of bytes
+ * *following* the field itself.
+ *
+ * This is at least 5.
+ *
+ * 1 byte for the MCTP/I2C piggy-backed I2C source address in addition to the
+ * size of the MCTP transport/packet header.
+ */
+#define MCTP_I2C_BYTE_COUNT_OFFSET (sizeof(MCTPPacketHeader) + 1)
+
+void i2c_mctp_schedule_send(MCTPI2CEndpoint *mctp)
+{
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(DEVICE(mctp)));
+
+mctp->tx.state = I2C_MCTP_STATE_TX_START_SEND;
+
+i2c_bus_master(i2c, mctp->tx.bh);
+}
+
+static void i2c_mctp_tx(void *opaque)
+{
+DeviceState *dev = DEVICE(opaque);
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(dev));
+I2CSlave *slave = I2C_SLAVE(dev);
+MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(dev);
+MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
+MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
+uint8_t flags = 0;
+
+switch (mctp->tx.state) {
+case I2C_MCTP_STATE_TX_SEND_BYTE:
+if (mctp->pos < mctp->len) {
+uint8_t byte = mctp->buffer[mctp->pos];
+
+t

Re: [PATCH] hw/pci-bridge/cxl-upstream: Add serial number extended capability support

2023-09-05 Thread Philippe Mathieu-Daudé

Hi Jonathan,

On 4/9/23 19:57, Jonathan Cameron wrote:

Will be needed so there is a defined serial number for
information queries via the Switch CCI.

Signed-off-by: Jonathan Cameron 
---
No ordering dependencies wrt to other CXL patch sets.

Whilst we 'need' it for the Switch CCI set it is valid without
it and aligns with existing EP serial number support. Seems sensible
to upstream this first and reduce my out of tree backlog a little!

  hw/pci-bridge/cxl_upstream.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index 2b9cf0cc97..15c4d84a56 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -14,6 +14,11 @@
  #include "hw/pci/msi.h"
  #include "hw/pci/pcie.h"
  #include "hw/pci/pcie_port.h"
+/*
+ * Null value of all Fs suggested by IEEE RA guidelines for use of
+ * EU, OUI and CID
+ */
+#define UI64_NULL (~0ULL)


Already defined in hw/mem/cxl_type3.c, can we move it to some common
CXL header? Or include/qemu/units.h?


  #define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2
  
@@ -30,6 +35,7 @@ typedef struct CXLUpstreamPort {

  /*< public >*/
  CXLComponentState cxl_cstate;
  DOECap doe_cdat;
+uint64_t sn;
  } CXLUpstreamPort;
  
  CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp)

@@ -326,8 +332,12 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp)
  if (rc) {
  goto err_cap;
  }
-
-cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET;
+if (usp->sn != UI64_NULL) {
+pcie_dev_ser_num_init(d, CXL_UPSTREAM_PORT_DVSEC_OFFSET, usp->sn);
+cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET + 0x0c;


Could it be clearer to have:

diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
@@ -23,2 +23,2 @@
-#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \
-(CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
+#define CXL_UPSTREAM_PORT_DVSEC_OFFSET(offset) \
+(CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF + offset)

?


+} else {
+cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET;
+}
  cxl_cstate->pdev = d;
  build_dvsecs(cxl_cstate);
  cxl_component_register_block_init(OBJECT(d), cxl_cstate, TYPE_CXL_USP);
@@ -366,6 +376,7 @@ static void cxl_usp_exitfn(PCIDevice *d)
  }
  
  static Property cxl_upstream_props[] = {

+DEFINE_PROP_UINT64("sn", CXLUpstreamPort, sn, UI64_NULL),
  DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename),
  DEFINE_PROP_END_OF_LIST()
  };





Re: [PATCH 1/5] cxl/mailbox: move mailbox effect definitions to a header

2023-09-05 Thread Philippe Mathieu-Daudé

On 1/9/23 03:29, Gregory Price wrote:

Preparation for allowing devices to define their own CCI commands

Signed-off-by: Gregory Price 
---
  hw/cxl/cxl-mailbox-utils.c   | 35 +++
  include/hw/cxl/cxl_mailbox.h | 18 ++
  2 files changed, 37 insertions(+), 16 deletions(-)
  create mode 100644 include/hw/cxl/cxl_mailbox.h


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 2/5] cxl/type3: Cleanup multiple CXL_TYPE3() calls in read/write functions

2023-09-05 Thread Philippe Mathieu-Daudé

On 1/9/23 03:29, Gregory Price wrote:

Call CXL_TYPE3 once at top of function to avoid multiple invocations.

Signed-off-by: Gregory Price 
---
  hw/mem/cxl_type3.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 0/1] qom: fix setting of qdev array properties

2023-09-05 Thread Kevin Wolf
Am 04.09.2023 um 18:25 hat Daniel P. Berrangé geschrieben:
> By the time of the 8.2.0 release, it will have been 2 years and 6
> releases since we accidentally broke setting of array properties
> for user creatable devices:
> 
>   https://gitlab.com/qemu-project/qemu/-/issues/1090

Oh, nice!

Well, maybe that sounds a bit wrong, but the syntax that was broken was
problematic and more of a hack, and after two years there is clearly no
need to bring the exact same syntax back now.

So I'd suggest we bring the functionality back, but with proper QAPI
lists instead of len-foo/foo[*].

If we ever want to continue with command line QAPIfication, this change
would already solve one of the compatibility concerns we've had in the
past.

> I still think for user creatable devices we'd be better off just
> mandating the use of JSON syntax for -device and thus leveraging
> the native JSON array type. This patch was the quick fix for the
> existing array property syntax though.

I agree, let's not apply this one. It puts another ugly hack in the
common QOM code path just to bring back the old ugly hack in qdev.

Kevin




Re: [PATCH 3/5] cxl/type3: Expose ct3 functions so that inheriters can call them

2023-09-05 Thread Philippe Mathieu-Daudé

Hi Gregory,

On 1/9/23 03:29, Gregory Price wrote:

For devices built on top of ct3, we need the init, realize, and
exit functions exposed to correctly start up and tear down.


You shouldn't need this. Your device class can inherit from
the CT3 base class by setting its .parent to TYPE_CXL_TYPE3:

static const TypeInfo my_cxl_types[] = {
{
.name   = TYPE_MY_CXL_DEVICE_X,
.parent = TYPE_CXL_TYPE3,
.class_init = dev_x_class_init,
},
{
.name   = TYPE_MY_CXL_DEVICE_Y,
.parent = TYPE_CXL_TYPE3,
.class_init = dev_y_class_init,
}
};

You can see some documentation about QOM here:
https://qemu-project.gitlab.io/qemu/devel/qom.html
But still you'll have to look at examples in the tree.


Signed-off-by: Gregory Price 
---
  hw/mem/cxl_type3.c  | 8 
  include/hw/cxl/cxl_device.h | 5 +
  2 files changed, 9 insertions(+), 4 deletions(-)





Re: [PATCH v22 01/20] CPU topology: extend with s390 specifics

2023-09-05 Thread Thomas Huth

On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:

From: Pierre Morel 

S390 adds two new SMP levels, drawers and books to the CPU
topology.
S390 CPUs have specific topology features like dedication and
entitlement. These indicate to the guest information on host
vCPU scheduling and help the guest make better scheduling decisions.

Let us provide the SMP properties with books and drawers levels
and S390 CPU with dedication and entitlement,

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
  qapi/machine-common.json| 21 +
  qapi/machine.json   | 21 ++---
  include/hw/boards.h | 10 +-
  include/hw/qdev-properties-system.h |  4 +++
  target/s390x/cpu.h  |  6 
  hw/core/machine-smp.c   | 48 -
  hw/core/machine.c   |  4 +++
  hw/core/qdev-properties-system.c| 13 
  hw/s390x/s390-virtio-ccw.c  |  2 ++
  softmmu/vl.c|  6 
  target/s390x/cpu.c  |  7 +
  qapi/meson.build|  1 +
  qemu-options.hx |  7 +++--
  13 files changed, 136 insertions(+), 14 deletions(-)
  create mode 100644 qapi/machine-common.json

diff --git a/qapi/machine-common.json b/qapi/machine-common.json
new file mode 100644
index 00..e40421bb37
--- /dev/null
+++ b/qapi/machine-common.json
@@ -0,0 +1,21 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Machines S390 data types
+##
+
+##
+# @CpuS390Entitlement:
+#
+# An enumeration of cpu entitlements that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Entitlement',
+  'prefix': 'S390_CPU_ENTITLEMENT',
+  'data': [ 'auto', 'low', 'medium', 'high' ] }
diff --git a/qapi/machine.json b/qapi/machine.json
index a08b6576ca..54f99f4ac1 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -9,6 +9,7 @@
  ##
  
  { 'include': 'common.json' }

+{ 'include': 'machine-common.json' }
  
  ##

  # @SysEmuTarget:
@@ -71,8 +72,8 @@
  #
  # @thread-id: ID of the underlying host thread
  #
-# @props: properties describing to which node/socket/core/thread
-# virtual CPU belongs to, provided if supported by board
+# @props: properties describing to which node/drawer/book/socket/core/thread
+# virtual CPU belongs to, provided if supported by board


The QAPI docs recently switched to 4 spaces indentation, see commit 
08349786c8430 ... so please do not re-indent the second line here.



  #
  # @target: the QEMU system emulation target, which determines which
  # additional fields will be listed (since 3.0)
@@ -901,7 +902,11 @@
  #
  # @node-id: NUMA node ID the CPU belongs to
  #
-# @socket-id: socket number within node/board the CPU belongs to
+# @drawer-id: drawer number within node/board the CPU belongs to (since 8.2)
+#
+# @book-id: book number within drawer/node/board the CPU belongs to (since 8.2)
+#
+# @socket-id: socket number within book/node/board the CPU belongs to
  #
  # @die-id: die number within socket the CPU belongs to (since 4.1)
  #
@@ -912,7 +917,7 @@
  #
  # @thread-id: thread number within core the CPU belongs to
  #
-# Note: currently there are 6 properties that could be present but
+# Note: currently there are 8 properties that could be present but
  # management should be prepared to pass through other properties
  # with device_add command to allow for future interface extension.
  # This also requires the filed names to be kept in sync with the
@@ -922,6 +927,8 @@
  ##
  { 'struct': 'CpuInstanceProperties',
'data': { '*node-id': 'int',
+'*drawer-id': 'int',
+'*book-id': 'int',
  '*socket-id': 'int',
  '*die-id': 'int',
  '*cluster-id': 'int',
@@ -1480,6 +1487,10 @@
  #
  # @cpus: number of virtual CPUs in the virtual machine
  #
+# @drawers: number of drawers in the CPU topology (since 8.2)
+#
+# @books: number of books in the CPU topology (since 8.2)
+#
  # @sockets: number of sockets in the CPU topology
  #
  # @dies: number of dies per socket in the CPU topology
@@ -1498,6 +1509,8 @@
  ##
  { 'struct': 'SMPConfiguration', 'data': {
   '*cpus': 'int',
+ '*drawers': 'int',
+ '*books': 'int',
   '*sockets': 'int',
   '*dies': 'int',
   '*clusters': 'int',

...

diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
index 0f4d9b6f7a..25019c91ee 100644
--- a/hw/core/machine-smp.c
+++ b/hw/core/machine-smp.c
@@ -33,6 +33,14 @@ static char *cpu_hierarchy_to_string(MachineState *ms)
  MachineClass *mc = MACHINE_GET_CLASS(ms);
  GString *s = g_string_new(NULL);
  
+if (mc->smp_props.drawers_supported) {

+g_string_append_print

Re: [PATCH] hw/pci-bridge/cxl-upstream: Add serial number extended capability support

2023-09-05 Thread Michael S. Tsirkin
On Tue, Sep 05, 2023 at 10:48:54AM +0200, Philippe Mathieu-Daudé wrote:
> Hi Jonathan,
> 
> On 4/9/23 19:57, Jonathan Cameron wrote:
> > Will be needed so there is a defined serial number for
> > information queries via the Switch CCI.
> > 
> > Signed-off-by: Jonathan Cameron 
> > ---
> > No ordering dependencies wrt to other CXL patch sets.
> > 
> > Whilst we 'need' it for the Switch CCI set it is valid without
> > it and aligns with existing EP serial number support. Seems sensible
> > to upstream this first and reduce my out of tree backlog a little!
> > 
> >   hw/pci-bridge/cxl_upstream.c | 15 +--
> >   1 file changed, 13 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> > index 2b9cf0cc97..15c4d84a56 100644
> > --- a/hw/pci-bridge/cxl_upstream.c
> > +++ b/hw/pci-bridge/cxl_upstream.c
> > @@ -14,6 +14,11 @@
> >   #include "hw/pci/msi.h"
> >   #include "hw/pci/pcie.h"
> >   #include "hw/pci/pcie_port.h"
> > +/*
> > + * Null value of all Fs suggested by IEEE RA guidelines for use of
> > + * EU, OUI and CID
> > + */
> > +#define UI64_NULL (~0ULL)
> 
> Already defined in hw/mem/cxl_type3.c, can we move it to some common
> CXL header? Or include/qemu/units.h?

not the last one I think - this is a cxl specific hack to detect that
user has changed the property.


I think we really should have a variant of DEFINE_PROP_XXX that sets a
flag allowing us to detect whether a property has been set manually.
This would be a generalization of DEFINE_PROP_ON_OFF_AUTO.


> >   #define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2
> > @@ -30,6 +35,7 @@ typedef struct CXLUpstreamPort {
> >   /*< public >*/
> >   CXLComponentState cxl_cstate;
> >   DOECap doe_cdat;
> > +uint64_t sn;
> >   } CXLUpstreamPort;
> >   CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp)
> > @@ -326,8 +332,12 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp)
> >   if (rc) {
> >   goto err_cap;
> >   }
> > -
> > -cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET;
> > +if (usp->sn != UI64_NULL) {
> > +pcie_dev_ser_num_init(d, CXL_UPSTREAM_PORT_DVSEC_OFFSET, usp->sn);
> > +cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET + 0x0c;
> 
> Could it be clearer to have:
> 
> diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> @@ -23,2 +23,2 @@
> -#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \
> -(CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
> +#define CXL_UPSTREAM_PORT_DVSEC_OFFSET(offset) \
> +(CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF + offset)
> 
> ?
> 
> > +} else {
> > +cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET;
> > +}
> >   cxl_cstate->pdev = d;
> >   build_dvsecs(cxl_cstate);
> >   cxl_component_register_block_init(OBJECT(d), cxl_cstate, 
> > TYPE_CXL_USP);
> > @@ -366,6 +376,7 @@ static void cxl_usp_exitfn(PCIDevice *d)
> >   }
> >   static Property cxl_upstream_props[] = {
> > +DEFINE_PROP_UINT64("sn", CXLUpstreamPort, sn, UI64_NULL),
> >   DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename),
> >   DEFINE_PROP_END_OF_LIST()
> >   };




Re: [PATCH 0/5 v2] CXL: SK hynix Niagara MHSLD Device

2023-09-05 Thread Philippe Mathieu-Daudé

On 1/9/23 03:29, Gregory Price wrote:

v2:
- 5 patch series, first 4 are pull-aheads that can be merged separately
- cci: rebased on 8-30 branch from jic23, dropped cci patches
- mailbox: dropped MHD commands, integrated into niagara (for now)
- mailbox: refactor CCI defines to avoid redefinition in niagara
- type3: cleanup duplicate typecasting
- type3: expose ct3 functions so inheriting devices may access them
- type3: add optional mhd validation function for memory access
- niagara: refactor to make niagara inherit type3 and override behavior
- niagara: refactor command definitions and types into header to make
understanding the device a bit easier for users
- style and formatting

This patch set includes an emulation of the SK hynix Niagara MHSLD
platform with custom CCI commands that allow for isolation of memory
blocks between attached hosts.

This device allows hosts to request memory blocks directly from the device,
rather than requiring the full DCD command set. As a matter of simplicity,
this is beneficial for testing and applications of dynamic memory
pooling on top of the 1.1 and 2.0 specifications.

Note that these CCI commands are not serviceable without a proper driver or
the kernel allowing raw CXL commands to be passed through the mailbox
driver, so users should enable `CONFIG_CXL_MEM_RAW_COMMANDS=y` on the
kernel of their QEMU instance if they wish to test it

Signed-off-by: Gregory Price 

Gregory Price (5):
   cxl/mailbox: move mailbox effect definitions to a header
   cxl/type3: Cleanup multiple CXL_TYPE3() calls in read/write functions
   cxl/type3: Expose ct3 functions so that inheriters can call them
   cxl/type3: add an optional mhd validation function for memory accesses
   cxl/vendor: SK hynix Niagara Multi-Headed SLD Device


Being at commit 17780edd81 I can't apply this series:

Applying: cxl/mailbox: move mailbox effect definitions to a header
error: patch failed: hw/cxl/cxl-mailbox-utils.c:12
error: hw/cxl/cxl-mailbox-utils.c: patch does not apply
Patch failed at 0001 cxl/mailbox: move mailbox effect definitions to a 
header


On what is it based?

Thanks,

Phil.



Re: [PATCH v2 05/19] host-utils: Add muldiv64_round_up

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 05:56, Nicholas Piggin wrote:

On Mon Sep 4, 2023 at 11:30 PM AEST, Cédric Le Goater wrote:



Someone really ought to take over PPC. Daniel and I are real
busy on other subsystems. Volunteers ?


I suppose I should. I could try do the next PR after this one
is merged.


Thanks a lot for volunteering!

Phil.




Re: [QEMU PATCH v4 09/13] virtio-gpu: Handle resource blob commands

2023-09-05 Thread Huang Rui
On Thu, Aug 31, 2023 at 06:24:32PM +0800, Akihiko Odaki wrote:
> On 2023/08/31 18:32, Huang Rui wrote:
> > From: Antonio Caggiano 
> > 
> > Support BLOB resources creation, mapping and unmapping by calling the
> > new stable virglrenderer 0.10 interface. Only enabled when available and
> > via the blob config. E.g. -device virtio-vga-gl,blob=true
> > 
> > Signed-off-by: Antonio Caggiano 
> > Signed-off-by: Dmitry Osipenko 
> > Signed-off-by: Xenia Ragiadakou 
> > Signed-off-by: Huang Rui 
> > ---
> > 
> > v1->v2:
> >  - Remove unused #include "hw/virtio/virtio-iommu.h"
> > 
> >  - Add a local function, called virgl_resource_destroy(), that is used
> >to release a vgpu resource on error paths and in resource_unref.
> > 
> >  - Remove virtio_gpu_virgl_resource_unmap from 
> > virtio_gpu_cleanup_mapping(),
> >since this function won't be called on blob resources and also 
> > because
> >blob resources are unmapped via virgl_cmd_resource_unmap_blob().
> > 
> >  - In virgl_cmd_resource_create_blob(), do proper cleanup in error paths
> >and move QTAILQ_INSERT_HEAD(&g->reslist, res, next) after the 
> > resource
> >has been fully initialized.
> > 
> >  - Memory region has a different life-cycle from virtio gpu resources
> >i.e. cannot be released synchronously along with the vgpu resource.
> >So, here the field "region" was changed to a pointer that will be
> >released automatically once the memory region is unparented and all
> >of its references have been released.
> >Also, since the pointer can be used to indicate whether the blob
> >is mapped, the explicit field "mapped" was removed.
> > 
> >  - In virgl_cmd_resource_map_blob(), add check on the value of
> >res->region, to prevent beeing called twice on the same resource.
> > 
> >  - Remove direct references to parent_obj.
> > 
> >  - Separate declarations from code.
> > 
> >   hw/display/virtio-gpu-virgl.c  | 213 +
> >   hw/display/virtio-gpu.c|   4 +-
> >   include/hw/virtio/virtio-gpu.h |   5 +
> >   meson.build|   4 +
> >   4 files changed, 225 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
> > index 312953ec16..17b634d4ee 100644
> > --- a/hw/display/virtio-gpu-virgl.c
> > +++ b/hw/display/virtio-gpu-virgl.c
> > @@ -17,6 +17,7 @@
> >   #include "trace.h"
> >   #include "hw/virtio/virtio.h"
> >   #include "hw/virtio/virtio-gpu.h"
> > +#include "hw/virtio/virtio-gpu-bswap.h"
> >   
> >   #include "ui/egl-helpers.h"
> >   
> > @@ -78,9 +79,24 @@ static void virgl_cmd_create_resource_3d(VirtIOGPU *g,
> >   virgl_renderer_resource_create(&args, NULL, 0);
> >   }
> >   
> > +static void virgl_resource_destroy(VirtIOGPU *g,
> > +   struct virtio_gpu_simple_resource *res)
> > +{
> > +if (!res)
> > +return;
> > +
> > +QTAILQ_REMOVE(&g->reslist, res, next);
> > +
> > +virtio_gpu_cleanup_mapping_iov(g, res->iov, res->iov_cnt);
> > +g_free(res->addrs);
> > +
> > +g_free(res);
> > +}
> > +
> >   static void virgl_cmd_resource_unref(VirtIOGPU *g,
> >struct virtio_gpu_ctrl_command *cmd)
> >   {
> > +struct virtio_gpu_simple_resource *res;
> >   struct virtio_gpu_resource_unref unref;
> >   struct iovec *res_iovs = NULL;
> >   int num_iovs = 0;
> > @@ -88,13 +104,22 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g,
> >   VIRTIO_GPU_FILL_CMD(unref);
> >   trace_virtio_gpu_cmd_res_unref(unref.resource_id);
> >   
> > +res = virtio_gpu_find_resource(g, unref.resource_id);
> > +
> >   virgl_renderer_resource_detach_iov(unref.resource_id,
> >  &res_iovs,
> >  &num_iovs);
> >   if (res_iovs != NULL && num_iovs != 0) {
> >   virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs);
> > +if (res) {
> > +res->iov = NULL;
> > +res->iov_cnt = 0;
> > +}
> >   }
> > +
> >   virgl_renderer_resource_unref(unref.resource_id);
> > +
> > +virgl_resource_destroy(g, res);
> >   }
> >   
> >   static void virgl_cmd_context_create(VirtIOGPU *g,
> > @@ -426,6 +451,183 @@ static void virgl_cmd_get_capset(VirtIOGPU *g,
> >   g_free(resp);
> >   }
> >   
> > +#ifdef HAVE_VIRGL_RESOURCE_BLOB
> > +
> > +static void virgl_cmd_resource_create_blob(VirtIOGPU *g,
> > +   struct virtio_gpu_ctrl_command 
> > *cmd)
> > +{
> > +struct virtio_gpu_simple_resource *res;
> > +struct virtio_gpu_resource_create_blob cblob;
> > +struct virgl_renderer_resource_create_blob_args virgl_args = { 0 };
> > +int ret;
> > +
> > +VIRTIO_GPU_FILL_CMD(cblob);
> > +virtio_gpu_create_blob_bswap(&cblob);
> > +trace_virtio_gpu_cmd_re

[PATCH v4] arm/kvm: Enable support for KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE

2023-09-05 Thread Shameer Kolothum via
Now that we have Eager Page Split support added for ARM in the kernel,
enable it in Qemu. This adds,
 -eager-split-size to -accel sub-options to set the eager page split chunk size.
 -enable KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE.

The chunk size specifies how many pages to break at a time, using a
single allocation. The bigger the chunk size, the more pages need to be
allocated ahead of time.

Reviewed-by: Gavin Shan 
Signed-off-by: Shameer Kolothum 
---
Changes:
v3: 
https://lore.kernel.org/qemu-devel/20230830114818.641-1-shameerali.kolothum.th...@huawei.com/
   -Added R-by by Gavin and replaced kvm_arm_eager_split_size_valid()
with a direct check.
v2: 
https://lore.kernel.org/qemu-devel/20230815092709.1290-1-shameerali.kolothum.th...@huawei.com/
   -Addressed comments from Gavin.
RFC v1: 
https://lore.kernel.org/qemu-devel/20230725150002.621-1-shameerali.kolothum.th...@huawei.com/
  -Updated qemu-options.hx with description
  -Addressed review comments from Peter and Gavin(Thanks).
---
 accel/kvm/kvm-all.c  |  1 +
 include/sysemu/kvm_int.h |  1 +
 qemu-options.hx  | 15 ++
 target/arm/kvm.c | 61 
 4 files changed, 78 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2ba7521695..ff1578bb32 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3763,6 +3763,7 @@ static void kvm_accel_instance_init(Object *obj)
 /* KVM dirty ring is by default off */
 s->kvm_dirty_ring_size = 0;
 s->kvm_dirty_ring_with_bitmap = false;
+s->kvm_eager_split_size = 0;
 s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN;
 s->notify_window = 0;
 s->xen_version = 0;
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 511b42bde5..a5b9122cb8 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -116,6 +116,7 @@ struct KVMState
 uint64_t kvm_dirty_ring_bytes;  /* Size of the per-vcpu dirty ring */
 uint32_t kvm_dirty_ring_size;   /* Number of dirty GFNs per ring */
 bool kvm_dirty_ring_with_bitmap;
+uint64_t kvm_eager_split_size;  /* Eager Page Splitting chunk size */
 struct KVMDirtyRingReaper reaper;
 NotifyVmexitOption notify_vmexit;
 uint32_t notify_window;
diff --git a/qemu-options.hx b/qemu-options.hx
index 29b98c3d4c..2e70704ee8 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -186,6 +186,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
 "split-wx=on|off (enable TCG split w^x mapping)\n"
 "tb-size=n (TCG translation block cache size)\n"
 "dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
+"eager-split-size=n (KVM Eager Page Split chunk size, 
default 0, disabled. ARM only)\n"
 "notify-vmexit=run|internal-error|disable,notify-window=n 
(enable notify VM exit and set notify window, x86 only)\n"
 "thread=single|multi (enable multi-threaded TCG)\n", 
QEMU_ARCH_ALL)
 SRST
@@ -244,6 +245,20 @@ SRST
 is disabled (dirty-ring-size=0).  When enabled, KVM will instead
 record dirty pages in a bitmap.
 
+``eager-split-size=n``
+KVM implements dirty page logging at the PAGE_SIZE granularity and
+enabling dirty-logging on a huge-page requires breaking it into
+PAGE_SIZE pages in the first place. KVM on ARM does this splitting
+lazily by default. There are performance benefits in doing huge-page
+split eagerly, especially in situations where TLBI costs associated
+with break-before-make sequences are considerable and also if guest
+workloads are read intensive. The size here specifies how many pages
+to break at a time and needs to be a valid block size which is
+1GB/2MB/4KB, 32MB/16KB and 512MB/64KB for 4KB/16KB/64KB PAGE_SIZE
+respectively. Be wary of specifying a higher size as it will have an
+impact on the memory. By default, this feature is disabled
+(eager-split-size=0).
+
 ``notify-vmexit=run|internal-error|disable,notify-window=n``
 Enables or disables notify VM exit support on x86 host and specify
 the corresponding notify window to trigger the VM exit if enabled.
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 23aeb09949..b66b936a95 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -30,6 +30,7 @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "qapi/visitor.h"
 #include "qemu/log.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
@@ -287,6 +288,26 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 }
 }
 
+if (s->kvm_eager_split_size) {
+uint32_t sizes;
+
+sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);
+if (!sizes) {
+s->kvm_eager_split_size = 0;
+warn_report("Eager Page Split support not available");
+} else if (!(s->kvm_eager_s
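
A hedged sketch of how this validation presumably continues (a
reconstruction, not the literal patch text): the requested chunk size has
to be one of the supported block sizes before the capability is enabled.

} else if (!(s->kvm_eager_split_size & sizes)) {
    error_report("Eager Page Split requested chunk size not valid");
    ret = -EINVAL;
} else {
    ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0,
                            s->kvm_eager_split_size);
    if (ret < 0) {
        error_report("Enabling of Eager Page Split failed: %s",
                     strerror(-ret));
    }
}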

Re: [QEMU PATCH v4 07/13] softmmu/memory: enable automatic deallocation of memory regions

2023-09-05 Thread Huang Rui
On Thu, Aug 31, 2023 at 06:10:08PM +0800, Akihiko Odaki wrote:
> On 2023/08/31 18:32, Huang Rui wrote:
> > From: Xenia Ragiadakou 
> > 
> > When the memory region has a different life-cycle from that of its parent,
> > it can be automatically released, once it has been unparented and once all
> > of its references have gone away, via the object's free callback.
> > 
> > However, currently, references to the memory region are held by its owner
> > without first incrementing the memory region object's reference count.
> > As a result, the automatic deallocation of the object, not taking into
> > account those references, results in use-after-free memory corruption.
> > 
> > This patch increases the reference count of the memory region object on
> > each memory_region_ref() and decreases it on each memory_region_unref().
> > 
> > Signed-off-by: Xenia Ragiadakou 
> > Signed-off-by: Huang Rui 
> > ---
> > 
> > New patch
> > 
> >   softmmu/memory.c | 19 +--
> >   1 file changed, 17 insertions(+), 2 deletions(-)
> > 
> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index 7d9494ce70..0fdd5eebf9 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -1797,6 +1797,15 @@ Object *memory_region_owner(MemoryRegion *mr)
> >   
> >   void memory_region_ref(MemoryRegion *mr)
> >   {
> > +if (!mr) {
> > +return;
> > +}
> > +
> > +/* Obtain a reference to prevent the memory region object
> > + * from being released under our feet.
> > + */
> > +object_ref(OBJECT(mr));
> > +
> >   /* MMIO callbacks most likely will access data that belongs
> >* to the owner, hence the need to ref/unref the owner whenever
> >* the memory region is in use.
> > @@ -1807,16 +1816,22 @@ void memory_region_ref(MemoryRegion *mr)
> >* Memory regions without an owner are supposed to never go away;
> >* we do not ref/unref them because it slows down DMA sensibly.
> >*/
> 
> The collapsed comment says:
>  > The memory region is a child of its owner.  As long as the
>  > owner doesn't call unparent itself on the memory region,
>  > ref-ing the owner will also keep the memory region alive.
>  > Memory regions without an owner are supposed to never go away;
>  > we do not ref/unref them because it slows down DMA sensibly.
> 
> It contradicts with this patch.

The reason we modify it is that we would like to address the memory
leak issue in the original code. Please see below: we find that the memory
region is released while still in use once we free (unref) the simple resource,
because the region is freed in object_finalize() after unparent when the ref
count drops to 0. The VM then crashes because of this.

In virgl_cmd_resource_map_blob():
memory_region_init_ram_device_ptr(res->region, OBJECT(g), NULL, size, data);
OBJECT(res->region)->free = g_free;
memory_region_add_subregion(&b->hostmem, mblob.offset, res->region);
memory_region_set_enabled(res->region, true);

In virtio_gpu_virgl_resource_unmap():
memory_region_set_enabled(res->region, false);
memory_region_del_subregion(&b->hostmem, res->region);
object_unparent(OBJECT(res->region));
res->region = NULL;

I spent a bit more time trying to understand your point. Do you want me to
update the corresponding comments, or do you have some concern about this change?

Thanks,
Ray

> 
> > -if (mr && mr->owner) {
> > +if (mr->owner) {
> >   object_ref(mr->owner);
> >   }
> >   }
> >   
> >   void memory_region_unref(MemoryRegion *mr)
> >   {
> > -if (mr && mr->owner) {
> > +if (!mr) {
> > +return;
> > +}
> > +
> > +if (mr->owner) {
> >   object_unref(mr->owner);
> >   }
> > +
> > +object_unref(OBJECT(mr));
> >   }
> >   
> >   uint64_t memory_region_size(MemoryRegion *mr)



Re: [PATCH 00/13] VIRTIO-IOMMU/VFIO: Don't assume 64b IOVA space

2023-09-05 Thread Eric Auger
Hi Yanghang,

On 9/5/23 10:22, YangHang Liu wrote:
> I have run the following two tests, but both failed:
> [1] start a VM with virtio-iommu + 2 ice PFs only via qemu-kvm 8.1.5
> Test result : the qemu-kvm keeps throwing the error:  VFIO_MAP_DMA
> failed: File exists. vfio_dma_map(0x56443d20fbe0, 0xe000, 0x1000,
> 0x7fb545709000) = -17 (File exists)
> [2] start a VM with virtio-iommu + 2 ice PFs via libvirt-9.5 + qemu-kvm 8.1.5
> Test result: the qemu-kvm core dump with
> ERROR:../qom/object.c:1198:object_unref: assertion failed: (obj->ref >
> 0). Bail out! ERROR:../qom/object.c:1198:object_unref: assertion
> failed: (obj->ref > 0)
>
> After removing the 2 PF from the VM, both tests passed.
> Tested-by: Yanghang Liu 

thank you for testing. If my understanding is correct, you still
encountered some issues with/after the series. If that is the case, you
should not offer your Tested-by, which would mean you tested the series and it
works fine for you / fixes your issue.

Coming back to the above-mentioned issues:

1) The "File exists" issue is known and is linked to the replay. This will
be handled separately, i.e. I need to resume working on it as my first
approach was flawed: see
https://lore.kernel.org/all/20221207133646.635760-1-eric.au...@redhat.com/
This is unrelated to this series. Note this shouldn't prevent your
passed-through device from working. Those should be just spurious
warnings that need to be removed.

2) The object_unref assertion most probably is linked to that series and
I will investigate asap.

Thank you again!

Eric
>
> Best Regards,
> YangHang Liu
>
>
> On Mon, Sep 4, 2023 at 4:08 PM Eric Auger  wrote:
>> On x86, when assigning VFIO-PCI devices protected with virtio-iommu
>> we encounter the case where the guest tries to map IOVAs beyond 48b
>> whereas the physical VTD IOMMU only supports 48b. This ends up with
>> VFIO_MAP_DMA failures at qemu level because at kernel level,
>> vfio_iommu_iova_dma_valid() check returns false on vfio_map_do_map().
>>
>> This is due to the fact the virtio-iommu currently unconditionally
>> exposes an IOVA range of 64b through its config input range fields.
>>
>> This series removes this assumption by retrieving the usable IOVA
>> regions through the VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE UAPI when
>> a VFIO device is attached. This info is communicated to the
>> virtio-iommu memory region, transformed into the inverse info, i.e.
>> the host reserved IOVA regions. Then the latter are combined with the
>> reserved IOVA regions set through the virtio-iommu reserved-regions
>> property. That way, the guest virtio-iommu driver, unchanged, is
>> able to probe the whole set of reserved regions and prevent any IOVA
>> belonging to those ranges from being used, achieving the original goal.
>>
>> Best Regards
>>
>> Eric
>>
>> This series can be found at:
>> https://github.com/eauger/qemu/tree/virtio-iommu_geometry_v1
>>
>> Eric Auger (13):
>>   memory: Let ReservedRegion use Range
>>   memory: Introduce memory_region_iommu_set_iova_ranges
>>   vfio: Collect container iova range info
>>   virtio-iommu: Rename reserved_regions into prop_resv_regions
>>   virtio-iommu: Introduce per IOMMUDevice reserved regions
>>   range: Introduce range_inverse_array()
>>   virtio-iommu: Implement set_iova_ranges() callback
>>   range: Make range_compare() public
>>   util/reserved-region: Add new ReservedRegion helpers
>>   virtio-iommu: Consolidate host reserved regions and property set ones
>>   test: Add some tests for range and resv-mem helpers
>>   virtio-iommu: Resize memory region according to the max iova info
>>   vfio: Remove 64-bit IOVA address space assumption
>>
>>  include/exec/memory.h|  30 -
>>  include/hw/vfio/vfio-common.h|   2 +
>>  include/hw/virtio/virtio-iommu.h |   7 +-
>>  include/qemu/range.h |   9 ++
>>  include/qemu/reserved-region.h   |  32 +
>>  hw/core/qdev-properties-system.c |   9 +-
>>  hw/vfio/common.c |  70 ---
>>  hw/virtio/virtio-iommu-pci.c |   8 +-
>>  hw/virtio/virtio-iommu.c |  85 +++--
>>  softmmu/memory.c |  15 +++
>>  tests/unit/test-resv-mem.c   | 198 +++
>>  util/range.c |  41 ++-
>>  util/reserved-region.c   |  94 +++
>>  hw/virtio/trace-events   |   1 +
>>  tests/unit/meson.build   |   1 +
>>  util/meson.build |   1 +
>>  16 files changed, 562 insertions(+), 41 deletions(-)
>>  create mode 100644 include/qemu/reserved-region.h
>>  create mode 100644 tests/unit/test-resv-mem.c
>>  create mode 100644 util/reserved-region.c
>>
>> --
>> 2.41.0
>>
>>




Re: [QEMU PATCH v4 07/13] softmmu/memory: enable automatic deallocation of memory regions

2023-09-05 Thread Akihiko Odaki

On 2023/09/05 18:07, Huang Rui wrote:

On Thu, Aug 31, 2023 at 06:10:08PM +0800, Akihiko Odaki wrote:

On 2023/08/31 18:32, Huang Rui wrote:

From: Xenia Ragiadakou 

When the memory region has a different life-cycle from that of its parent,
it can be automatically released, once it has been unparented and once all
of its references have gone away, via the object's free callback.

However, currently, references to the memory region are held by its owner
without first incrementing the memory region object's reference count.
As a result, the automatic deallocation of the object, not taking into
account those references, results in use-after-free memory corruption.

This patch increases the reference count of the memory region object on
each memory_region_ref() and decreases it on each memory_region_unref().

Signed-off-by: Xenia Ragiadakou 
Signed-off-by: Huang Rui 
---

New patch

   softmmu/memory.c | 19 +--
   1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7d9494ce70..0fdd5eebf9 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1797,6 +1797,15 @@ Object *memory_region_owner(MemoryRegion *mr)
   
   void memory_region_ref(MemoryRegion *mr)

   {
+if (!mr) {
+return;
+}
+
+/* Obtain a reference to prevent the memory region object
+ * from being released under our feet.
+ */
+object_ref(OBJECT(mr));
+
   /* MMIO callbacks most likely will access data that belongs
* to the owner, hence the need to ref/unref the owner whenever
* the memory region is in use.
@@ -1807,16 +1816,22 @@ void memory_region_ref(MemoryRegion *mr)
* Memory regions without an owner are supposed to never go away;
* we do not ref/unref them because it slows down DMA sensibly.
*/


The collapsed comment says:
  > The memory region is a child of its owner.  As long as the
  > owner doesn't call unparent itself on the memory region,
  > ref-ing the owner will also keep the memory region alive.
  > Memory regions without an owner are supposed to never go away;
  > we do not ref/unref them because it slows down DMA sensibly.

It contradicts with this patch.


The reason we modify it is that we would like to address the memory
leak issue in the original code. Please see below: we find that the memory
region is released while still in use once we free (unref) the simple resource,
because the region is freed in object_finalize() after unparent when the ref
count drops to 0. The VM then crashes because of this.

In virgl_cmd_resource_map_blob():
 memory_region_init_ram_device_ptr(res->region, OBJECT(g), NULL, size, 
data);
 OBJECT(res->region)->free = g_free;
 memory_region_add_subregion(&b->hostmem, mblob.offset, res->region);
 memory_region_set_enabled(res->region, true);

In virtio_gpu_virgl_resource_unmap():
 memory_region_set_enabled(res->region, false);
 memory_region_del_subregion(&b->hostmem, res->region);
 object_unparent(OBJECT(res->region));
 res->region = NULL;

I spent a bit more time trying to understand your point. Do you want me to
update the corresponding comments, or do you have some concern about this change?


As the comment says ref-ing memory regions without an owner will slow 
down DMA, you should avoid that. More concretely, you should check 
mr->owner before doing object_ref(OBJECT(mr)).
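
A minimal sketch of that suggestion, based on the hunk quoted above (not a
tested patch; only the placement of the new object_ref() changes):

void memory_region_ref(MemoryRegion *mr)
{
    if (!mr) {
        return;
    }

    /*
     * Only take a reference on the MemoryRegion object itself when it has
     * an owner: ownerless regions are supposed to never go away, and
     * ref-ing them would slow down DMA, as the collapsed comment explains.
     */
    if (mr->owner) {
        object_ref(OBJECT(mr));
        object_ref(mr->owner);
    }
}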


Regards,
Akihiko Odaki



[PATCH] migration/dirtyrate: use QEMU_CLOCK_HOST to report start-time

2023-09-05 Thread Andrei Gudkov via
Currently query-dirty-rate uses QEMU_CLOCK_REALTIME as
the source for start-time field. This translates to
clock_gettime(CLOCK_MONOTONIC), i.e. number of seconds
since host boot. This is not very useful. The only
reasonable use case of start-time I can imagine is to
check whether previously completed measurements are
too old or not. But this makes sense only if start-time
is reported as host wall-clock time.
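
As an illustration (a hypothetical client-side check, not an API that
exists in QEMU), a management tool could then decide whether a completed
measurement is still fresh along these lines:

    /* Requires <stdbool.h>, <stdint.h>, <time.h>.
     * start_time and calc_time are the query-dirty-rate fields, in seconds. */
    bool dirty_rate_result_is_fresh(int64_t start_time, int64_t calc_time,
                                    int64_t max_age_seconds)
    {
        int64_t now = time(NULL);   /* host wall-clock, seconds since epoch */
        return now - (start_time + calc_time) <= max_age_seconds;
    }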

This patch replaces source of start-time from
QEMU_CLOCK_REALTIME to QEMU_CLOCK_HOST.

Signed-off-by: Andrei Gudkov 
---
 qapi/migration.json   |  4 ++--
 migration/dirtyrate.c | 15 ++-
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 8843e74b59..63deb8e387 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1941,12 +1941,12 @@
 # 1. Measurement is in progress:
 #
 # <- {"status": "measuring", "sample-pages": 512,
-# "mode": "page-sampling", "start-time": 3665220, "calc-time": 10}
+# "mode": "page-sampling", "start-time": 1693900454, "calc-time": 10}
 #
 # 2. Measurement has been completed:
 #
 # <- {"status": "measured", "sample-pages": 512, "dirty-rate": 108,
-# "mode": "page-sampling", "start-time": 3665220, "calc-time": 10}
+# "mode": "page-sampling", "start-time": 1693900454, "calc-time": 10}
 ##
 { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
 
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index bccb3515e3..0510d68765 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -259,11 +259,10 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
 return info;
 }
 
-static void init_dirtyrate_stat(int64_t start_time,
-struct DirtyRateConfig config)
+static void init_dirtyrate_stat(struct DirtyRateConfig config)
 {
 DirtyStat.dirty_rate = -1;
-DirtyStat.start_time = start_time;
+DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 DirtyStat.calc_time = config.sample_period_seconds;
 DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
 
@@ -600,7 +599,7 @@ static void calculate_dirtyrate_dirty_bitmap(struct 
DirtyRateConfig config)
 record_dirtypages_bitmap(&dirty_pages, true);
 
 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-DirtyStat.start_time = start_time / 1000;
+DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 
 msec = config.sample_period_seconds * 1000;
 msec = dirty_stat_wait(msec, start_time);
@@ -628,7 +627,7 @@ static void calculate_dirtyrate_dirty_ring(struct 
DirtyRateConfig config)
 /* start log sync */
 global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
 
-DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 
 /* calculate vcpu dirtyrate */
 duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000,
@@ -657,6 +656,7 @@ static void calculate_dirtyrate_sample_vm(struct 
DirtyRateConfig config)
 
 rcu_read_lock();
 initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
 goto out;
 }
@@ -664,7 +664,6 @@ static void calculate_dirtyrate_sample_vm(struct 
DirtyRateConfig config)
 
 msec = config.sample_period_seconds * 1000;
 msec = dirty_stat_wait(msec, initial_time);
-DirtyStat.start_time = initial_time / 1000;
 DirtyStat.calc_time = msec / 1000;
 
 rcu_read_lock();
@@ -727,7 +726,6 @@ void qmp_calc_dirty_rate(int64_t calc_time,
 static struct DirtyRateConfig config;
 QemuThread thread;
 int ret;
-int64_t start_time;
 
 /*
  * If the dirty rate is already being measured, don't attempt to start.
@@ -799,8 +797,7 @@ void qmp_calc_dirty_rate(int64_t calc_time,
  **/
 dirtyrate_mode = mode;
 
-start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
-init_dirtyrate_stat(start_time, config);
+init_dirtyrate_stat(config);
 
 qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
(void *)&config, QEMU_THREAD_DETACHED);
-- 
2.30.2




Re: [QEMU PATCH v4 09/13] virtio-gpu: Handle resource blob commands

2023-09-05 Thread Akihiko Odaki

On 2023/09/05 18:08, Huang Rui wrote:

On Thu, Aug 31, 2023 at 06:24:32PM +0800, Akihiko Odaki wrote:

On 2023/08/31 18:32, Huang Rui wrote:

From: Antonio Caggiano 

Support BLOB resources creation, mapping and unmapping by calling the
new stable virglrenderer 0.10 interface. Only enabled when available and
via the blob config. E.g. -device virtio-vga-gl,blob=true

Signed-off-by: Antonio Caggiano 
Signed-off-by: Dmitry Osipenko 
Signed-off-by: Xenia Ragiadakou 
Signed-off-by: Huang Rui 
---

v1->v2:
  - Remove unused #include "hw/virtio/virtio-iommu.h"

  - Add a local function, called virgl_resource_destroy(), that is used
to release a vgpu resource on error paths and in resource_unref.

  - Remove virtio_gpu_virgl_resource_unmap from 
virtio_gpu_cleanup_mapping(),
since this function won't be called on blob resources and also because
blob resources are unmapped via virgl_cmd_resource_unmap_blob().

  - In virgl_cmd_resource_create_blob(), do proper cleanup in error paths
and move QTAILQ_INSERT_HEAD(&g->reslist, res, next) after the resource
has been fully initialized.

  - Memory region has a different life-cycle from virtio gpu resources
i.e. cannot be released synchronously along with the vgpu resource.
So, here the field "region" was changed to a pointer that will be
released automatically once the memory region is unparented and all
of its references have been released.
Also, since the pointer can be used to indicate whether the blob
is mapped, the explicit field "mapped" was removed.

  - In virgl_cmd_resource_map_blob(), add a check on the value of
res->region, to prevent it being called twice on the same resource.

  - Remove direct references to parent_obj.

  - Separate declarations from code.

   hw/display/virtio-gpu-virgl.c  | 213 +
   hw/display/virtio-gpu.c|   4 +-
   include/hw/virtio/virtio-gpu.h |   5 +
   meson.build|   4 +
   4 files changed, 225 insertions(+), 1 deletion(-)

diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 312953ec16..17b634d4ee 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -17,6 +17,7 @@
   #include "trace.h"
   #include "hw/virtio/virtio.h"
   #include "hw/virtio/virtio-gpu.h"
+#include "hw/virtio/virtio-gpu-bswap.h"
   
   #include "ui/egl-helpers.h"
   
@@ -78,9 +79,24 @@ static void virgl_cmd_create_resource_3d(VirtIOGPU *g,

   virgl_renderer_resource_create(&args, NULL, 0);
   }
   
+static void virgl_resource_destroy(VirtIOGPU *g,

+   struct virtio_gpu_simple_resource *res)
+{
+if (!res)
+return;
+
+QTAILQ_REMOVE(&g->reslist, res, next);
+
+virtio_gpu_cleanup_mapping_iov(g, res->iov, res->iov_cnt);
+g_free(res->addrs);
+
+g_free(res);
+}
+
   static void virgl_cmd_resource_unref(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
   {
+struct virtio_gpu_simple_resource *res;
   struct virtio_gpu_resource_unref unref;
   struct iovec *res_iovs = NULL;
   int num_iovs = 0;
@@ -88,13 +104,22 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g,
   VIRTIO_GPU_FILL_CMD(unref);
   trace_virtio_gpu_cmd_res_unref(unref.resource_id);
   
+res = virtio_gpu_find_resource(g, unref.resource_id);

+
   virgl_renderer_resource_detach_iov(unref.resource_id,
  &res_iovs,
  &num_iovs);
   if (res_iovs != NULL && num_iovs != 0) {
   virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs);
+if (res) {
+res->iov = NULL;
+res->iov_cnt = 0;
+}
   }
+
   virgl_renderer_resource_unref(unref.resource_id);
+
+virgl_resource_destroy(g, res);
   }
   
   static void virgl_cmd_context_create(VirtIOGPU *g,

@@ -426,6 +451,183 @@ static void virgl_cmd_get_capset(VirtIOGPU *g,
   g_free(resp);
   }
   
+#ifdef HAVE_VIRGL_RESOURCE_BLOB

+
+static void virgl_cmd_resource_create_blob(VirtIOGPU *g,
+   struct virtio_gpu_ctrl_command *cmd)
+{
+struct virtio_gpu_simple_resource *res;
+struct virtio_gpu_resource_create_blob cblob;
+struct virgl_renderer_resource_create_blob_args virgl_args = { 0 };
+int ret;
+
+VIRTIO_GPU_FILL_CMD(cblob);
+virtio_gpu_create_blob_bswap(&cblob);
+trace_virtio_gpu_cmd_res_create_blob(cblob.resource_id, cblob.size);
+
+if (cblob.resource_id == 0) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: resource id 0 is not allowed\n",
+  __func__);
+cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+return;
+}
+
+res = virtio_gpu_find_resource(g, cblob.resource_id);
+if (res) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: resource a

Re: [PATCH v5 01/20] tests/docker: Update docker-loongarch-cross toolchain

2023-09-05 Thread Alex Bennée


Richard Henderson  writes:

> Update from clfs 5.0 to clfs 8.1, which includes updates
> to binutils 2.41, gcc 13.2, and glibc 2.38.
>
> See https://github.com/loongson/build-tools
>
> Signed-off-by: Richard Henderson 
> ---
>  tests/docker/dockerfiles/debian-loongarch-cross.docker | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tests/docker/dockerfiles/debian-loongarch-cross.docker 
> b/tests/docker/dockerfiles/debian-loongarch-cross.docker
> index 9d957547b5..b4bf265717 100644
> --- a/tests/docker/dockerfiles/debian-loongarch-cross.docker
> +++ b/tests/docker/dockerfiles/debian-loongarch-cross.docker
> @@ -20,7 +20,7 @@ RUN apt-get update && \
>  git \
>  python3-minimal
>  
> -RUN curl -#SL 
> https://github.com/loongson/build-tools/releases/download/2022.05.29/loongarch64-clfs-5.0-cross-tools-gcc-glibc.tar.xz
>  \
> +RUN curl -#SL 
> https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
>  \
>  | tar -xJC /opt
>  
>  ENV PATH $PATH:/opt/cross-tools/bin

Queued to testing/next, thanks.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH 0/1] qom: fix setting of qdev array properties

2023-09-05 Thread Peter Maydell
On Tue, 5 Sept 2023 at 09:59, Kevin Wolf  wrote:
>
> On 04.09.2023 at 18:25, Daniel P. Berrangé wrote:
> > By the time of the 8.2.0 release, it will have been 2 years and 6
> > releases since we accidentally broke setting of array properties
> > for user creatable devices:
> >
> >   https://gitlab.com/qemu-project/qemu/-/issues/1090
>
> Oh, nice!
>
> Well, maybe that sounds a bit wrong, but the syntax that was broken was
> problematic and more of a hack, and after two years there is clearly no
> need to bring the exact same syntax back now.
>
> So I'd suggest we bring the functionality back, but with proper QAPI
> lists instead of len-foo/foo[*].

I don't object, as long as somebody is proposing to actually
do this work (eg, in this release cycle), rather than merely
suggest the idea as a reason why we should continue to leave
this device's configurability broken...

thanks
-- PMM



Re: [PATCH] migration/dirtyrate: use QEMU_CLOCK_HOST to report start-time

2023-09-05 Thread Philippe Mathieu-Daudé

Hi Andrei,

On 5/9/23 11:18, Andrei Gudkov via wrote:

Currently query-dirty-rate uses QEMU_CLOCK_REALTIME as
the source for start-time field. This translates to
clock_gettime(CLOCK_MONOTONIC), i.e. number of seconds
since host boot. This is not very useful. The only
reasonable use case of start-time I can imagine is to
check whether previously completed measurements are
too old or not. But this makes sense only if start-time
is reported as host wall-clock time.

This patch replaces source of start-time from
QEMU_CLOCK_REALTIME to QEMU_CLOCK_HOST.

Signed-off-by: Andrei Gudkov 
---
  qapi/migration.json   |  4 ++--
  migration/dirtyrate.c | 15 ++-
  2 files changed, 8 insertions(+), 11 deletions(-)




diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index bccb3515e3..0510d68765 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -259,11 +259,10 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
  return info;
  }
  
-static void init_dirtyrate_stat(int64_t start_time,

-struct DirtyRateConfig config)
+static void init_dirtyrate_stat(struct DirtyRateConfig config)
  {
  DirtyStat.dirty_rate = -1;
-DirtyStat.start_time = start_time;
+DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
  DirtyStat.calc_time = config.sample_period_seconds;
  DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
  
@@ -600,7 +599,7 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)

  record_dirtypages_bitmap(&dirty_pages, true);
  
  start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

-DirtyStat.start_time = start_time / 1000;
+DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;


You can directly use qemu_clock_get_us().




Re: [virtio-dev] [RFC PATCH v2] docs/interop: define PROBE feature for vhost-user VirtIO devices

2023-09-05 Thread Alex Bennée


Albert Esteve  writes:

> This looks great! Thanks for this proposal.
>
> On Fri, Sep 1, 2023 at 1:00 PM Alex Bennée  wrote:
>
>  Currently QEMU has to know some details about the VirtIO device
>  supported by a vhost-user daemon to be able to setup the guest. This
>  makes it hard for QEMU to add support for additional vhost-user
>  daemons without adding specific stubs for each additional VirtIO
>  device.
>
>  This patch suggests a new feature flag (VHOST_USER_PROTOCOL_F_PROBE)
>  which the back-end can advertise which allows a probe message to be
>  sent to get all the details QEMU needs to know in one message.
>
>  Together with the existing features VHOST_USER_PROTOCOL_F_STATUS and
>  VHOST_USER_PROTOCOL_F_CONFIG we can create "standalone" vhost-user
>  daemons which are capable of handling all aspects of the VirtIO
>  transactions with only a generic stub on the QEMU side. These daemons
>  can also be used without QEMU in situations where there isn't a full
>  VMM managing their setup.
>
>  Signed-off-by: Alex Bennée 
>
>  ---
>  v2
>- dropped F_STANDALONE in favour of F_PROBE
>- split probe details across several messages
>- probe messages don't automatically imply a standalone daemon
>- add wording where probe details interact (F_MQ/F_CONFIG)
>- define VMM and make clear QEMU is only one of many potential VMMs
>- reword commit message
>  ---
>   docs/interop/vhost-user.rst | 90 -
>   hw/virtio/vhost-user.c  |  8 
>   2 files changed, 88 insertions(+), 10 deletions(-)
>
>  diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>  index 5a070adbc1..ba3b5e07b7 100644
>  --- a/docs/interop/vhost-user.rst
>  +++ b/docs/interop/vhost-user.rst
>  @@ -7,6 +7,7 @@ Vhost-user Protocol
>   ..
> Copyright 2014 Virtual Open Systems Sarl.
> Copyright 2019 Intel Corporation
>  +  Copyright 2023 Linaro Ltd
> Licence: This work is licensed under the terms of the GNU GPL,
>  version 2 or later. See the COPYING file in the top-level
>  directory.
>  @@ -27,17 +28,31 @@ The protocol defines 2 sides of the communication, 
> *front-end* and
>   *back-end*. The *front-end* is the application that shares its virtqueues, 
> in
>   our case QEMU. The *back-end* is the consumer of the virtqueues.
>
>  -In the current implementation QEMU is the *front-end*, and the *back-end*
>  -is the external process consuming the virtio queues, for example a
>  -software Ethernet switch running in user space, such as Snabbswitch,
>  -or a block device back-end processing read & write to a virtual
>  -disk. In order to facilitate interoperability between various back-end
>  -implementations, it is recommended to follow the :ref:`Backend program
>  -conventions `.
>  +In the current implementation a Virtual Machine Manager (VMM) such as
>  +QEMU is the *front-end*, and the *back-end* is the external process
>  +consuming the virtio queues, for example a software Ethernet switch
>  +running in user space, such as Snabbswitch, or a block device back-end
>  +processing read & write to a virtual disk. In order to facilitate
>  +interoperability between various back-end implementations, it is
>  +recommended to follow the :ref:`Backend program conventions
>  +`.
>
>   The *front-end* and *back-end* can be either a client (i.e. connecting) or
>   server (listening) in the socket communication.
>
>  +Probing device details
>  +--
>  +
>  +Traditionally the vhost-user daemon *back-end* shares configuration
>  +responsibilities with the VMM *front-end* which needs to know certain
>  +key bits of information about the device. This means the VMM needs to
>  +define at least a minimal stub for each VirtIO device it wants to
>  +support. If the daemon supports the right set of protocol features the
>  +VMM can probe the daemon for the information it needs to setup the
>  +device. See :ref:`Probing features for standalone daemons
>  +` for more details.
>  +
>  +
>   Support for platforms other than Linux
>   --
>
>  @@ -316,6 +331,7 @@ replies. Here is a list of the ones that do:
>   * ``VHOST_USER_GET_VRING_BASE``
>   * ``VHOST_USER_SET_LOG_BASE`` (if ``VHOST_USER_PROTOCOL_F_LOG_SHMFD``)
>   * ``VHOST_USER_GET_INFLIGHT_FD`` (if 
> ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD``)
>  +* ``VHOST_USER_GET_BACKEND_SPECS`` (if ``VHOST_USER_PROTOCOL_F_STANDALONE``)
>
>   .. seealso::
>
>  @@ -396,9 +412,10 @@ must support changing some configuration aspects on the 
> fly.
>   Multiple queue support
>   --
>
>  -Many devices have a fixed number of virtqueues.  In this case the front-end
>  -already knows the number of available virtqueues without communicating with 
> the
>  -back-end.
>  +Many devices have a fixed number of virtqueues. In this case the
>  +*front-end* usually already knows the number of available virtqueues
>  +without communicating with the back-end. Fo

RE: [RFC PATCH v2 00/12] Confidential guest-assisted live migration

2023-09-05 Thread Shameerali Kolothum Thodi via


> -Original Message-
> From: Qemu-devel
> [mailto:qemu-devel-bounces+shameerali.kolothum.thodi=huawei.com@nong
> nu.org] On Behalf Of Dov Murik
> Sent: 23 August 2021 15:16
> To: qemu-devel@nongnu.org
> Cc: Tom Lendacky ; Ashish Kalra
> ; Brijesh Singh ; Michael
> S. Tsirkin ; Steve Rutherford ;
> James Bottomley ; Juan Quintela
> ; Dr. David Alan Gilbert ; Dov
> Murik ; Hubertus Franke ;
> Tobin Feldman-Fitzthum ; Paolo Bonzini
> 
> Subject: [RFC PATCH v2 00/12] Confidential guest-assisted live migration
> 
> This is an RFC series for fast migration of confidential guests using an
> in-guest migration helper that lives in OVMF.  QEMU VM live migration
> needs to read source VM's RAM and write it in the target VM; this
> mechanism doesn't work when the guest memory is encrypted or QEMU is
> prevented from reading it in another way.  In order to support live
> migration in such scenarios, we introduce an in-guest migration helper
> which can securely extract RAM content from the guest in order to send
> it to the target.  The migration helper is implemented as part of the
> VM's firmware in OVMF.
> 
> We've implemented and tested this on AMD SEV, but expect most of the
> processes can be used with other technologies that prevent direct access
> of hypervisor to the guest's memory.  Specifically, we don't use SEV's
> PSP migration commands (SEV_SEND_START, SEV_RECEIVE_START, etc) at all;
> but note that the mirror VM relies on
> KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
> to share the SEV ASID with the main VM.

Hi Dov,

Sorry if I missed it, but just to check: are there any updates to, or a revised
version of, this series? This guest-assisted method seems to be a good generic
approach for live migration, and I am wondering whether it is worth taking a
look at it for ARM CCA as well (I am not sure whether the ARM RMM spec will
have any specific proposal for live migration, but I couldn't find anything
public yet).

Please let me know if you plan to re-spin, or if there are any concerns with
this approach. I would appreciate it if you could point me to any relevant
discussion threads.

Thanks,
Shameer

> 
> Corresponding RFC patches for OVMF have been posted by Tobin
> Feldman-Fitzthum on edk2-devel [1].  Those include the crux of the
> migration helper: a mailbox protocol over a shared memory page which
> allows communication between QEMU and the migration helper.  In the
> source VM this is used to read a page and encrypt it for transport; in
> the target it is used to decrypt the incoming page and storing the
> content in the correct address in the guest memory.  All encryption and
> decryption operations occur inside the trusted context in the VM, and
> therefore the VM's memory plaintext content is never accessible to the
> hosts participating in the migration.
> 
> In order to allow OVMF to run the migration helper in parallel to the
> guest OS, we use a mirror VM [3], which shares the same memory mapping
> and SEV ASID as the main VM but has its own run loop.  To start the
> mirror vcpu and the migration handler, we added a temporary
> start-migration-handler QMP command; this will be removed in a future
> version to run as part of the migrate QMP command.
> 
> In the target VM we need the migration handler running to receive
> incoming RAM pages; to achieve that, we boot the VM into OVMF with a
> special fw_cfg value that causes OVMF to not boot the guest OS; we then
> allow QEMU to receive an incoming migration by issuing a new
> start-migrate-incoming QMP command.
> 
> The confidential RAM migration requires checking whether a given guest
> RAM page is encrypted or not.  This is achieved using SEV shared regions
> list tracking, which is implemented as part of the SEV live migration patch
> series [2].  This feature tracks hypercalls from OVMF and guest Linux to
> report changes of page encryption status so that QEMU has an up-to-date
> view of which memory regions are shared and which are encrypted.
> 
> We left a few unfinished edges in this RFC but decided to publish it to
> start the community discussion.  TODOs:
> 
> 1. QMP commands start-migration-handler and start-migrate-incoming are
>developer tools and should be performed automatically.
> 2. The entry point address of the in-guest migration handler and its GDT
>are currently hard-coded in QEMU (patch 8); instead they should be
>discovered using pc_system_ovmf_table_find.  Same applies for the
>mailbox address (patch 1).
> 3. For simplicity, this patch series forces the use of the
>guest-assisted migration instead of the SEV PSP-based migration.
>Ideally we might want the user to choose the desired mode using
>migrate-set-parameters or a similar mechanism.
> 4. There is currently no discovery protocol between QEMU and OVMF to
>verify that OVMF indeed supports in-guest migration handler.
> 
> 
> List of patches in this series:
> 
> 1-3: introduce new confidtial RAM migration functions which communicate
>  with the migration helper.
> 4-6: us

Re: [PATCH 00/13] VIRTIO-IOMMU/VFIO: Don't assume 64b IOVA space

2023-09-05 Thread Eric Auger
Hi,

On 9/5/23 10:22, YangHang Liu wrote:
> I have run the following two tests, but both failed:
> [1] start a VM with virtio-iommu + 2 ice PFs only via qemu-kvm 8.1.5
> Test result : the qemu-kvm keeps throwing the error:  VFIO_MAP_DMA
> failed: File exists. vfio_dma_map(0x56443d20fbe0, 0xe000, 0x1000,
> 0x7fb545709000) = -17 (File exists)
> [2] start a VM with virtio-iommu + 2 ice PFs via libvirt-9.5 + qemu-kvm 8.1.5
> Test result: the qemu-kvm core dump with
> ERROR:../qom/object.c:1198:object_unref: assertion failed: (obj->ref >
> 0). Bail out! ERROR:../qom/object.c:1198:object_unref: assertion
> failed: (obj->ref > 0)
This latter issue is introduced by patch
[PATCH 12/13] virtio-iommu: Resize memory region according to the max
iova info
and especially the call to

memory_region_set_size(&mr->parent_obj, max_iova + 1);

I don't really get why at the moment but I will investigate ...

Eric

>
> After removing the 2 PF from the VM, both tests passed.
>
> Tested-by: Yanghang Liu 
>
> Best Regards,
> YangHang Liu
>
>
> On Mon, Sep 4, 2023 at 4:08 PM Eric Auger  wrote:
>> On x86, when assigning VFIO-PCI devices protected with virtio-iommu
>> we encounter the case where the guest tries to map IOVAs beyond 48b
>> whereas the physical VTD IOMMU only supports 48b. This ends up with
>> VFIO_MAP_DMA failures at qemu level because at kernel level,
>> vfio_iommu_iova_dma_valid() check returns false on vfio_map_do_map().
>>
>> This is due to the fact the virtio-iommu currently unconditionally
>> exposes an IOVA range of 64b through its config input range fields.
>>
>> This series removes this assumption by retrieving the usable IOVA
>> regions through the VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE UAPI when
>> a VFIO device is attached. This info is communicated to the
>> virtio-iommu memory region, transformed into the inverse info, i.e.
>> the host reserved IOVA regions. Then the latter are combined with the
>> reserved IOVA regions set through the virtio-iommu reserved-regions
>> property. That way, the guest virtio-iommu driver, unchanged, is
>> able to probe the whole set of reserved regions and prevent any IOVA
>> belonging to those ranges from being used, achieving the original goal.
>>
>> Best Regards
>>
>> Eric
>>
>> This series can be found at:
>> https://github.com/eauger/qemu/tree/virtio-iommu_geometry_v1
>>
>> Eric Auger (13):
>>   memory: Let ReservedRegion use Range
>>   memory: Introduce memory_region_iommu_set_iova_ranges
>>   vfio: Collect container iova range info
>>   virtio-iommu: Rename reserved_regions into prop_resv_regions
>>   virtio-iommu: Introduce per IOMMUDevice reserved regions
>>   range: Introduce range_inverse_array()
>>   virtio-iommu: Implement set_iova_ranges() callback
>>   range: Make range_compare() public
>>   util/reserved-region: Add new ReservedRegion helpers
>>   virtio-iommu: Consolidate host reserved regions and property set ones
>>   test: Add some tests for range and resv-mem helpers
>>   virtio-iommu: Resize memory region according to the max iova info
>>   vfio: Remove 64-bit IOVA address space assumption
>>
>>  include/exec/memory.h|  30 -
>>  include/hw/vfio/vfio-common.h|   2 +
>>  include/hw/virtio/virtio-iommu.h |   7 +-
>>  include/qemu/range.h |   9 ++
>>  include/qemu/reserved-region.h   |  32 +
>>  hw/core/qdev-properties-system.c |   9 +-
>>  hw/vfio/common.c |  70 ---
>>  hw/virtio/virtio-iommu-pci.c |   8 +-
>>  hw/virtio/virtio-iommu.c |  85 +++--
>>  softmmu/memory.c |  15 +++
>>  tests/unit/test-resv-mem.c   | 198 +++
>>  util/range.c |  41 ++-
>>  util/reserved-region.c   |  94 +++
>>  hw/virtio/trace-events   |   1 +
>>  tests/unit/meson.build   |   1 +
>>  util/meson.build |   1 +
>>  16 files changed, 562 insertions(+), 41 deletions(-)
>>  create mode 100644 include/qemu/reserved-region.h
>>  create mode 100644 tests/unit/test-resv-mem.c
>>  create mode 100644 util/reserved-region.c
>>
>> --
>> 2.41.0
>>
>>




Re: [risu PATCH 2/4] s390x: Add simple s390x.risu file

2023-09-05 Thread Thomas Huth

On 04/09/2023 16.20, Ilya Leoshkevich wrote:

On Mon, 2023-09-04 at 16:00 +0200, Thomas Huth wrote:

This only adds a limited set of s390x instructions for initial
testing.
More instructions will be added later.

Signed-off-by: Thomas Huth 
---
  s390x.risu | 48 
  1 file changed, 48 insertions(+)
  create mode 100644 s390x.risu


Can this be somehow automatically derived from
target/s390x/tcg/insn-data.h.inc?


Hmm, maybe ... OTOH, if something is wrong in that file, you won't find the
bug with RISU if you used the same source, I guess...



Acked-by: Ilya Leoshkevich 


Thanks!

 Thomas





deadlock when using iothread during backup_clean()

2023-09-05 Thread Fiona Ebner
Hi,

as the title says, a deadlock is possible in backup_clean() when using a
drive with an IO thread. The main thread keeps waiting in
bdrv_graph_wrlock() for reader_count() to become 0, while the IO thread
waits for its AioContext lock, which the main thread holds (because it
also is the backup job's AioContext). See the stack trace below [1].

The reason why it happens becomes clear with following commit message:

> commit 31b2ddfea304afc498aca8cac171020ef33eb89b
> Author: Kevin Wolf 
> Date:   Mon Jun 5 10:57:10 2023 +0200
> 
> graph-lock: Unlock the AioContext while polling
> 
> If the caller keeps the AioContext lock for a block node in an iothread,
> polling in bdrv_graph_wrlock() deadlocks if the condition isn't
> fulfilled immediately.
> 
> Now that all callers make sure to actually have the AioContext locked
> when they call bdrv_replace_child_noperm() like they should, we can
> change bdrv_graph_wrlock() to take a BlockDriverState whose AioContext
> lock the caller holds (NULL if it doesn't) and unlock it temporarily
> while polling.
>

and noting that bdrv_graph_wrlock() is called with bs being NULL while
the caller holds the IO thread's AioContext lock.

The question is where should the IO thread's AioContext lock be dropped
by the main thread? The "Graph locking part 4 (node management)" series
[0] (also reproduced the deadlock with that applied) moves the
bdrv_graph_wrlock() further up to block_job_remove_all_bdrv(), but it
still passes along NULL as the argument.

Can we assume that block_job_remove_all_bdrv() always holds the job's
AioContext? And if yes, can we just tell bdrv_graph_wrlock() that it
needs to release that lock before polling, in order to fix the deadlock?
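
If both answers are yes, the change would be conceptually small; a sketch
of the idea (untested, and assuming the caller really does hold the
AioContext of the node being removed):

    /*
     * In block_job_remove_all_bdrv(), pass the node whose AioContext we
     * hold instead of NULL (c is the BdrvChild taken from job->nodes), so
     * that bdrv_graph_wrlock() can temporarily drop that lock while it
     * polls for reader_count() to reach 0, as described in the commit
     * message quoted above.
     */
    bdrv_graph_wrlock(c->bs);      /* instead of bdrv_graph_wrlock(NULL) */
    bdrv_root_unref_child(c);
    bdrv_graph_wrunlock();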

Best Regards,
Fiona

[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-08/msg02831.html

[1]:

IO thread:

> Thread 3 (Thread 0x7fa2209596c0 (LWP 119031) "qemu-system-x86"):
> #0  futex_wait (private=0, expected=2, futex_word=0x5648fc93b2c0) at 
> ../sysdeps/nptl/futex-internal.h:146
> #1  __GI___lll_lock_wait (futex=futex@entry=0x5648fc93b2c0, private=0) at 
> ./nptl/lowlevellock.c:49
> #2  0x7fa2240e532a in lll_mutex_lock_optimized (mutex=0x5648fc93b2c0) at 
> ./nptl/pthread_mutex_lock.c:48
> #3  ___pthread_mutex_lock (mutex=0x5648fc93b2c0) at 
> ./nptl/pthread_mutex_lock.c:128
> #4  0x5648fa1742f8 in qemu_mutex_lock_impl (mutex=0x5648fc93b2c0, 
> file=0x5648fa446579 "../util/async.c", line=728) at 
> ../util/qemu-thread-posix.c:94
> #5  0x5648fa174528 in qemu_rec_mutex_lock_impl (mutex=0x5648fc93b2c0, 
> file=0x5648fa446579 "../util/async.c", line=728) at 
> ../util/qemu-thread-posix.c:149
> #6  0x5648fa18dd6f in aio_context_acquire (ctx=0x5648fc93b260) at 
> ../util/async.c:728
> #7  0x5648fa18dce4 in aio_co_enter (ctx=0x5648fc93b260, 
> co=0x7fa217f49210) at ../util/async.c:710
> #8  0x5648fa18dc31 in aio_co_wake (co=0x7fa217f49210) at 
> ../util/async.c:695
> #9  0x5648fa08e4a2 in luring_process_completions (s=0x5648fdf2dd00) at 
> ../block/io_uring.c:216
> #10 0x5648fa08e6c7 in luring_process_completions_and_submit 
> (s=0x5648fdf2dd00) at ../block/io_uring.c:268
> #11 0x5648fa08e727 in qemu_luring_completion_cb (opaque=0x5648fdf2dd00) 
> at ../block/io_uring.c:284
> #12 0x5648fa16f433 in aio_dispatch_handler (ctx=0x5648fc93b260, 
> node=0x5648fdf2de50) at ../util/aio-posix.c:372
> #13 0x5648fa16f506 in aio_dispatch_ready_handlers (ctx=0x5648fc93b260, 
> ready_list=0x7fa220954180) at ../util/aio-posix.c:401
> #14 0x5648fa16ff7d in aio_poll (ctx=0x5648fc93b260, blocking=true) at 
> ../util/aio-posix.c:723
> #15 0x5648f9fbaa59 in iothread_run (opaque=0x5648fc5ed6b0) at 
> ../iothread.c:63
> #16 0x5648fa175046 in qemu_thread_start (args=0x5648fc93b8f0) at 
> ../util/qemu-thread-posix.c:541
> #17 0x7fa2240e2044 in start_thread (arg=) at 
> ./nptl/pthread_create.c:442
> #18 0x7fa2241625fc in clone3 () at 
> ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
> 

main thread:

> Thread 1 (Thread 0x7fa2214bf340 (LWP 119029) "qemu-system-x86"):
> #0  0x7fa224155136 in __ppoll (fds=0x5648fdf2ce50, nfds=2, 
> timeout=, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:42
> #1  0x5648fa193102 in qemu_poll_ns (fds=0x5648fdf2ce50, nfds=2, 
> timeout=-1) at ../util/qemu-timer.c:339
> #2  0x5648fa17042f in fdmon_poll_wait (ctx=0x5648fc6c9740, 
> ready_list=0x72757260, timeout=-1) at ../util/fdmon-poll.c:79
> #3  0x5648fa16fd6b in aio_poll (ctx=0x5648fc6c9740, blocking=true) at 
> ../util/aio-posix.c:670
> #4  0x5648f9ffc0bc in bdrv_graph_wrlock (bs=0x0) at 
> ../block/graph-lock.c:145
> #5  0x5648f9fc78fa in bdrv_replace_child_noperm (child=0x5648fd729fd0, 
> new_bs=0x0) at ../block.c:2897
> #6  0x5648f9fc8487 in bdrv_root_unref_child (child=0x5648fd729fd0) at 
> ../block.c:3227
> #7  0x5648f9fd4b50 in block_job_remove_all_bdrv (job=0x5648fe0b6960) at 
> ../blockjob.c:208
> #8  0x5648f9feb11b in bac

Re: [virtio-dev] [RFC PATCH v2] docs/interop: define PROBE feature for vhost-user VirtIO devices

2023-09-05 Thread Albert Esteve
On Tue, Sep 5, 2023 at 11:43 AM Alex Bennée  wrote:

>
> Albert Esteve  writes:
>
> > This looks great! Thanks for this proposal.
> >
> > On Fri, Sep 1, 2023 at 1:00 PM Alex Bennée 
> wrote:
> >
> >  Currently QEMU has to know some details about the VirtIO device
> >  supported by a vhost-user daemon to be able to setup the guest. This
> >  makes it hard for QEMU to add support for additional vhost-user
> >  daemons without adding specific stubs for each additional VirtIO
> >  device.
> >
> >  This patch suggests a new feature flag (VHOST_USER_PROTOCOL_F_PROBE)
> >  which the back-end can advertise which allows a probe message to be
> >  sent to get all the details QEMU needs to know in one message.
> >
> >  Together with the existing features VHOST_USER_PROTOCOL_F_STATUS and
> >  VHOST_USER_PROTOCOL_F_CONFIG we can create "standalone" vhost-user
> >  daemons which are capable of handling all aspects of the VirtIO
> >  transactions with only a generic stub on the QEMU side. These daemons
> >  can also be used without QEMU in situations where there isn't a full
> >  VMM managing their setup.
> >
> >  Signed-off-by: Alex Bennée 
> >
> >  ---
> >  v2
> >- dropped F_STANDALONE in favour of F_PROBE
> >- split probe details across several messages
> >- probe messages don't automatically imply a standalone daemon
> >- add wording where probe details interact (F_MQ/F_CONFIG)
> >- define VMM and make clear QEMU is only one of many potential VMMs
> >- reword commit message
> >  ---
> >   docs/interop/vhost-user.rst | 90 -
> >   hw/virtio/vhost-user.c  |  8 
> >   2 files changed, 88 insertions(+), 10 deletions(-)
> >
> >  diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> >  index 5a070adbc1..ba3b5e07b7 100644
> >  --- a/docs/interop/vhost-user.rst
> >  +++ b/docs/interop/vhost-user.rst
> >  @@ -7,6 +7,7 @@ Vhost-user Protocol
> >   ..
> > Copyright 2014 Virtual Open Systems Sarl.
> > Copyright 2019 Intel Corporation
> >  +  Copyright 2023 Linaro Ltd
> > Licence: This work is licensed under the terms of the GNU GPL,
> >  version 2 or later. See the COPYING file in the top-level
> >  directory.
> >  @@ -27,17 +28,31 @@ The protocol defines 2 sides of the communication,
> *front-end* and
> >   *back-end*. The *front-end* is the application that shares its
> virtqueues, in
> >   our case QEMU. The *back-end* is the consumer of the virtqueues.
> >
> >  -In the current implementation QEMU is the *front-end*, and the
> *back-end*
> >  -is the external process consuming the virtio queues, for example a
> >  -software Ethernet switch running in user space, such as Snabbswitch,
> >  -or a block device back-end processing read & write to a virtual
> >  -disk. In order to facilitate interoperability between various back-end
> >  -implementations, it is recommended to follow the :ref:`Backend program
> >  -conventions `.
> >  +In the current implementation a Virtual Machine Manager (VMM) such as
> >  +QEMU is the *front-end*, and the *back-end* is the external process
> >  +consuming the virtio queues, for example a software Ethernet switch
> >  +running in user space, such as Snabbswitch, or a block device back-end
> >  +processing read & write to a virtual disk. In order to facilitate
> >  +interoperability between various back-end implementations, it is
> >  +recommended to follow the :ref:`Backend program conventions
> >  +`.
> >
> >   The *front-end* and *back-end* can be either a client (i.e.
> connecting) or
> >   server (listening) in the socket communication.
> >
> >  +Probing device details
> >  +--
> >  +
> >  +Traditionally the vhost-user daemon *back-end* shares configuration
> >  +responsibilities with the VMM *front-end* which needs to know certain
> >  +key bits of information about the device. This means the VMM needs to
> >  +define at least a minimal stub for each VirtIO device it wants to
> >  +support. If the daemon supports the right set of protocol features the
> >  +VMM can probe the daemon for the information it needs to setup the
> >  +device. See :ref:`Probing features for standalone daemons
> >  +` for more details.
> >  +
> >  +
> >   Support for platforms other than Linux
> >   --
> >
> >  @@ -316,6 +331,7 @@ replies. Here is a list of the ones that do:
> >   * ``VHOST_USER_GET_VRING_BASE``
> >   * ``VHOST_USER_SET_LOG_BASE`` (if ``VHOST_USER_PROTOCOL_F_LOG_SHMFD``)
> >   * ``VHOST_USER_GET_INFLIGHT_FD`` (if
> ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD``)
> >  +* ``VHOST_USER_GET_BACKEND_SPECS`` (if
> ``VHOST_USER_PROTOCOL_F_STANDALONE``)
> >
> >   .. seealso::
> >
> >  @@ -396,9 +412,10 @@ must support changing some configuration aspects
> on the fly.
> >   Multiple queue support
> >   --
> >
> >  -Many devices have a fixed number of virtqueues.  In this case the
> front-end
> >  -already knows the numb

[PATCH] docs/system/replay: do not show removed command line option

2023-09-05 Thread Paolo Bonzini
Cc: qemu-triv...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
 docs/system/replay.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/system/replay.rst b/docs/system/replay.rst
index 3105327423c..ca7c17c63da 100644
--- a/docs/system/replay.rst
+++ b/docs/system/replay.rst
@@ -181,7 +181,7 @@ Audio data is recorded and replay automatically. The 
command line for recording
 and replaying must contain identical specifications of audio hardware, e.g.:
 
 .. parsed-literal::
--soundhw ac97
+-audio pa,model=ac97
 
 Serial ports
 
-- 
2.41.0




Re: QEMU features useful for Xen development?

2023-09-05 Thread Alex Bennée


Ayan Kumar Halder  writes:

> Hi Peter/Alex,
>
> Appreciate your help. :)
>
> On 31/08/2023 11:03, Peter Maydell wrote:
>> On Thu, 31 Aug 2023 at 10:53, Alex Bennée  wrote:
>>>
>>> Peter Maydell  writes:
>>>
 On Thu, 31 Aug 2023 at 01:57, Stefano Stabellini  
 wrote:
> As Xen is gaining R52 and R82 support, it would be great to be able to
> use QEMU for development and testing there as well, but I don't think
> QEMU can emulate EL2 properly for the Cortex-R architecture. We would
> need EL2 support in the GIC/timer for R52/R82 as well.
 We do actually have a Cortex-R52 model which at least in theory
 should include EL2 support, though as usual with newer QEMU
 stuff it quite likely has lurking bugs; I'm not sure how much
 testing it's had. Also there is currently no board model which
 will work with the Cortex-R52 so it's a bit tricky to use in practice.
 (What sort of board model would Xen want to use it with?)
>>> We already model a bunch of the mps2/mps3 images so I'm assuming adding
>>> the mps3-an536 would be a fairly simple step to do (mps2tz.c is mostly
>>> tweaking config values). The question is would it be a useful target for
>>> Xen?
>> All our MPS2/MPS3 boards are M-profile. That means we have the
>> device models for all the interesting devices on the board, but
>> it would be simpler to write the an536 board model separately.
>> (In particular, the M-profile boards are wrappers around an
>> "ARMSSE" sort-of-like-an-SoC component; there's no equivalent
>> for the Cortex-R52.)
>>
>>>https://developer.arm.com/documentation/dai0536/latest/
>
> Yes, it will be helpful if Qemu can model this board. We have a
> downstream port of Xen on R52 (upstreaming is in progress).
>
> So, we can test the Qemu model with Xen.
>
> Also if all works fine, we might consider adding this to the upstream
> Xen CI docker.
>
> Out of curiosity, are you planning to add Qemu R52 SoC support to Zephyr ?

Generally enabling other software platforms is out of scope for the QEMU
team, as we have plenty to do in QEMU itself. However, it's
certainly useful to have images we can test with.

Eyeballing the Zephyr docs it looks like it already supports some
R-profile cores on various boards, including CPU_CORTEX_R52 for the
NXPS32Z/E board. The BSP sections mostly look like config fragments but
I'm not really familiar with how Zephyr goes together.

I can ask our micro-controller experts what might be missing and need
implementing but I can't commit them to work on it ;-)

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PATCH] audio: remove QEMU_AUDIO_* and -audio-help support

2023-09-05 Thread Paolo Bonzini
These have been deprecated for a long time, and the introduction
of -audio in 7.1.0 has cemented the new way of specifying an
audio backend.  Remove all the associated baggage, including the
concept of default audio backends.

Signed-off-by: Paolo Bonzini 
---
 audio/alsaaudio.c   |   1 -
 audio/audio.c   |  44 +--
 audio/audio.h   |   1 -
 audio/audio_int.h   |   2 -
 audio/audio_legacy.c| 591 
 audio/coreaudio.m   |   1 -
 audio/dbusaudio.c   |   1 -
 audio/dsoundaudio.c |   1 -
 audio/jackaudio.c   |   1 -
 audio/meson.build   |   1 -
 audio/noaudio.c |   1 -
 audio/ossaudio.c|   1 -
 audio/paaudio.c |   1 -
 audio/pwaudio.c |   1 -
 audio/sdlaudio.c|   1 -
 audio/sndioaudio.c  |   1 -
 audio/wavaudio.c|   1 -
 docs/about/deprecated.rst   |   8 -
 docs/about/removed-features.rst |   6 +
 qemu-options.hx |  10 -
 softmmu/vl.c|   4 -
 21 files changed, 10 insertions(+), 669 deletions(-)
 delete mode 100644 audio/audio_legacy.c

diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c
index 057571dd1e0..5ffb39c4182 100644
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -960,7 +960,6 @@ static struct audio_driver alsa_audio_driver = {
 .init   = alsa_audio_init,
 .fini   = alsa_audio_fini,
 .pcm_ops= &alsa_pcm_ops,
-.can_be_default = 1,
 .max_voices_out = INT_MAX,
 .max_voices_in  = INT_MAX,
 .voice_size_out = sizeof (ALSAVoiceOut),
diff --git a/audio/audio.c b/audio/audio.c
index 90c7c49d116..13f519611f6 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -111,8 +111,6 @@ const struct mixeng_volume nominal_volume = {
 #endif
 };
 
-static bool legacy_config = true;
-
 int audio_bug (const char *funcname, int cond)
 {
 if (cond) {
@@ -1712,46 +1710,14 @@ static AudioState *audio_init(Audiodev *dev, const char 
*name)
 /* silence gcc warning about uninitialized variable */
 AudiodevListHead head = QSIMPLEQ_HEAD_INITIALIZER(head);
 
-if (using_spice) {
-/*
- * When using spice allow the spice audio driver being picked
- * as default.
- *
- * Temporary hack.  Using audio devices without explicit
- * audiodev= property is already deprecated.  Same goes for
- * the -soundhw switch.  Once this support gets finally
- * removed we can also drop the concept of a default audio
- * backend and this can go away.
- */
-driver = audio_driver_lookup("spice");
-if (driver) {
-driver->can_be_default = 1;
-}
-}
-
 if (dev) {
 /* -audiodev option */
-legacy_config = false;
 drvname = AudiodevDriver_str(dev->driver);
 } else if (!QTAILQ_EMPTY(&audio_states)) {
-if (!legacy_config) {
-dolog("Device %s: audiodev default parameter is deprecated, please 
"
-  "specify audiodev=%s\n", name,
-  QTAILQ_FIRST(&audio_states)->dev->id);
-}
+dolog("Device %s: audiodev default parameter is deprecated, please "
+  "specify audiodev=%s\n", name,
+  QTAILQ_FIRST(&audio_states)->dev->id);
 return QTAILQ_FIRST(&audio_states);
-} else {
-/* legacy implicit initialization */
-head = audio_handle_legacy_opts();
-/*
- * In case of legacy initialization, all Audiodevs in the list will 
have
- * the same configuration (except the driver), so it doesn't matter 
which
- * one we chose.  We need an Audiodev to set up AudioState before we 
can
- * init a driver.  Also note that dev at this point is still in the
- * list.
- */
-dev = QSIMPLEQ_FIRST(&head)->dev;
-audio_validate_opts(dev, &error_abort);
 }
 
 s = g_new0(AudioState, 1);
@@ -1876,9 +1842,7 @@ CaptureVoiceOut *AUD_add_capture(
 struct capture_callback *cb;
 
 if (!s) {
-if (!legacy_config) {
-dolog("Capturing without setting an audiodev is deprecated\n");
-}
+dolog("Capturing without setting an audiodev is deprecated\n");
 s = audio_init(NULL, NULL);
 }
 
diff --git a/audio/audio.h b/audio/audio.h
index 01bdc567fb1..a276ec13eba 100644
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -172,7 +172,6 @@ void audio_define(Audiodev *audio);
 void audio_parse_option(const char *opt);
 bool audio_init_audiodevs(void);
 void audio_help(void);
-void audio_legacy_help(void);
 
 AudioState *audio_state_by_name(const char *name);
 const char *audio_get_id(QEMUSoundCard *card);
diff --git a/audio/audio_int.h b/audio/audio_int.h
index e57ff50155a..e6fee5ba4b7 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -146,7 +146,6 @@ struct audio_dri

Re: [PATCH] docs/system/replay: do not show removed command line option

2023-09-05 Thread Thomas Huth

On 05/09/2023 12.07, Paolo Bonzini wrote:

Cc: qemu-triv...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
  docs/system/replay.rst | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/system/replay.rst b/docs/system/replay.rst
index 3105327423c..ca7c17c63da 100644
--- a/docs/system/replay.rst
+++ b/docs/system/replay.rst
@@ -181,7 +181,7 @@ Audio data is recorded and replay automatically. The 
command line for recording
  and replaying must contain identical specifications of audio hardware, e.g.:
  
  .. parsed-literal::

--soundhw ac97
+-audio pa,model=ac97
  
  Serial ports

  


Reviewed-by: Thomas Huth 




Re: [PATCH] audio: remove QEMU_AUDIO_* and -audio-help support

2023-09-05 Thread Thomas Huth

On 05/09/2023 12.19, Paolo Bonzini wrote:

These have been deprecated for a long time, and the introduction
of -audio in 7.1.0 has cemented the new way of specifying an
audio backend.  Remove all the associated baggage, including the
concept of default audio backends.

Signed-off-by: Paolo Bonzini 
---
  audio/alsaaudio.c   |   1 -
  audio/audio.c   |  44 +--
  audio/audio.h   |   1 -
  audio/audio_int.h   |   2 -
  audio/audio_legacy.c| 591 
  audio/coreaudio.m   |   1 -
  audio/dbusaudio.c   |   1 -
  audio/dsoundaudio.c |   1 -
  audio/jackaudio.c   |   1 -
  audio/meson.build   |   1 -
  audio/noaudio.c |   1 -
  audio/ossaudio.c|   1 -
  audio/paaudio.c |   1 -
  audio/pwaudio.c |   1 -
  audio/sdlaudio.c|   1 -
  audio/sndioaudio.c  |   1 -
  audio/wavaudio.c|   1 -
  docs/about/deprecated.rst   |   8 -
  docs/about/removed-features.rst |   6 +
  qemu-options.hx |  10 -
  softmmu/vl.c|   4 -
  21 files changed, 10 insertions(+), 669 deletions(-)
  delete mode 100644 audio/audio_legacy.c

diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c
index 057571dd1e0..5ffb39c4182 100644
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -960,7 +960,6 @@ static struct audio_driver alsa_audio_driver = {
  .init   = alsa_audio_init,
  .fini   = alsa_audio_fini,
  .pcm_ops= &alsa_pcm_ops,
-.can_be_default = 1,
  .max_voices_out = INT_MAX,
  .max_voices_in  = INT_MAX,
  .voice_size_out = sizeof (ALSAVoiceOut),
diff --git a/audio/audio.c b/audio/audio.c
index 90c7c49d116..13f519611f6 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -111,8 +111,6 @@ const struct mixeng_volume nominal_volume = {
  #endif
  };
  
-static bool legacy_config = true;

-
  int audio_bug (const char *funcname, int cond)
  {
  if (cond) {
@@ -1712,46 +1710,14 @@ static AudioState *audio_init(Audiodev *dev, const char *name)
  /* silence gcc warning about uninitialized variable */
  AudiodevListHead head = QSIMPLEQ_HEAD_INITIALIZER(head);
  
-if (using_spice) {

-/*
- * When using spice allow the spice audio driver being picked
- * as default.
- *
- * Temporary hack.  Using audio devices without explicit
- * audiodev= property is already deprecated.  Same goes for
- * the -soundhw switch.  Once this support gets finally
- * removed we can also drop the concept of a default audio
- * backend and this can go away.
- */
-driver = audio_driver_lookup("spice");
-if (driver) {
-driver->can_be_default = 1;
-}
-}
-
  if (dev) {
  /* -audiodev option */
-legacy_config = false;
  drvname = AudiodevDriver_str(dev->driver);
  } else if (!QTAILQ_EMPTY(&audio_states)) {
-if (!legacy_config) {
-dolog("Device %s: audiodev default parameter is deprecated, please 
"
-  "specify audiodev=%s\n", name,
-  QTAILQ_FIRST(&audio_states)->dev->id);
-}
+dolog("Device %s: audiodev default parameter is deprecated, please "
+  "specify audiodev=%s\n", name,
+  QTAILQ_FIRST(&audio_states)->dev->id);


You said that you removed the default handling ... so this log message looks 
suspicious ... is this code block still required?



  return QTAILQ_FIRST(&audio_states);
-} else {
-/* legacy implicit initialization */
-head = audio_handle_legacy_opts();
-/*
- * In case of legacy initialization, all Audiodevs in the list will have
- * the same configuration (except the driver), so it doesn't matter which
- * one we chose.  We need an Audiodev to set up AudioState before we can
- * init a driver.  Also note that dev at this point is still in the
- * list.
- */
-dev = QSIMPLEQ_FIRST(&head)->dev;
-audio_validate_opts(dev, &error_abort);
  }


 Thomas




Re: 'check-avocado' fails after c03f57fd5b ("Revert "tests: Use separate ...")

2023-09-05 Thread Paolo Bonzini
On Fri, Sep 1, 2023 at 4:36 PM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> FWIW I am unable to run 'check-avocado' after commit c03f57fd5b ("Revert 
> "tests: Use
> separate virtual environment for avocado"). The error being thrown:
>
> [20/20] Generating docs/QEMU man pages with a custom command
>VENVPIP install -e /home/danielhb/work/test/qemu/python/
> /home/danielhb/work/test/qemu/build/pyvenv/bin/python3 -B 
> python/scripts/mkvenv.py ensuregroup --online 
> /home/danielhb/work/test/qemu/pythondeps.toml avocado
> mkvenv: checking for avocado-framework(>=88.1, <93.0)
> mkvenv: checking for pycdlib>=1.11.0
>AVOCADO tests/avocado
> /home/danielhb/work/test/qemu/build/pyvenv/bin/python3: No module named 
> avocado.__main__; 'avocado' is a package and cannot be directly executed
> make: *** [/home/danielhb/work/test/qemu/tests/Makefile.include:139: 
> check-avocado] Error 1

Can you run it with "V=1" and also "cat
/home/danielhb/work/test/qemu/build/pyvenv/bin/avocado" please?

Thanks,

Paolo




Re: [PATCH] docs/system/replay: do not show removed command line option

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 12:07, Paolo Bonzini wrote:

Cc: qemu-triv...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
  docs/system/replay.rst | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: CXL Namespaces of ACPI disappearing in Qemu demo

2023-09-05 Thread Yuquan Wang
Hi, Jonathan
On 2023-09-04 20:43,  jonathan.cameron wrote:
> 
> At the system design level, MMIO space of Root complex register space via RCRB
> does not map in a similar fashion to PCIE MMIO space (which is handled via
> address decoding in the PCIE fabric). It is much more similar to MMIO for 
> platform
> devices - as such the implementation handles in like a platform device (well 
> 16 of
> them which seemed enough for any sane usecase).
> 
> 

Oh, thanks! So, according to the above, the core factor is the implementation
of the RCRB.

> 
> So in theory we could make some space for the CXL root bridge RCRB registers
> but it would make various generic paths more complex.  In a real system
> those registers are likely to be far from the PCI MMIO space anyway so the
> way it's modeled is probably more realistic than pushing the RCRB into the
> existing allocation.
> 

This implies that all CXL root bridges will use RCRB registers.

From Table 8-17 and Figure 9-14 in the CXL 3.0 specification, I understood that
only the RCH DP & RCD UP use RCRBs, and that CXL host bridges in VH mode use a
different mechanism to realize the CHBCR. I tried to find more explanation in
the CXL spec, but I haven't found any. Hence my confusion.

Many thanks
Yuquan


Re: [PATCH v4 00/14] simpletrace: refactor and general improvements

2023-09-05 Thread Mads Ynddal


> On 23 Aug 2023, at 10.54, Mads Ynddal  wrote:
> 
> From: Mads Ynddal 
> 
> I wanted to use simpletrace.py for an internal project, so I tried to update
> and polish the code. Some of the commits resolve specific issues, while some
> are more subjective.
> 
> I've tried to divide it into commits so we can discuss the
> individual changes, and I'm ready to pull things out, if it isn't needed.
> 
> v4:
> * Added missing Analyzer2 to __all__
> * Rebased with master
> v3:
> * Added __all__ with public interface
> * Added comment about magic numbers and structs from Stefan Hajnoczi
> * Reintroduced old interface for process, run and Analyzer
> * Added comment about Python 3.6 in ref. to getfullargspec
> * process now accepts events as file-like objects
> * Updated context-manager code for Analyzer
> * Moved logic of event processing to Analyzer class
> * Moved logic of process into _process function
> * Added new Analyzer2 class with kwarg event-processing
> * Reverted changes to process-call in scripts/analyse-locks-simpletrace.py
> v2:
> * Added myself as maintainer of simpletrace.py
> * Improve docstring on `process`
> * Changed call to `process` in scripts/analyse-locks-simpletrace.py to 
> reflect new argument types
> * Replaced `iteritems()` with `items()` in 
> scripts/analyse-locks-simpletrace.py to support Python 3
> 
> Mads Ynddal (14):
>  simpletrace: add __all__ to define public interface
>  simpletrace: annotate magic constants from QEMU code
>  simpletrace: improve parsing of sys.argv; fix files never closed.
>  simpletrace: changed naming of edict and idtoname to improve
>readability
>  simpletrace: update code for Python 3.11
>  simpletrace: improved error handling on struct unpack
>  simpletrace: define exception and add handling
>  simpletrace: made Analyzer into context-manager
>  simpletrace: refactor to separate responsibilities
>  simpletrace: move logic of process into internal function
>  simpletrace: move event processing to Analyzer class
>  simpletrace: added simplified Analyzer2 class
>  MAINTAINERS: add maintainer of simpletrace.py
>  scripts/analyse-locks-simpletrace.py: changed iteritems() to items()
> 
> MAINTAINERS  |   6 +
> scripts/analyse-locks-simpletrace.py |   2 +-
> scripts/simpletrace-benchmark.zip| Bin 0 -> 4809 bytes
> scripts/simpletrace.py   | 362 ++-
> 4 files changed, 247 insertions(+), 123 deletions(-)
> create mode 100644 scripts/simpletrace-benchmark.zip
> 
> -- 
> 2.38.1
> 

Ping :)


Re: 'check-avocado' fails after c03f57fd5b ("Revert "tests: Use separate ...")

2023-09-05 Thread Paolo Bonzini
On Tue, Sep 5, 2023 at 12:39 PM Paolo Bonzini  wrote:
> > /home/danielhb/work/test/qemu/build/pyvenv/bin/python3 -B 
> > python/scripts/mkvenv.py ensuregroup --online 
> > /home/danielhb/work/test/qemu/pythondeps.toml avocado
> > mkvenv: checking for avocado-framework(>=88.1, <93.0)
> > mkvenv: checking for pycdlib>=1.11.0
> >AVOCADO tests/avocado
> > /home/danielhb/work/test/qemu/build/pyvenv/bin/python3: No module named 
> > avocado.__main__; 'avocado' is a package and cannot be directly executed
> > make: *** [/home/danielhb/work/test/qemu/tests/Makefile.include:139: 
> > check-avocado] Error 1
>
> Can you run it with "V=1" and also "cat
> /home/danielhb/work/test/qemu/build/pyvenv/bin/avocado" please?

Also:

1) run the following under the pyvenv/bin/python3 REPL:

from importlib.metadata import distribution
avocado = distribution('avocado-framework')
next((x for x in avocado.entry_points if x.name == 'avocado'))

FWIW here with a similar system I get

EntryPoint(name='avocado', value='avocado.core.main:main',
group='console_scripts')

2) try running "pyvenv/bin/avocado --help" and see if it works

Paolo




Re: [PATCH] audio: remove QEMU_AUDIO_* and -audio-help support

2023-09-05 Thread Paolo Bonzini

On 9/5/23 12:39, Thomas Huth wrote:


+    dolog("Device %s: audiodev default parameter is deprecated, 
please "

+  "specify audiodev=%s\n", name,
+  QTAILQ_FIRST(&audio_states)->dev->id);


You said that you removed the default handling ... so this log message 
looks suspicious ... is this code block still required?


I was referring to the ".can_be_default" argument, maybe I should change 
"default audio backend" to "default audio driver" in the commit message? 
 Unfortunately this code (which is a separate entry in deprecated.rst) 
cannot be removed because of non-qdevified soundcards.


Paolo




Re: deadlock when using iothread during backup_clean()

2023-09-05 Thread Fiona Ebner
Am 05.09.23 um 12:01 schrieb Fiona Ebner:
> 
> Can we assume block_job_remove_all_bdrv() to always hold the job's
> AioContext? And if yes, can we just tell bdrv_graph_wrlock() that it
> needs to release that before polling to fix the deadlock?
> 

I tried doing something similar as a proof-of-concept

> diff --git a/blockjob.c b/blockjob.c
> index 58c5d64539..1a696241a0 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -198,19 +198,19 @@ void block_job_remove_all_bdrv(BlockJob *job)
>   * one to make sure that such a concurrent access does not attempt
>   * to process an already freed BdrvChild.
>   */
> -bdrv_graph_wrlock(NULL);
>  while (job->nodes) {
>  GSList *l = job->nodes;
>  BdrvChild *c = l->data;
>  
>  job->nodes = l->next;
>  
> +bdrv_graph_wrlock(c->bs);
>  bdrv_op_unblock_all(c->bs, job->blocker);
>  bdrv_root_unref_child(c);
> +bdrv_graph_wrunlock();
>  
>  g_slist_free_1(l);
>  }
> -bdrv_graph_wrunlock();
>  }

and while it did get slightly further, I ran into another deadlock with

> #0  0x7f1941155136 in __ppoll (fds=0x55992068fb20, nfds=2, 
> timeout=, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:42
> #1  0x55991c6a1a3f in qemu_poll_ns (fds=0x55992068fb20, nfds=2, 
> timeout=-1) at ../util/qemu-timer.c:339
> #2  0x55991c67ed6c in fdmon_poll_wait (ctx=0x55991f058810, 
> ready_list=0x7ffda8c987b0, timeout=-1) at ../util/fdmon-poll.c:79
> #3  0x55991c67e6a8 in aio_poll (ctx=0x55991f058810, blocking=true) at 
> ../util/aio-posix.c:670
> #4  0x55991c50a763 in bdrv_graph_wrlock (bs=0x0) at 
> ../block/graph-lock.c:145
> #5  0x55991c4daf85 in bdrv_close (bs=0x55991fff2f30) at ../block.c:5166
> #6  0x55991c4dc050 in bdrv_delete (bs=0x55991fff2f30) at ../block.c:5606
> #7  0x55991c4df205 in bdrv_unref (bs=0x55991fff2f30) at ../block.c:7173
> #8  0x55991c4fb8ca in bdrv_cbw_drop (bs=0x55991fff2f30) at 
> ../block/copy-before-write.c:566
> #9  0x55991c4f9685 in backup_clean (job=0x55992016d0b0) at 
> ../block/backup.c:105




Re: [PATCH v4 05/14] simpletrace: update code for Python 3.11

2023-09-05 Thread Philippe Mathieu-Daudé

On 23/8/23 10:54, Mads Ynddal wrote:

From: Mads Ynddal 

The call to `getargspec` was deprecated and in Python 3.11 it has been
removed in favor of `getfullargspec`. `getfullargspec` is compatible
with QEMU's requirement of at least Python version 3.6.

Signed-off-by: Mads Ynddal 
Reviewed-by: Stefan Hajnoczi 
---
  scripts/simpletrace.py | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v4 06/14] simpletrace: improved error handling on struct unpack

2023-09-05 Thread Philippe Mathieu-Daudé

On 23/8/23 10:54, Mads Ynddal wrote:

From: Mads Ynddal 

A failed call to `read_header` wouldn't be handled the same for the two
different code paths (one path would try to use `None` as a list).
Changed to raise an exception to be handled centrally. This also allows for
easier unpacking, as errors have been filtered out.

Signed-off-by: Mads Ynddal 
---
  scripts/simpletrace.py | 41 -
  1 file changed, 16 insertions(+), 25 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v4 11/14] simpletrace: move event processing to Analyzer class

2023-09-05 Thread Philippe Mathieu-Daudé

On 23/8/23 10:54, Mads Ynddal wrote:

From: Mads Ynddal 

Moved event processing to the Analyzer class to separate specific analyzer


"Move"

Reviewed-by: Philippe Mathieu-Daudé 


logic (like caching and function signatures) from the _process function.
This allows for new types of Analyzer-based subclasses without changing
the core code.

Note, that the fn_cache is important for performance in cases where the
analyzer is branching away from the catch-all a lot. The cache has no
measurable performance penalty.

Signed-off-by: Mads Ynddal 
---
  scripts/simpletrace.py | 60 +-
  1 file changed, 36 insertions(+), 24 deletions(-)





Re: [PATCH v4 13/14] MAINTAINERS: add maintainer of simpletrace.py

2023-09-05 Thread Philippe Mathieu-Daudé

On 23/8/23 10:54, Mads Ynddal wrote:

From: Mads Ynddal 

In my work to refactor simpletrace.py, I noticed that there's no
maintainer of it, and it has the status of "odd fixes". I'm using it from
time to time, so I'd like to maintain the script.

I've added myself as reviewer under "Tracing" to be informed of changes
that might affect simpletrace.py.

Signed-off-by: Mads Ynddal 
---
  MAINTAINERS | 6 ++
  1 file changed, 6 insertions(+)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v4 14/14] scripts/analyse-locks-simpletrace.py: changed iteritems() to items()

2023-09-05 Thread Philippe Mathieu-Daudé

On 23/8/23 10:54, Mads Ynddal wrote:

From: Mads Ynddal 

Python 3 removed `dict.iteritems()` in favor of `dict.items()`. This
means the script curerntly doesn't work on Python 3.


"currently"

Reviewed-by: Philippe Mathieu-Daudé 



Signed-off-by: Mads Ynddal 
---
  scripts/analyse-locks-simpletrace.py | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)





Re: [PATCH] audio: remove QEMU_AUDIO_* and -audio-help support

2023-09-05 Thread Thomas Huth

On 05/09/2023 12.55, Paolo Bonzini wrote:

On 9/5/23 12:39, Thomas Huth wrote:


+    dolog("Device %s: audiodev default parameter is deprecated, 
please "

+  "specify audiodev=%s\n", name,
+  QTAILQ_FIRST(&audio_states)->dev->id);


You said that you removed the default handling ... so this log message 
looks suspicious ... is this code block still required?


I was referring to the ".can_be_default" argument, maybe I should change 
"default audio backend" to "default audio driver" in the commit message? 


No, I think it's fine. Thanks for the clarification!

Reviewed-by: Thomas Huth 




Re: deadlock when using iothread during backup_clean()

2023-09-05 Thread Paolo Bonzini

On 9/5/23 12:01, Fiona Ebner wrote:

Can we assume block_job_remove_all_bdrv() to always hold the job's
AioContext?


I think so, see job_unref_locked(), job_prepare_locked() and 
job_finalize_single_locked().  These call the callbacks that ultimately 
get to block_job_remove_all_bdrv().



And if yes, can we just tell bdrv_graph_wrlock() that it
needs to release that before polling to fix the deadlock?


No, but I think it should be released and re-acquired in 
block_job_remove_all_bdrv() itself.


mirror_exit_common() however holds _two_ AioContext locks at the time it 
calls block_job_remove_all_bdrv(), qemu_get_aio_context() has to be 
released and reacquired in mirror_exit_common() itself.
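
For illustration only, here is a rough and untested sketch of that shape,
with the release/re-acquire done inside block_job_remove_all_bdrv() itself.
It assumes the caller holds exactly the job's AioContext and nothing else,
and that only bdrv_graph_wrlock() (which polls) needs the lock dropped; the
real fix would have to audit all callers.

/* Untested sketch of blockjob.c, not a real patch. */
void block_job_remove_all_bdrv(BlockJob *job)
{
    AioContext *ctx = job->job.aio_context;  /* assumed to be held here */

    /* bdrv_graph_wrlock() polls, so drop the job's AioContext around it */
    aio_context_release(ctx);
    bdrv_graph_wrlock(NULL);
    aio_context_acquire(ctx);

    while (job->nodes) {
        GSList *l = job->nodes;
        BdrvChild *c = l->data;

        job->nodes = l->next;

        bdrv_op_unblock_all(c->bs, job->blocker);
        bdrv_root_unref_child(c);

        g_slist_free_1(l);
    }

    bdrv_graph_wrunlock();
}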


Paolo




[risu PATCH v2 4/4] s390x: Update the configure script for s390x support

2023-09-05 Thread Thomas Huth
Auto-detect s390x hosts and add s390x information to the help text.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 configure | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index ca2d7db..2f7c580 100755
--- a/configure
+++ b/configure
@@ -58,6 +58,8 @@ guess_arch() {
 ARCH="m68k"
 elif check_define __powerpc64__ ; then
 ARCH="ppc64"
+elif check_define __s390x__ ; then
+ARCH="s390x"
 else
 echo "This cpu is not supported by risu. Try -h. " >&2
 exit 1
@@ -139,7 +141,7 @@ Some influential environment variables:
prefixed with the given string.
 
   ARCH force target architecture instead of trying to detect it.
-   Valid values=[arm|aarch64|ppc64|ppc64le|m68k]
+   Valid values=[arm|aarch64|m68k|ppc64|ppc64le|s390x]
 
   CC   C compiler command
   CFLAGS   C compiler flags
-- 
2.39.3




[risu PATCH v2 2/4] s390x: Add simple s390x.risu file

2023-09-05 Thread Thomas Huth
This only adds a limited set of s390x instructions for initial testing.
More instructions will be added later.

Acked-by: Ilya Leoshkevich 
Signed-off-by: Thomas Huth 
---
 s390x.risu | 48 
 1 file changed, 48 insertions(+)
 create mode 100644 s390x.risu

diff --git a/s390x.risu b/s390x.risu
new file mode 100644
index 000..3ad7015
--- /dev/null
+++ b/s390x.risu
@@ -0,0 +1,48 @@
+###
+# Copyright 2023 Red Hat Inc.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# Thomas Huth - initial implementation
+###
+
+.mode s390x
+
+# format:RR Add (register + register, 32 bit)
+AR Z 00011010 r1:4 r2:4
+
+# format:RRE Add (register + register, 64 bit)
+AGR Z 10111001 1000  r1:4 r2:4
+
+# format:RRE Add (register + register, 32 bit to 64 bit)
+AGFR Z 10111001 00011000  r1:4 r2:4
+
+# format:RRF-a Add (three registers, 32 bit)
+ARK STFLE45 10111001 1000 r3:4  r1:4 r2:4
+
+# format:RRF-a Add (three registers, 64 bit)
+AGRK STFLE45 10111001 11101000 r3:4  r1:4 r2:4
+
+
+# format:RRE Add Halfword Immediate (32 bit)
+AHI Z 10100111 r1:4 1010 i2:16
+
+# format:RI Add Halfword Immediate (64 bit)
+AGHI Z 10100111 r1:4 1011 i2:16
+
+
+# format:RR Add Logical (32 bit)
+ALR Z 0000 r1:4 r2:4
+
+# format:RRE Add Logical (64 bit)
+ALGR Z 10111001 1010  r1:4 r2:4
+
+# format:RRE Add Logical (32 bit to 64 bit)
+ALGFR Z 10111001 00011010  r1:4 r2:4
+
+
+# format:RRF-c Population Count
+POPCNT STFLE45 10111001 1111 m3:4  r1:4 r2:4
-- 
2.39.3




[risu PATCH v2 3/4] s390x: Add basic risugen perl module for s390x

2023-09-05 Thread Thomas Huth
This implements support for simple 16-bit and 32-bit instructions.
Support for 48-bit instructions and support for load/store memory
instructions is not implemented yet.

Signed-off-by: Thomas Huth 
---
 risugen_s390x.pm | 186 +++
 1 file changed, 186 insertions(+)
 create mode 100644 risugen_s390x.pm

diff --git a/risugen_s390x.pm b/risugen_s390x.pm
new file mode 100644
index 000..f0d03c4
--- /dev/null
+++ b/risugen_s390x.pm
@@ -0,0 +1,186 @@
+#!/usr/bin/perl -w
+###
+# Copyright 2023 Red Hat Inc.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# Thomas Huth - initial implementation (based on risugen_ppc64.pm etc.)
+###
+
+# risugen -- generate a test binary file for use with risu
+# See 'risugen --help' for usage information.
+package risugen_s390x;
+
+use strict;
+use warnings;
+
+use risugen_common;
+
+require Exporter;
+
+our @ISA= qw(Exporter);
+our @EXPORT = qw(write_test_code);
+
+my $periodic_reg_random = 1;
+
+# Maximum alignment restriction permitted for a memory op.
+my $MAXALIGN = 64;
+
+sub write_mov_ri($$$)
+{
+my ($r, $imm_h, $imm_l) = @_;
+
+# LGFI
+insn16(0xc0 << 8 | $r << 4 | 0x1);
+insn32($imm_l);
+# IIHF r,imm_high
+insn16(0xc0 << 8 | $r << 4 | 0x8);
+insn32($imm_h);
+}
+
+sub write_mov_fp($$)
+{
+my ($r, $imm) = @_;
+
+write_mov_ri(0, ~$imm, $imm);
+# LDGR
+insn32(0xb3c1 << 16 | $r << 4);
+}
+
+sub write_random_regdata()
+{
+# Floating point registers
+for (my $i = 0; $i < 16; $i++) {
+write_mov_fp($i, rand(0x));
+}
+
+# Load FPC (via r0)
+write_mov_ri(0, 0, (rand(0x) & 0xfcfcff77));
+insn32(0xb384);
+
+# general purpose registers
+for (my $i = 0; $i < 16; $i++) {
+write_mov_ri($i, rand(0x), rand(0x));
+}
+}
+
+my $OP_COMPARE = 0;# compare registers
+my $OP_TESTEND = 1;# end of test, stop
+
+sub write_random_register_data()
+{
+write_random_regdata();
+write_risuop($OP_COMPARE);
+}
+
+sub gen_one_insn($$)
+{
+# Given an instruction-details array, generate an instruction
+my $constraintfailures = 0;
+
+INSN: while(1) {
+my ($forcecond, $rec) = @_;
+my $insn = int(rand(0x));
+my $insnname = $rec->{name};
+my $insnwidth = $rec->{width};
+my $fixedbits = $rec->{fixedbits};
+my $fixedbitmask = $rec->{fixedbitmask};
+my $constraint = $rec->{blocks}{"constraints"};
+my $memblock = $rec->{blocks}{"memory"};
+
+$insn &= ~$fixedbitmask;
+$insn |= $fixedbits;
+
+if (defined $constraint) {
+# user-specified constraint: evaluate in an environment
+# with variables set corresponding to the variable fields.
+my $v = eval_with_fields($insnname, $insn, $rec, "constraints", $constraint);
+if (!$v) {
+$constraintfailures++;
+if ($constraintfailures > 1) {
+print "1 consecutive constraint failures for $insnname 
constraints string:\n$constraint\n";
+exit (1);
+}
+next INSN;
+}
+}
+
+# OK, we got a good one
+$constraintfailures = 0;
+
+my $basereg;
+
+if (defined $memblock) {
+die "memblock handling has not been implemented yet."
+}
+
+if ($insnwidth == 16) {
+insn16(($insn >> 16) & 0x);
+} else {
+insn32($insn);
+}
+
+return;
+}
+}
+
+sub write_risuop($)
+{
+my ($op) = @_;
+insn32(0x835a0f00 | $op);
+}
+
+sub write_test_code($)
+{
+my ($params) = @_;
+
+my $condprob = $params->{ 'condprob' };
+my $numinsns = $params->{ 'numinsns' };
+my $outfile = $params->{ 'outfile' };
+
+my %insn_details = %{ $params->{ 'details' } };
+my @keys = @{ $params->{ 'keys' } };
+
+set_endian(1);
+
+open_bin($outfile);
+
+# convert from probability that insn will be conditional to
+# probability of forcing insn to unconditional
+$condprob = 1 - $condprob;
+
+# TODO better random number generator?
+srand(0);
+
+print "Generating code using patterns: @keys...\n";
+progress_start(78, $numinsns);
+
+if (grep { defined($insn_details{$_}->{blocks}->{"memory"}) } @keys) {
+write_memblock_setup();
+}
+
+# memblock setup doesn't clean its registers, so this must come afterwards.
+write_random_register_data();
+
+for my $i (1..$numinsns) {
+my $insn_enc = $keys[in

[risu PATCH v2 0/4] Add support for s390x to RISU

2023-09-05 Thread Thomas Huth
 Hi Peter!

Here are some patches that add basic support for s390x to RISU.
It's still quite limited, e.g. no support for load/store memory
operations yet, but the basics with simple 16-bit or 32-bit
instructions work already fine.

v2:
- Removed the code to avoid r14 (return address) and r15 (stack pointer)
  since it is not necessary anymore since commit ad82a069e8d6a
- Initialize the floating point registers in test_s390x.S, too
- Added Acked-bys and Reviewed-bys from v1

Thomas Huth (4):
  s390x: Add basic s390x support to the C code
  s390x: Add simple s390x.risu file
  s390x: Add basic risugen perl module for s390x
  s390x: Update the configure script for s390x support

 configure|   4 +-
 risu_reginfo_s390x.c | 140 
 risu_reginfo_s390x.h |  23 ++
 risu_s390x.c |  48 +++
 risugen_s390x.pm | 186 +++
 s390x.risu   |  48 +++
 test_s390x.S |  51 
 7 files changed, 499 insertions(+), 1 deletion(-)
 create mode 100644 risu_reginfo_s390x.c
 create mode 100644 risu_reginfo_s390x.h
 create mode 100644 risu_s390x.c
 create mode 100644 risugen_s390x.pm
 create mode 100644 s390x.risu
 create mode 100644 test_s390x.S

-- 
2.39.3




[risu PATCH v2 1/4] s390x: Add basic s390x support to the C code

2023-09-05 Thread Thomas Huth
With these changes, it is now possible to compile the "risu" binary
for s390x hosts.

Acked-by: Ilya Leoshkevich 
Signed-off-by: Thomas Huth 
---
 risu_reginfo_s390x.c | 140 +++
 risu_reginfo_s390x.h |  23 +++
 risu_s390x.c |  48 +++
 test_s390x.S |  51 
 4 files changed, 262 insertions(+)
 create mode 100644 risu_reginfo_s390x.c
 create mode 100644 risu_reginfo_s390x.h
 create mode 100644 risu_s390x.c
 create mode 100644 test_s390x.S

diff --git a/risu_reginfo_s390x.c b/risu_reginfo_s390x.c
new file mode 100644
index 000..1c6aa0c
--- /dev/null
+++ b/risu_reginfo_s390x.c
@@ -0,0 +1,140 @@
+/**
+ * Copyright 2023 Red Hat Inc.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * Thomas Huth - initial implementation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "risu.h"
+#include "risu_reginfo_s390x.h"
+
+
+const struct option * const arch_long_opts;
+const char * const arch_extra_help;
+
+void process_arch_opt(int opt, const char *arg)
+{
+abort();
+}
+
+void arch_init(void)
+{
+}
+
+int reginfo_size(struct reginfo *ri)
+{
+return sizeof(*ri);
+}
+
+/* reginfo_init: initialize with a ucontext */
+void reginfo_init(struct reginfo *ri, ucontext_t *uc)
+{
+int i;
+
+memset(ri, 0, sizeof(*ri));
+
+ri->faulting_insn = *((uint32_t *) uc->uc_mcontext.psw.addr);
+ri->psw_mask = uc->uc_mcontext.psw.mask;
+ri->psw_addr = uc->uc_mcontext.psw.addr - image_start_address;
+
+for (i = 0; i < 16; i++) {
+ri->gregs[i] = uc->uc_mcontext.gregs[i];
+}
+
+memcpy(&ri->fpregs, &uc->uc_mcontext.fpregs, sizeof(fpregset_t));
+}
+
+/* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
+int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
+{
+int i;
+
+if (m->psw_mask != a->psw_mask || m->psw_addr != a->psw_addr) {
+return 0;
+}
+
+for (i = 0; i < 16; i++) {
+if (m->gregs[i] != a->gregs[i]) {
+return 0;
+}
+}
+
+if (memcmp(&m->fpregs, &a->fpregs, sizeof(fpregset_t))) {
+return 0;
+}
+
+return 1;
+}
+
+/* reginfo_dump: print state to a stream, returns nonzero on success */
+int reginfo_dump(struct reginfo *ri, FILE * f)
+{
+int i;
+
+fprintf(f, "  faulting insn 0x%x\n", ri->faulting_insn);
+fprintf(f, "  PSW mask  0x%" PRIx64 "\n", ri->psw_mask);
+fprintf(f, "  PSW addr offs 0x%" PRIx64 "\n\n", ri->psw_addr);
+
+for (i = 0; i < 16/2; i++) {
+fprintf(f, "\tr%d: %16lx\tr%02d: %16lx\n", i, ri->gregs[i],
+i + 8, ri->gregs[i + 8]);
+}
+fprintf(f, "\n");
+
+for (i = 0; i < 16/2; i++) {
+fprintf(f, "\tf%d: %16lx\tf%02d: %16lx\n",
+i, *(uint64_t *)&ri->fpregs.fprs[i],
+i + 8, *(uint64_t *)&ri->fpregs.fprs[i + 8]);
+}
+fprintf(f, "\tFPC: %8x\n\n", ri->fpregs.fpc);
+
+return !ferror(f);
+}
+
+int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
+{
+int i;
+
+if (m->psw_mask != a->psw_mask) {
+fprintf(f, "Mismatch: PSW mask master: [%016lx] - PSW mask apprentice: 
[%016lx]\n",
+m->psw_mask, a->psw_mask);
+}
+
+if (m->psw_addr != a->psw_addr) {
+fprintf(f, "Mismatch: PSW addr offset master: [%016lx] - PSW addr 
offset apprentice: [%016lx]\n",
+m->psw_addr, a->psw_addr);
+}
+
+for (i = 0; i < 16; i++) {
+if (m->gregs[i] != a->gregs[i]) {
+fprintf(f, "Mismatch: r%d master: [%016lx] - r%d apprentice: 
[%016lx]\n",
+i, m->gregs[i], i, a->gregs[i]);
+}
+}
+
+for (i = 0; i < 16; i++) {
+if (*(uint64_t *)&m->fpregs.fprs[i] != *(uint64_t 
*)&a->fpregs.fprs[i]) {
+fprintf(f, "Mismatch: f%d master: [%016lx] - f%d apprentice: 
[%016lx]\n",
+i, *(uint64_t *)&m->fpregs.fprs[i],
+i, *(uint64_t *)&a->fpregs.fprs[i]);
+}
+}
+
+if (m->fpregs.fpc != a->fpregs.fpc) {
+fprintf(f, "Mismatch: FPC master: [%08x] - FPC apprentice: [%08x]\n",
+m->fpregs.fpc, a->fpregs.fpc);
+}
+
+return !ferror(f);
+}
diff --git a/risu_reginfo_s390x.h b/risu_reginfo_s390x.h
new file mode 100644
index 000..b55a11d
--- /dev/null
+++ b/risu_reginfo_s390x.h
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2023 Red Hat Inc.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under th

Re: [risu PATCH 0/4] Add support for s390x to RISU

2023-09-05 Thread Thomas Huth

On 04/09/2023 16.30, Ilya Leoshkevich wrote:

On Mon, 2023-09-04 at 16:00 +0200, Thomas Huth wrote:

  Hi Peter!

Here are some patches that add basic support for s390x to RISU.
It's still quite limited, e.g. no support for load/store memory
operations yet, but the basics with simple 16-bit or 32-bit
instructions work already fine.

(In the long run, we'd need to support instructions with 48-bit
length on s390x, too, since most newer "interesting" instructions
like e.g. vector SIMD instructions are encoded with 48 bit. This
will require modifications to the generic code, too, so I limited
my initial implementation to 16-bit and 32-bit instruction length
support to keep the code self-contained in the s390x architecture
specific files)


What's also interesting about SIMD is that floating-point instructions
clobber the upper parts of vector registers. I wonder if there is a way
to systematically solve this?


No clue yet. So far the code does not support the extended vector registers
(since they weren't part of the information that is provided by the
ucontext.h header file).


I guess it should be OK to check only the floating point part for the
registers where the overlap exists, and only check the full vector register
where there is no overlap ... I don't expect much difference for a vector
instruction when it executes with registers 0 - 15 compared to when it
executes with registers 16 - 31, so skipping half of the check for registers
0 - 15 shouldn't be too bad.
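
To make that concrete, here is a sketch of such a comparison. The vregs
field is hypothetical (the current risu_reginfo_s390x.c does not carry the
vector registers yet), so treat this as an illustration of the split, not
as proposed code.

/* Sketch only: assumes a future uint64_t vregs[32][2] field in reginfo. */
static int vector_regs_match(struct reginfo *m, struct reginfo *a)
{
    int i;

    for (i = 0; i < 16; i++) {
        /* v0-v15 overlap f0-f15: only the FP doubleword is reliable here */
        if (*(uint64_t *)&m->fpregs.fprs[i] !=
            *(uint64_t *)&a->fpregs.fprs[i]) {
            return 0;
        }
    }
    for (i = 16; i < 32; i++) {
        /* v16-v31 do not overlap the FP registers: compare all 128 bits */
        if (m->vregs[i][0] != a->vregs[i][0] ||
            m->vregs[i][1] != a->vregs[i][1]) {
            return 0;
        }
    }
    return 1;
}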



One other thing - for not-so-near future - is it possible to integrate
this with coverage-based fuzzers? I.e., somehow generate the
instructions based on the coverage signal. Maybe even make sure that
the signal comes from JITed code too. I wanted to try AFLplusplus in
QEMU mode for this purpose (which would ultimately run QEMU in QEMU),
but never found the time.


I don't think this is possible yet, but maybe it'd be possible to write a
TCG plugin for QEMU to dump the executed instructions into an input file for
risu?
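
As a starting point, a minimal sketch of such a plugin using the public TCG
plugin API (function names as in qemu-plugin.h around QEMU 8.x). It only
logs the disassembly of translated instructions; turning that output into a
risu input file would still need post-processing, and logging at execution
time rather than translation time would need an additional exec callback.

/* Rough sketch, not tested. */
#include <glib.h>
#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static void tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t i, n = qemu_plugin_tb_n_insns(tb);

    for (i = 0; i < n; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        char *disas = qemu_plugin_insn_disas(insn);

        /* one disassembled instruction per line on the plugin log */
        qemu_plugin_outs(disas);
        qemu_plugin_outs("\n");
        g_free(disas);
    }
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    qemu_plugin_register_vcpu_tb_trans_cb(id, tb_trans);
    return 0;
}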


 Thomas




Re: [PATCH v4 12/14] simpletrace: added simplified Analyzer2 class

2023-09-05 Thread Mads Ynddal


> On 23 Aug 2023, at 10.54, Mads Ynddal  wrote:
> 
> From: Mads Ynddal 
> 
> By moving the dynamic argument construction to keyword-arguments,
> we can remove all of the specialized handling, and streamline it.
> If a tracing method wants to access these, they can define the
> kwargs, or ignore it be placing `**kwargs` at the end of the
> function's arguments list.
> 
> Added deprecation warning to Analyzer class to make users aware
> of the Analyzer2 class. No removal date is planned.
> 
> Signed-off-by: Mads Ynddal 
> ---
> scripts/simpletrace-benchmark.zip | Bin 0 -> 4809 bytes
> scripts/simpletrace.py|  78 +-
> 2 files changed, 76 insertions(+), 2 deletions(-)
> create mode 100644 scripts/simpletrace-benchmark.zip
> 
> diff --git a/scripts/simpletrace-benchmark.zip 
> b/scripts/simpletrace-benchmark.zip
> new file mode 100644
> index 
> ..1d696a8d5c49d4725c938af8bf25a59090192986
> GIT binary patch
> literal 4809
> zcmb7|^-~m#x5jtLrD4ehq*FRXq`O;??nb% zJ9F>c@2}tInKREhbLN~s;G?6CjzIOwzjqB
> z^$ax7Cjek4BSY+t{;7Y{F8~h!#JB(g0RQ{|01nLF`z$IYKjr4yFu~&OG`dpv7e;He
> znoe1lQxC!H%PeWl6BVnB{0xqmXh!%lpLBOyEU#Mm1ClW8JT~{VxYJb8CM6{$AFlmu
> zdnfrUU`%24s6j5ct15zP#`!eJS4@mmdb5(l-_iJ2O5`Ssnh(=8TReo*o0`A2sEbEk
> z4y{gjWjCG#nvsNBDVpcq{^lq9{qZ3N5 zBl?xrxNr{)Rpy(`maC5BqtM$>B?LF6geLN%GIA^dNZ zgRmFC3H%kwH2N+l>zc1R;YGX<6BV;Hb*v#H;roy4XLg&L1yqSv8O|2OHKdoeU(#f)
> z+fNkiI(^ADca?ZrRVLGST3!|A6KSN#jUdH?J@C)#_9Uvbdkn7m2|xnWH&ThVE9_~g
> zpnH-0lf!p#sH0uhrgP_qPH138f`^h4?3gI6>I&`BMINt_>B$z=(?TE0%I< zBi241hZ7A$1l9B+hc`%woFewHUchLKqHhs)KpxDLgbM{)1T7M`_PMiGmzfn`JegAD
> z@{XWGXKO8CEvI6{m0;#Z$;ydM;z;<%JQ_E1N3}4=JgoX3WZVRfsq#~5C>g%Ra$uv8
> z6=PbEGNv(Hf|td|qb~ZHb<+ZAZcN_HO~_P0hAd6 z5bl}quu(t~IZa|GYC_^X-n>8D8s1g1j$H5?MEjltRq|>+y*U4k^&^o_I{2Euf9>F}
> zq2VY^=m@y6+;s}ii5B!I{&fmlQ@N0#L@p_}_;mRXHh0fM)|~caA80naEAKal-4{W-
> zK5KCCaK{*u8d7e(adQ2`GhW9{+r?RdAm*$0c(bYCpb`Pdrx7|LndKIVu}2661?O~z
> zbKb1$dFzpGaFx<>S2rb}v`AoIDJhkCvc@Tg4!^#RR*DQ_UzIndt36)ncVPng*ym6|
> zhFQ=DlEA-on#=Y2;>aDCi=Wql+4>0`>`ba&pM{U2DOb&0PU~xmMil!(pV~zp6X|mE
> z23yqRo&_@Qh})4SjTwJnCeqW)4$;5czzy^EKII|h-WHD5bapSoH=e>UICf~Xw5gl$
> zVcB$0!fm%+UH|H;B$$=D#U*eNUXzsv?*Dv78vAJh&kFL*#;wC);_HIQtgtOA;p`gn
> zr;n51ZAEzvn`6-d$E4)#9@%f+A&o5;{fc2&{lvFZe2{TcO#q5b*$t+o7|JX>x=#*?
> zalnv|Qhjf}P##10S4^{g;^SUg=JL6vFmJ|ro~m#tQCC(nkCD<2NP1ENWESG2brS0%
> zA_7+7RMjACZ8ClN#$o>X^x2=U#PEP3ZVYJE#xi0{KFA7Ix1@&s_C9`
> z&Rb)I>axD%Ai+}uQTln`7fghU^Tb|ltxBwz5+-u$Sm`=|AQk&K&ZZ9SS$zq((J3MX
> z1{rnfRCYG4!zTLgj694Xn`?2%0U6cq1KAk~MB(y{CC#PPUo>53t
> z>BSYi-1R~0O3n(Y0!;hnAL53;18VdEMVF&Q;Np-gZag#QjJ!ITu9Qmdq8-@sx9OBY
> za2^D|#Z(AD-GtzkDF?-S=9x&;(n1smuQ*~j1Hr|sdDM`gB) z-iD)5AvXg-7Ttq`4Ywt&qMVHQPrHeh#5(2uY3*#eIV1a-mu;0279*rs3`eXHl4aiu
> zY z3Ji*py=?SOv}=6KuAv+%W6HCEJA3D`%M%20vbTO@4FNaGO2(+4)h6?1&WlkT6p
> zTTpVe74&@+)2#iT8&(SUYUr0ZR=22)Q`H@|vD7>uAc(>Z#{)3@{mE;6e1Np*M#X>}
> zr71XFf-gMnmLd#WD4p6&+)VH&2&UpJ&)!hrAv_j>g4{RlQ)tXnC%aVR+bpYf6U&hA
> z;^CGM?JAn!zNZqY9}1-?>3OY)*JT<;UmaiO9Qf`!-FMO@TH}sza^VNt=LZ)s0Pt-y
> zdtSW89R_?K@n0JBuIzqQ2~S0PR%#F_i@_z+X7M%DWY&mUF_OG?#*1&`jkSR3EWvH2
> zpwWsFDRjm=JTRhZ;WQvpc=D1eSW~#nH8337Wh(DF>*-*EQoj&6KvfuCfW7^ zpPP_yPc5pmPL7hyxae;)pbjGPW!t1k+(;HfZKrD#Rcb70=%qAiET6&N{936d!J)xF
> zKRl*?+3zd*NCr+*pe8kTORt@&Q%z+ezPpy{`2{=D#)xMguww8ELw`fSTX4tRDb1(R
> zpjuC^;XRO6P8>0e^d{nO0#FVHE4BBk$>UDFfR2bFzR8J%^H1n3bR6#F&Fan?3{t z!XMh^;7O@wgI0`Z+b^HV{Q#S(2Bt0q^pTiK#=xQGt!?p!Dbs$z)?e058|w0JMI0Kf
> z+~8O^jnRw*GwKy5Kuvvl70~pnp zpbWylXrhLIH$-9To}{78isq96UXcx15xjPfTDQOi^v!`a-B-qt1RH*$Ra?984kb{#
> z14DDl-zxdQaxe-H#G%5$*lP6*$P+m~PtMreeRYe`cORR+^Z9R1){Bt`a6a3#Byc&7
> zWrEg@M;1ryWqJ?K;ve9X5Wma(!)qS7t`YYWgJikPFZeJQ>Cl_agJ-v$%1dI^nw=Sp
> zo!?EDKA6YnGi#*wdj-WvnEX@&?!r&0#6KnOS>+!KM)(q0VHM9%EzT>68`-F3C7Sv)
> zTx&i5#acCs<4&w6u}@y4<&=y&m&ii)Lb)OfsZrGcR@7M>(5I3~CdYOb&Myjnr%gi4
> zEjCs3&Yxf_mX$I&YD^{S()uY61t_$6*WN1V=jK+!|J!ILhW86D0~|jxb9v09_G3kr
> zv*HAc6SrRV8uu2EZ(Mw*%JsRi;)@2v-n|;R_@ULyu%zmG!L!&MhdcneJ8Wg{R4vlx
> z+^4Sf_E3y#z#n$ zrM;P_Yva7=t*@I(YpWrv7<^W5h(76+-||uKZGXg*hl61j(oSqL2WTbzv+3JRX;be+
> zjmwPL;J8* zvOXuQEBK) z4od@B)mDykT*ik^J}@zVp>z6}Y?hc#cLP@>lQ3NkSVBhM4a&v5Rjm?G=`#wnE0c+}
> zJ%XN{ki8)FOC9S@yYt_bO3PAMDE58PwSE}*Wg_F6jK#rfhM#nLh8VJuCvduw|18En
> zFUV_}RNL`Tx54+ z)TSZ-dqBtGs2)7yS z30zD3tkz~|1!=nfJnrX|U3HClh`phr&laJPeSr+&aN)eW_kAr4UbLv8NzWm68Q*+*
> z8BOK3mz_O!Da2Dqe!yHIQ3jj8jsr`F+cCdygj(4gh23xdiM&tmJ*7qaG#`i6;vZH&
> zv>TJtX6zYmyx-ZAFoaql5+3<>xB+v1RFtxhAZXrkdan4%#iu)bDZf0Z8TREp)?D2p
> zo35a9ch+z;1#io=*F2hh8Z4ALk$iJqqE=$Ghv%o_!DjXhqIxFp0i~GhfBe8+-j$8W
> z7#0(`^&3#KWl4dW2hUaZXI=@@yr1j86Sn?iE?loQt^fd$ks7$3HkIC*m@8BZrju2G
> zP>VdN6Ze+n#}2{+YTO^u#9$Y=MOcu}6{Y(2v>U<4A$;PA>!zm52&rP&Rz-8^SB1gz
> zpim)JM^x~W%`=NM&FnUo?HFIr;vvBX4D=7ZKDy-T18~~OQNg0#G+AfHKUBH3X1RR4
> z?p8tf*Fh)LQq!z08He*{jRTpySl@Am@Qx$cD;0Q()NdubkNygB{eCR;CBu`#Wi@7H
> zbvS3oo3C0&Tyz;#H(EG~RrtHj7DhjwkXmN{_VM7@MG{mol-n{PMMdi*c|Q8ZluwS8
> z{VD
> z=bU@Tyf|5y8Gp^7zdI>vPfk>Uw)004Z>V6pEB~;Ne40kfuTbods|=B2z~>%+maaBe
> zpg}#vzL;)H#$3FMXPbmy1LM_Lb-i||iqCXx@ft;xy3=8K z_r0BaTtqJ9^|sd-LwYC%3|ZJgKZv#W( zZcMU|m%&xW69}X

[PATCH v4 0/3] sysemu/accel: Simplify sysemu/xen.h

2023-09-05 Thread Philippe Mathieu-Daudé
Trivial cleanups which simplify "sysemu/xen.h".

Philippe Mathieu-Daudé (3):
  sysemu/xen: Remove unuseful CONFIG_USER_ONLY header guard
  sysemu/xen: Remove unreachable xen_ram_alloc() code
  sysemu/xen: Allow elision of xen_hvm_modified_memory()

 include/exec/ram_addr.h |  8 ++--
 include/sysemu/xen.h| 24 +++-
 2 files changed, 9 insertions(+), 23 deletions(-)

-- 
2.41.0




[PATCH 2/3] sysemu/xen: Remove unreachable xen_ram_alloc() code

2023-09-05 Thread Philippe Mathieu-Daudé
The xen_ram_alloc() call in softmmu/physmem.c is guarded
by a check for xen_enabled(), which evaluates to 'false'
when Xen is not built in. The compiler elides the function
call, and thus the inlined function is not used. Remove it.

Inspired-by: Daniel Henrique Barboza 
Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/xen.h | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
index 9b2d0b21ff..1f797a9abe 100644
--- a/include/sysemu/xen.h
+++ b/include/sysemu/xen.h
@@ -27,8 +27,6 @@ extern bool xen_allowed;
 #define xen_enabled()   (xen_allowed)
 
 void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length);
-void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
-   struct MemoryRegion *mr, Error **errp);
 
 #else /* !CONFIG_XEN_IS_POSSIBLE */
 
@@ -38,12 +36,10 @@ static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
 {
 /* nothing */
 }
-static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
- MemoryRegion *mr, Error **errp)
-{
-g_assert_not_reached();
-}
 
 #endif /* CONFIG_XEN_IS_POSSIBLE */
 
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
+   struct MemoryRegion *mr, Error **errp);
+
 #endif
-- 
2.41.0




[PATCH 1/3] sysemu/xen: Remove unuseful CONFIG_USER_ONLY header guard

2023-09-05 Thread Philippe Mathieu-Daudé
In commit da278d58a0 ("accel: Move Xen accelerator code under
accel/xen/") we used include/sysemu/kvm.h as a template for
include/sysemu/xen.h, using the CONFIG_USER_ONLY header guard.
In retrospect, it is neither used nor useful, so remove it.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/xen.h | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
index bc13ad5692..9b2d0b21ff 100644
--- a/include/sysemu/xen.h
+++ b/include/sysemu/xen.h
@@ -26,16 +26,14 @@ extern bool xen_allowed;
 
 #define xen_enabled()   (xen_allowed)
 
-#ifndef CONFIG_USER_ONLY
 void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length);
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
struct MemoryRegion *mr, Error **errp);
-#endif
 
 #else /* !CONFIG_XEN_IS_POSSIBLE */
 
 #define xen_enabled() 0
-#ifndef CONFIG_USER_ONLY
+
 static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
 {
 /* nothing */
@@ -45,7 +43,6 @@ static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
 {
 g_assert_not_reached();
 }
-#endif
 
 #endif /* CONFIG_XEN_IS_POSSIBLE */
 
-- 
2.41.0




[PATCH 3/3] sysemu/xen: Allow elision of xen_hvm_modified_memory()

2023-09-05 Thread Philippe Mathieu-Daudé
Call xen_enabled() before xen_hvm_modified_memory() to let
the compiler elide its call.

Have xen_enabled() return a boolean to match its declaration
in the CONFIG_XEN_IS_POSSIBLE case.

Suggested-by: Daniel Henrique Barboza 
Signed-off-by: Philippe Mathieu-Daudé 
---
 include/exec/ram_addr.h |  8 ++--
 include/sysemu/xen.h| 15 ++-
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 9f2e3893f5..66e849ac4e 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -330,7 +330,9 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
 }
 }
 
-xen_hvm_modified_memory(start, length);
+if (xen_enabled()) {
+xen_hvm_modified_memory(start, length);
+}
 }
 
 #if !defined(_WIN32)
@@ -406,7 +408,9 @@ uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
 }
 }
 
-xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
+if (xen_enabled()) {
+xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
+}
 } else {
 uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
 
diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
index 1f797a9abe..d84a5f3551 100644
--- a/include/sysemu/xen.h
+++ b/include/sysemu/xen.h
@@ -21,24 +21,13 @@
 #endif
 
 #ifdef CONFIG_XEN_IS_POSSIBLE
-
 extern bool xen_allowed;
-
 #define xen_enabled()   (xen_allowed)
-
-void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length);
-
 #else /* !CONFIG_XEN_IS_POSSIBLE */
-
-#define xen_enabled() 0
-
-static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
-{
-/* nothing */
-}
-
+#define xen_enabled()   false
 #endif /* CONFIG_XEN_IS_POSSIBLE */
 
+void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length);
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
struct MemoryRegion *mr, Error **errp);
 
-- 
2.41.0




[PATCH 0/3] sysemu/accel: Simplify sysemu/xen.h

2023-09-05 Thread Philippe Mathieu-Daudé
Trivial cleanups which simplify "sysemu/xen.h".

Philippe Mathieu-Daudé (3):
  sysemu/xen: Remove unuseful CONFIG_USER_ONLY header guard
  sysemu/xen: Remove unreachable xen_ram_alloc() code
  sysemu/xen: Allow elision of xen_hvm_modified_memory()

 include/exec/ram_addr.h |  8 ++--
 include/sysemu/xen.h| 24 +++-
 2 files changed, 9 insertions(+), 23 deletions(-)

-- 
2.41.0




Re: [PATCH v4 0/3] sysemu/accel: Simplify sysemu/xen.h

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 14:20, Philippe Mathieu-Daudé wrote:

Trivial cleanups which simplify "sysemu/xen.h".

Philippe Mathieu-Daudé (3):
   sysemu/xen: Remove unuseful CONFIG_USER_ONLY header guard
   sysemu/xen: Remove unreachable xen_ram_alloc() code
   sysemu/xen: Allow elision of xen_hvm_modified_memory()

  include/exec/ram_addr.h |  8 ++--
  include/sysemu/xen.h| 24 +++-
  2 files changed, 9 insertions(+), 23 deletions(-)


Oops, this isn't v4. v1 coming...




Re: [PATCH v22 02/20] s390x/cpu topology: add topology entries on CPU hotplug

2023-09-05 Thread Thomas Huth

On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:

From: Pierre Morel 

The topology information consists of CPU attributes that are
specified during CPU device creation.

On hot plug we:
- calculate the default values for the topology for drawers,
   books and sockets in the case they are not specified.
- verify the CPU attributes
- check that we still have room on the desired socket

Whether a CPU can be inserted in a mask depends on the number of
cores allowed in a socket, a book or a drawer; the check is done
during hot plug of the CPU so that we get an immediate answer.

If the complete topology is not specified, the core is added
in the physical topology based on its core ID and it gets
default values for the modifier attributes.

This way, starting QEMU without specifying the topology can
still get some advantage of the CPU topology.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---

...

+/**
+ * s390_topology_cpu_default:
+ * @cpu: pointer to a S390CPU
+ * @errp: Error pointer
+ *
+ * Setup the default topology if no attributes are already set.
+ * Passing a CPU with some, but not all, attributes set is considered
+ * an error.
+ *
+ * The function calculates the (drawer_id, book_id, socket_id)
+ * topology by filling the cores starting from the first socket
+ * (0, 0, 0) up to the last (smp->drawers, smp->books, smp->sockets).
+ *
+ * CPU type and dedication have default values set in the
+ * s390x_cpu_properties; entitlement must be adjusted depending on the
+ * dedication.
+ *
+ * Returns false if it is impossible to setup a default topology
+ * true otherwise.
+ */
+static bool s390_topology_cpu_default(S390CPU *cpu, Error **errp)
+{
+CpuTopology *smp = &current_machine->smp;
+CPUS390XState *env = &cpu->env;
+
+/* All geometry topology attributes must be set or all unset */
+if ((env->socket_id < 0 || env->book_id < 0 || env->drawer_id < 0) &&
+(env->socket_id >= 0 || env->book_id >= 0 || env->drawer_id >= 0)) {
+error_setg(errp,
+   "Please define all or none of the topology geometry attributes");
+return false;
+}
+
+/* Check if one of the geometry topology is unset */


The comment isn't too helpful - maybe rather something like this:
"If one value of the topology is still unset, this means that now that all 
of them are unset, so calculate  the default attributes now." (and then 
remove also the comment within the curly braces below)
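
For readers of the archive: the s390_std_*() helpers called below are not
visible in this hunk; my understanding is that they boil down to a simple
division-based "fill lowest IDs first" mapping along these lines (sketch
only, the helper names and exact implementation in the patch may differ):

static int default_socket_id(int core_id, const CpuTopology *smp)
{
    return (core_id / smp->cores) % smp->sockets;
}

static int default_book_id(int core_id, const CpuTopology *smp)
{
    return (core_id / (smp->cores * smp->sockets)) % smp->books;
}

static int default_drawer_id(int core_id, const CpuTopology *smp)
{
    return (core_id / (smp->cores * smp->sockets * smp->books)) % smp->drawers;
}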



+if (env->socket_id < 0) {
+/* Calculate default geometry topology attributes */
+env->socket_id = s390_std_socket(env->core_id, smp);
+env->book_id = s390_std_book(env->core_id, smp);
+env->drawer_id = s390_std_drawer(env->core_id, smp);
+}
+
+/*
+ * When the user specifies the entitlement as 'auto' on the command line,
+ * QEMU will set the entitlement as:
+ * Medium when the CPU is not dedicated.
+ * High when dedicated is true.
+ */
+if (env->entitlement == S390_CPU_ENTITLEMENT_AUTO) {
+if (env->dedicated) {
+env->entitlement = S390_CPU_ENTITLEMENT_HIGH;
+} else {
+env->entitlement = S390_CPU_ENTITLEMENT_MEDIUM;
+}
+}
+return true;
+}
+
+/**
+ * s390_topology_check:
+ * @socket_id: socket to check
+ * @book_id: book to check
+ * @drawer_id: drawer to check
+ * @entitlement: entitlement to check
+ * @dedicated: dedication to check
+ * @errp: Error pointer
+ *
+ * The function checks if the topology
+ * attributes fits inside the system topology.
+ *
+ * Returns false if the specified topology does not match with
+ * the machine topology.
+ */
+static bool s390_topology_check(uint16_t socket_id, uint16_t book_id,
+uint16_t drawer_id, uint16_t entitlement,
+bool dedicated, Error **errp)
+{
+CpuTopology *smp = &current_machine->smp;
+ERRP_GUARD();


No need for ERRP_GUARD() here, I think.


+if (socket_id >= smp->sockets) {
+error_setg(errp, "Unavailable socket: %d", socket_id);
+return false;
+}
+if (book_id >= smp->books) {
+error_setg(errp, "Unavailable book: %d", book_id);
+return false;
+}
+if (drawer_id >= smp->drawers) {
+error_setg(errp, "Unavailable drawer: %d", drawer_id);
+return false;
+}
+if (entitlement >= S390_CPU_ENTITLEMENT__MAX) {
+error_setg(errp, "Unknown entitlement: %d", entitlement);
+return false;
+}
+if (dedicated && (entitlement == S390_CPU_ENTITLEMENT_LOW ||
+  entitlement == S390_CPU_ENTITLEMENT_MEDIUM)) {
+error_setg(errp, "A dedicated CPU implies high entitlement");
+return false;
+}
+return true;
+}
+
+/**
+ * s390_update_cpu_props:
+ * @ms: the machine state
+ * @cpu: the CPU for which to update the properties from the environ

[PATCH 1/2] target/i386/hvf: Remove unused includes in 'hvf-i386.h'

2023-09-05 Thread Philippe Mathieu-Daudé
The only non standard type -- CPUArchState -- is forward
declared in "qemu/typedefs.h", so no particular header is
required here.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/i386/hvf/hvf-i386.h | 6 --
 1 file changed, 6 deletions(-)

diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index 95b47c1c2e..243bc111cc 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -16,12 +16,6 @@
 #ifndef HVF_I386_H
 #define HVF_I386_H
 
-#include "qemu/accel.h"
-#include "sysemu/hvf.h"
-#include "sysemu/hvf_int.h"
-#include "cpu.h"
-#include "x86.h"
-
 void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int);
 
 /* Host specific functions */
-- 
2.41.0




[PATCH 0/2] sysemu/accel: Simplify sysemu/hvf.h

2023-09-05 Thread Philippe Mathieu-Daudé
Trivial cleanups which simplify "sysemu/hvf.h".

Philippe Mathieu-Daudé (2):
  target/i386/hvf: Remove unused includes in 'hvf-i386.h'
  sysemu/kvm: Restrict hvf_get_supported_cpuid() to x86 targets

 include/sysemu/hvf.h   | 3 ---
 target/i386/hvf/hvf-i386.h | 6 +-
 target/i386/cpu.c  | 1 +
 3 files changed, 2 insertions(+), 8 deletions(-)

-- 
2.41.0




[PATCH 2/2] sysemu/kvm: Restrict hvf_get_supported_cpuid() to x86 targets

2023-09-05 Thread Philippe Mathieu-Daudé
hvf_get_supported_cpuid() is only defined for x86 targets
(in target/i386/hvf/x86_cpuid.c).
Its declaration is pointless on all other targets.

All the calls to it in target/i386/cpu.c are guarded by
a call to hvf_enabled(), so they are elided when HVF is not
built in. Therefore we can remove the unnecessary function
stub.
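
For illustration, the pattern relied on here looks roughly like the
following. This is a sketch, not a quote of the actual cpu.c code; R_EAX is
the x86 register index from target/i386/cpu.h.

/* hvf_enabled() is the constant 0 without CONFIG_HVF, so the call below
 * is dead code, the compiler drops it, and no stub definition of
 * hvf_get_supported_cpuid() is needed at link time. */
static uint32_t supported_cpuid_eax(uint32_t func, uint32_t idx)
{
    if (hvf_enabled()) {
        return hvf_get_supported_cpuid(func, idx, R_EAX);
    }
    return 0;
}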

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/hvf.h   | 3 ---
 target/i386/hvf/hvf-i386.h | 2 ++
 target/i386/cpu.c  | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h
index 4037cd6a73..4a7c6af3a5 100644
--- a/include/sysemu/hvf.h
+++ b/include/sysemu/hvf.h
@@ -20,13 +20,10 @@
 #include "cpu.h"
 
 #ifdef CONFIG_HVF
-uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
- int reg);
 extern bool hvf_allowed;
 #define hvf_enabled() (hvf_allowed)
 #else /* !CONFIG_HVF */
 #define hvf_enabled() 0
-#define hvf_get_supported_cpuid(func, idx, reg) 0
 #endif /* !CONFIG_HVF */
 
 #endif /* NEED_CPU_H */
diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index 243bc111cc..e99c02cd4b 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -16,6 +16,8 @@
 #ifndef HVF_I386_H
 #define HVF_I386_H
 
+uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg);
+
 void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int);
 
 /* Host specific functions */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 00f913b638..6789384bec 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -26,6 +26,7 @@
 #include "tcg/helper-tcg.h"
 #include "sysemu/reset.h"
 #include "sysemu/hvf.h"
+#include "hvf/hvf-i386.h"
 #include "kvm/kvm_i386.h"
 #include "sev.h"
 #include "qapi/error.h"
-- 
2.41.0




[PATCH v1 1/1] target/loongarch: Add preldx instruction

2023-09-05 Thread Song Gao
Resolve a failure to start the Loongnix 20.5[1] system.

Logs:
Loading Linux 4.19.0-19-loongson-3 ...
Loading initial ramdisk ...
PROGRESS CODE: V02010004 I0
PROGRESS CODE: V03101019 I0
Error: unknown opcode. 903a3e6c: 0x382c6d82

[1] 
http://pkg.loongnix.cn/loongnix/isos/Loongnix-20.5/Loongnix-20.5.cartoon.gui.loongarch64.en.qcow2

Signed-off-by: Song Gao 
---
 target/loongarch/insns.decode  | 3 +++
 target/loongarch/disas.c   | 7 +++
 target/loongarch/insn_trans/trans_memory.c.inc | 5 +
 3 files changed, 15 insertions(+)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index c9c3bc2c73..56db653603 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -24,6 +24,7 @@
 &rrr  rd rj rk
 &rr_i rd rj imm
 &hint_r_i hint rj imm
+&hint_rr  hint rj rk
 &rrr_sa   rd rj rk sa
 &rr_ms_ls rd rj ms ls
 &ff   fd fj
@@ -69,6 +70,7 @@
 @rr_i16  .. imm:s16 rj:5 rd:5&rr_i
 @rr_i16s2  ..   rj:5 rd:5&rr_i imm=%offs16
 @hint_r_i12    .. imm:s12 rj:5 hint:5&hint_r_i
+@hint_rr   . rk:5 rj:5 hint:5&hint_rr
 @rrr_sa2p1  ... .. rk:5 rj:5 rd:5&rrr_sa  sa=%sa2p1
 @rrr_sa2  ... sa:2 rk:5 rj:5 rd:5&rrr_sa
 @rrr_sa3   .. sa:3 rk:5 rj:5 rd:5&rrr_sa
@@ -228,6 +230,7 @@ ldx_bu  0011 1010 0 . . . @rrr
 ldx_hu  0011 1010 01000 . . .@rrr
 ldx_wu  0011 1010 1 . . .@rrr
 preld   0010 101011  . . @hint_r_i12
+preldx  0011 1010 11000 . . .@hint_rr
 dbar0011 1111 00100 ...  @i15
 ibar0011 1111 00101 ...  @i15
 ldptr_w 0010 0100 .. . . @rr_i14s2
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 5c402d944d..d5ea8f7140 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -190,6 +190,12 @@ static void output_hint_r_i(DisasContext *ctx, arg_hint_r_i *a,
 output(ctx, mnemonic, "%d, r%d, %d", a->hint, a->rj, a->imm);
 }
 
+static void output_hint_rr(DisasContext *ctx, arg_hint_rr *a,
+   const char *mnemonic)
+{
+output(ctx, mnemonic, "%d, r%d, r%d", a->hint, a->rj, a->rk);
+}
+
 static void output_i(DisasContext *ctx, arg_i *a, const char *mnemonic)
 {
 output(ctx, mnemonic, "%d", a->imm);
@@ -549,6 +555,7 @@ INSN(ld_bu,rr_i)
 INSN(ld_hu,rr_i)
 INSN(ld_wu,rr_i)
 INSN(preld,hint_r_i)
+INSN(preldx,   hint_rr)
 INSN(fld_s,fr_i)
 INSN(fst_s,fr_i)
 INSN(fld_d,fr_i)
diff --git a/target/loongarch/insn_trans/trans_memory.c.inc b/target/loongarch/insn_trans/trans_memory.c.inc
index d9d062235a..ca7378c79d 100644
--- a/target/loongarch/insn_trans/trans_memory.c.inc
+++ b/target/loongarch/insn_trans/trans_memory.c.inc
@@ -110,6 +110,11 @@ static bool trans_preld(DisasContext *ctx, arg_preld *a)
 return true;
 }
 
+static bool trans_preldx(DisasContext *ctx, arg_preldx * a)
+{
+return true;
+}
+
 static bool trans_dbar(DisasContext *ctx, arg_dbar * a)
 {
 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
-- 
2.39.1




Re: [PATCH v4 12/14] simpletrace: added simplified Analyzer2 class

2023-09-05 Thread Daniel P . Berrangé
On Wed, Aug 23, 2023 at 10:54:27AM +0200, Mads Ynddal wrote:
> From: Mads Ynddal 
> 
> By moving the dynamic argument construction to keyword-arguments,
> we can remove all of the specialized handling, and streamline it.
> If a tracing method wants to access these, they can define the
> kwargs, or ignore it be placing `**kwargs` at the end of the
> function's arguments list.
> 
> Added deprecation warning to Analyzer class to make users aware
> of the Analyzer2 class. No removal date is planned.

AFAIK, we don't consider simpletrace.py python code to be a
supported public API, just a command line tool.

IOW, we can change the python code at will, as long as the
command line behaviour doesn't change. Thus I don't see
a need to add new classes, just change the existing ones.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH 1/2] block/meson.build: Restore alphabetical order of files

2023-09-05 Thread Kevin Wolf
When commit 5e5733e5999 created block/meson.build, the list of
unconditionally added files was in alphabetical order. Later commits
added new files in random places. Reorder the list to be alphabetical
again. (As for ordering foo.c against foo-*.c, there are both ways used
currently; standardise on having foo.c first, even though this is
different from the original commit 5e5733e5999.)

Signed-off-by: Kevin Wolf 
---
 block/meson.build | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/meson.build b/block/meson.build
index 529fc172c6..f351b9d0d3 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,41 +4,41 @@ block_ss.add(files(
   'aio_task.c',
   'amend.c',
   'backup.c',
-  'copy-before-write.c',
   'blkdebug.c',
   'blklogwrites.c',
   'blkverify.c',
   'block-backend.c',
   'block-copy.c',
-  'graph-lock.c',
   'commit.c',
+  'copy-before-write.c',
   'copy-on-read.c',
-  'preallocate.c',
-  'progress_meter.c',
   'create.c',
   'crypto.c',
   'dirty-bitmap.c',
   'filter-compress.c',
+  'graph-lock.c',
   'io.c',
   'mirror.c',
   'nbd.c',
   'null.c',
   'plug.c',
+  'preallocate.c',
+  'progress_meter.c',
   'qapi.c',
+  'qcow2.c',
   'qcow2-bitmap.c',
   'qcow2-cache.c',
   'qcow2-cluster.c',
   'qcow2-refcount.c',
   'qcow2-snapshot.c',
   'qcow2-threads.c',
-  'qcow2.c',
   'quorum.c',
   'raw-format.c',
   'reqlist.c',
   'snapshot.c',
   'snapshot-access.c',
-  'throttle-groups.c',
   'throttle.c',
+  'throttle-groups.c',
   'write-threshold.c',
 ), zstd, zlib, gnutls)
 
-- 
2.41.0




[PATCH 2/2] block: Make more BlockDriver definitions static

2023-09-05 Thread Kevin Wolf
Most block driver implementations don't have any reason for their
BlockDriver to be public. The only exceptions are bdrv_file, bdrv_raw
and bdrv_qcow2, which are actually used in other source files.

Make all other BlockDriver definitions static if they aren't yet.

Signed-off-by: Kevin Wolf 
---
 block/copy-before-write.c | 2 +-
 block/preallocate.c   | 2 +-
 block/snapshot-access.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index b866e42271..9a0e2b69d9 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -503,7 +503,7 @@ static void cbw_close(BlockDriverState *bs)
 s->bcs = NULL;
 }
 
-BlockDriver bdrv_cbw_filter = {
+static BlockDriver bdrv_cbw_filter = {
 .format_name = "copy-before-write",
 .instance_size = sizeof(BDRVCopyBeforeWriteState),
 
diff --git a/block/preallocate.c b/block/preallocate.c
index 4d82125036..3d0f621003 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -535,7 +535,7 @@ static void preallocate_child_perm(BlockDriverState *bs, 
BdrvChild *c,
 }
 }
 
-BlockDriver bdrv_preallocate_filter = {
+static BlockDriver bdrv_preallocate_filter = {
 .format_name = "preallocate",
 .instance_size = sizeof(BDRVPreallocateState),
 
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
index 67ea339da9..8d4e8932b8 100644
--- a/block/snapshot-access.c
+++ b/block/snapshot-access.c
@@ -108,7 +108,7 @@ static void snapshot_access_child_perm(BlockDriverState 
*bs, BdrvChild *c,
 *nshared = BLK_PERM_ALL;
 }
 
-BlockDriver bdrv_snapshot_access_drv = {
+static BlockDriver bdrv_snapshot_access_drv = {
 .format_name = "snapshot-access",
 
 .bdrv_open  = snapshot_access_open,
-- 
2.41.0




[PATCH 0/2] block: Minor cleanups

2023-09-05 Thread Kevin Wolf
I had to go through all block drivers and noticed some details that
aren't as expected, so I thought I might as well fix them...

Kevin Wolf (2):
  block/meson.build: Restore alphabetical order of files
  block: Make more BlockDriver definitions static

 block/copy-before-write.c |  2 +-
 block/preallocate.c   |  2 +-
 block/snapshot-access.c   |  2 +-
 block/meson.build | 12 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

-- 
2.41.0




[PATCH v2] qcow2: keep reference on zeroize with discard-no-unref enabled

2023-09-05 Thread Jean-Louis Dupond
When the discard-no-unref flag is enabled, we keep the reference for
normal discard requests.
But when a discard is executed on a snapshot/qcow2 image with backing,
the discards are saved as zero clusters in the snapshot image.

When committing the snapshot to the backing file, it is not
discard_in_l2_slice that is called but zero_in_l2_slice, which did not
have any logic to keep the reference when discard-no-unref is enabled.

Therefore we add logic in the zero_in_l2_slice call to keep the reference
on commit.

Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1621
Signed-off-by: Jean-Louis Dupond 
---
 block/qcow2-cluster.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f4f6cd6ad0..fc764aea4d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1984,7 +1984,7 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
 /* If we keep the reference, pass on the discard still */
 bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
   s->cluster_size);
-   }
+}
 }
 
 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
@@ -2062,9 +2062,15 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
 QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
 bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
 ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
-uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
+bool keep_reference =
+(s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
+uint64_t new_l2_entry = old_l2_entry;
 uint64_t new_l2_bitmap = old_l2_bitmap;
 
+if (unmap && !keep_reference) {
+new_l2_entry = 0;
+}
+
 if (has_subclusters(s)) {
 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
 } else {
@@ -2082,9 +2088,17 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
 set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
 }
 
-/* Then decrease the refcount */
 if (unmap) {
-qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+if (!keep_reference) {
+/* Then decrease the refcount */
+qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+} else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
+   (type == QCOW2_CLUSTER_NORMAL ||
+type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+/* If we keep the reference, pass on the discard still */
+bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+s->cluster_size);
+}
 }
 }
 
-- 
2.42.0




Re: [PATCH] qcow2: keep reference on zeroize with discard-no-unref enabled

2023-09-05 Thread Jean-Louis Dupond

Replaced by PATCH v2.

On 4/09/2023 17:09, Jean-Louis Dupond wrote:

When the discard-no-unref flag is enabled, we keep the reference for
normal discard requests.
But when a discard is executed on a snapshot/qcow2 image with backing,
the discards are saved as zero clusters in the snapshot image.

When committing the snapshot to the backing file, it is not
discard_in_l2_slice that is called but zero_in_l2_slice, which did not
have any logic to keep the reference when discard-no-unref is enabled.

Therefore we add logic in the zero_in_l2_slice call to keep the reference
on commit.

Next to that we also revert some logic from 42a2890a and just call
qcow2_free_any_cluster, as this will just decrease the refcount but not
remove the reference itself. It will also send the discard further
to the lower layer.

Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1621
Signed-off-by: Jean-Louis Dupond 
---
  block/qcow2-cluster.c | 18 +++---
  1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f4f6cd6ad0..48532ca3c2 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1975,16 +1975,7 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
  if (has_subclusters(s)) {
  set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
  }
-if (!keep_reference) {
-/* Then decrease the refcount */
-qcow2_free_any_cluster(bs, old_l2_entry, type);
-} else if (s->discard_passthrough[type] &&
-   (cluster_type == QCOW2_CLUSTER_NORMAL ||
-cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
-/* If we keep the reference, pass on the discard still */
-bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
-  s->cluster_size);
-   }
+qcow2_free_any_cluster(bs, old_l2_entry, type);
  }
  
  qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

@@ -2062,9 +2053,14 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
  QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
  bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
-((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
-uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
+uint64_t new_l2_entry = old_l2_entry;
  uint64_t new_l2_bitmap = old_l2_bitmap;
  
+if (unmap &&
+!(s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED)) {
+new_l2_entry = 0;
+}
+
  if (has_subclusters(s)) {
  new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
  } else {




Re: [PATCH v1 1/1] target/loongarch: Add preldx instruction

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 14:39, Song Gao wrote:

Resolve a failure when starting the Loongnix 20.5[1] system.

Logs:
 Loading Linux 4.19.0-19-loongson-3 ...
 Loading initial ramdisk ...
 PROGRESS CODE: V02010004 I0
 PROGRESS CODE: V03101019 I0
 Error: unknown opcode. 903a3e6c: 0x382c6d82

[1] http://pkg.loongnix.cn/loongnix/isos/Loongnix-20.5/Loongnix-20.5.cartoon.gui.loongarch64.en.qcow2

Signed-off-by: Song Gao 
---
  target/loongarch/insns.decode  | 3 +++
  target/loongarch/disas.c   | 7 +++
  target/loongarch/insn_trans/trans_memory.c.inc | 5 +
  3 files changed, 15 insertions(+)


Reviewed-by: Philippe Mathieu-Daudé 




Re: 'check-avocado' fails after c03f57fd5b ("Revert "tests: Use separate ...")

2023-09-05 Thread Daniel Henrique Barboza

Hi,

I managed to work around it. I'll post here the debugs for future reference.

- Here's all the steps after a clean qemu clone:

[danielhb@grind build]$ make check-avocado
  AVOCADO tests/avocado
/home/danielhb/work/test/qemu/build/pyvenv/bin/python3: No module named 
avocado.__main__; 'avocado' is a package and cannot be directly executed
make: *** [/home/danielhb/work/test/qemu/tests/Makefile.include:139: 
check-avocado] Error 1
 
[danielhb@grind build]$ V=1 make check-avocado

/home/danielhb/work/test/qemu/build/pyvenv/bin/python3 -B -m avocado --show=app 
run --job-results-dir=/home/danielhb/work/test/qemu/build/tests/results  
--filter-by-tags-include-empty --filter-by-tags-include-empty-key -t 
arch:riscv64 --failfast tests/avocado
/home/danielhb/work/test/qemu/build/pyvenv/bin/python3: No module named 
avocado.__main__; 'avocado' is a package and cannot be directly executed
make: *** [/home/danielhb/work/test/qemu/tests/Makefile.include:139: 
check-avocado] Error 1

[danielhb@grind build]$ cat 
/home/danielhb/work/test/qemu/build/pyvenv/bin/avocado
#!/home/danielhb/work/test/qemu/build/pyvenv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from avocado.core.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())
[danielhb@grind build]$

[danielhb@grind build]$ ./pyvenv/bin/python3
Python 3.11.4 (main, Jun  7 2023, 00:00:00) [GCC 13.1.1 20230511 (Red Hat 
13.1.1-2)] on linux
Type "help", "copyright", "credits" or "license" for more information.

from importlib.metadata import distribution
avocado = distribution('avocado-framework')
next((x for x in avocado.entry_points if x.name == 'avocado'))

EntryPoint(name='avocado', value='avocado.core.main:main', 
group='console_scripts')





[danielhb@grind build]$ ./pyvenv/bin/avocado --help
Traceback (most recent call last):
  File "/home/danielhb/work/test/qemu/build/./pyvenv/bin/avocado", line 5, in 

from avocado.core.main import main
ModuleNotFoundError: No module named 'avocado.core.main'
[danielhb@grind build]$


- I got suspicious after the above command failure, and noticed that 'avocado' 
didn't work
even outside of the QEMU tree:


[danielhb@grind ~]$ avocado --help
Traceback (most recent call last):
  File "/usr/bin/avocado", line 33, in 
sys.exit(load_entry_point('avocado-framework==92.0', 'console_scripts', 
'avocado')())
 
^
  File "/usr/bin/avocado", line 25, in importlib_load_entry_point
return next(matches).load()
   
  File "/usr/lib64/python3.11/importlib/metadata/__init__.py", line 202, in load
module = import_module(match.group('module'))
 
  File "/usr/lib64/python3.11/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
   
  File "", line 1204, in _gcd_import
  File "", line 1176, in _find_and_load
  File "", line 1140, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'avocado.core.main'


- Turns out that I had 2 avocado versions installed: one from F38 and the other
from pip.
When I removed the 'pip' version I got a different error:

 (01/13) tests/avocado/empty_cpu_model.py:EmptyCPUModel.test: STARTED
 (06/13) tests/avocado/tuxrun_baselines.py:TuxRunBaselineTest.test_riscv64: 
ERROR: Test.__init__() got an unexpected keyword argument 'run.results_dir' 
(0.04 s)
 (...)
 (01/13) tests/avocado/empty_cpu_model.py:EmptyCPUModel.test: ERROR: 
Test.__init__() got an unexpected keyword argument 'run.results_dir' (0.04 s)
 (...)

- Which seems to be related to a known bug according to:

https://avocado-framework.readthedocs.io/en/101.0/releases/100_1.html


In the end I don't need 'avocado' outside of testing QEMU, so my solution was to
remove all avocado packages from the system and let QEMU install whatever is
needed inside pyvenv. 'check-avocado' now works in 'master'. I am still unsure
why this particular patch triggered all these problems here, but I don't believe
this is worth pursuing unless other people start to see problems. For now we
can leave it as is IMO.



Thanks for the help!


Daniel


On 9/5/23 07:49, Paolo Bonzini wrote:

On Tue, Sep 5, 2023 at 12:39 PM Paolo Bonzini  wrote:

/home/danielhb/work/test/qemu/build/pyvenv/bin/python3 -B 
python/scripts/mkvenv.py ensuregroup --online 
/home/danielhb/work/test/qemu/pythondeps.toml avocado
mkvenv: checking for avocado-framework(>=88.1, <93.0)
mkvenv: checking for pycdlib>=1.11.0
AVOCADO tests/avocado
/home/danielhb/work/test/qemu/build/pyvenv/bin/python3: No module named 
avocado.__main__; 'avocado' is a package and cannot be directly executed
make: *** [/home/danielhb/work/test/qemu/tests/Makefile.include:139: 
check-avocado] Error 1


Can you run it with "V=1" and also "cat
/hom

mips system emulation failure with virtio

2023-09-05 Thread Richard Purdie
With qemu 8.1.0 we see boot hangs for x86-64 targets.

These are fixed by 0d58c660689f6da1e3feff8a997014003d928b3b (softmmu:
Use async_run_on_cpu in tcg_commit) but if I add that commit, mips and
mips64 break, hanging at boot unable to find a rootfs. 

We use virtio for network and disk and both of those change in the
bootlog from messages like:

[1.726118] virtio-pci :00:13.0: enabling device ( -> 0003)
[1.728864] virtio-pci :00:14.0: enabling device ( -> 0003)
[1.729948] virtio-pci :00:15.0: enabling device ( -> 0003)
...
[2.162148] virtio_blk virtio2: 1/0/0 default/read/poll queues
[2.168311] virtio_blk virtio2: [vda] 1184242 512-byte logical 

to:

[1.777051] virtio-pci :00:13.0: enabling device ( -> 0003)
[1.779822] virtio-pci :00:14.0: enabling device ( -> 0003)
[1.780926] virtio-pci :00:15.0: enabling device ( -> 0003)
...
[1.894852] virtio_rng: probe of virtio1 failed with error -28
...
[2.063553] virtio_blk virtio2: 1/0/0 default/read/poll queues
[2.064260] virtio_blk: probe of virtio2 failed with error -28
[2.069080] virtio_net: probe of virtio0 failed with error -28


i.e. the virtio drivers no longer work.

I tested with current qemu master
(17780edd81d27fcfdb7a802efc870a99788bd2fc) and mips is still broken
there.

Is this issue known about?

Cheers,

Richard



Re: [PATCH v10 00/10] migration: Modify 'migrate' and 'migrate-incoming' QAPI commands for migration

2023-09-05 Thread Fabiano Rosas
Het Gala  writes:

> Hi qemu-devel community,
>
> A gentle reminder and request to all migration maintainers - Peter,
> Juan, Dr. Gilbert and others - to review the patchset series.
> Received reviewed-by from Daniel on migration implementation patches but 
> need final approval from migration maintainers before getting it merged. 
> Also got acked-by tag from Markus on the QAPI patches. This is Part1 of 
> the 4 patchset series. Ultimate goal of the whole 4 series is to 
> 'introduce multiple interface support on top of existing multifd 
> capability'. Hope to get approval or comments from migration maintainers 
> on the patches soon.
>

Hi,

Is this the latest version of this series? I see errors with make
check. Let me know if I should wait for your next version to comment.



Re: [PATCH v22 03/20] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

2023-09-05 Thread Thomas Huth

On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:

From: Pierre Morel 

On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
  MAINTAINERS  |   1 +
  qapi/machine-target.json |  14 ++
  include/hw/s390x/cpu-topology.h  |  25 +++
  include/hw/s390x/sclp.h  |   1 +
  target/s390x/cpu.h   |  76 
  hw/s390x/cpu-topology.c  |   2 +
  target/s390x/kvm/kvm.c   |   5 +-
  target/s390x/kvm/stsi-topology.c | 296 +++
  target/s390x/kvm/meson.build |   3 +-
  9 files changed, 421 insertions(+), 2 deletions(-)
  create mode 100644 target/s390x/kvm/stsi-topology.c

diff --git a/MAINTAINERS b/MAINTAINERS
index b10b83583f..692ce9f121 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1700,6 +1700,7 @@ M: Nina Schoetterl-Glausch 
  S: Supported
  F: include/hw/s390x/cpu-topology.h
  F: hw/s390x/cpu-topology.c
+F: target/s390x/kvm/stsi-topology.c
  
  X86 Machines

  
diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index f0a6b72414..275234a20f 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -361,3 +361,17 @@
 'TARGET_MIPS',
 'TARGET_LOONGARCH64',
 'TARGET_RISCV' ] } }
+
+##
+# @CpuS390Polarization:
+#
+# An enumeration of cpu polarization that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Polarization',
+  'prefix': 'S390_CPU_POLARIZATION',
+  'data': [ 'horizontal', 'vertical' ],
+'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
+}
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 97b0af2795..fc15acf297 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -15,10 +15,35 @@
  #include "hw/boards.h"
  #include "qapi/qapi-types-machine-target.h"
  
+#define S390_TOPOLOGY_CPU_IFL   0x03

+
+typedef union s390_topology_id {
+uint64_t id;
+struct {
+uint8_t _reserved0;
+uint8_t drawer;
+uint8_t book;
+uint8_t socket;
+uint8_t type;
+uint8_t inv_polarization;


What sense does it make to store the polarization in an inverted way? ... I 
don't get that ... could you please at least add a comment somewhere for the 
rationale?



+uint8_t not_dedicated;
+uint8_t origin;
+};
+} s390_topology_id;
+
+typedef struct S390TopologyEntry {
+QTAILQ_ENTRY(S390TopologyEntry) next;
+s390_topology_id id;
+uint64_t mask;
+} S390TopologyEntry;
+
  typedef struct S390Topology {
  uint8_t *cores_per_socket;
+CpuS390Polarization polarization;
  } S390Topology;
  
+typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;

+
  #ifdef CONFIG_KVM
  bool s390_has_topology(void);
  void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index cf1f2efae2..c49051e17e 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,6 +112,7 @@ typedef struct CPUEntry {
  } QEMU_PACKED CPUEntry;
  
  #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128

+#define SCLP_READ_SCP_INFO_MNEST 2
  typedef struct ReadInfo {
  SCCBHeader h;
  uint16_t rnmax;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7ebd5e05b6..b8a0c02714 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -569,6 +569,29 @@ typedef struct SysIB_322 {
  } SysIB_322;
  QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
  
+/*

+ * Topology Magnitude fields (MAG) indicates the maximum number of
+ * topology list entries (TLE) at the corresponding nesting level.
+ */
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+uint8_t  reserved0[2];
+uint16_t length;
+uint8_t  mag[S390_TOPOLOGY_MAG];
+uint8_t  reserved1;
+uint8_t  mnest;
+uint32_t reserved2;
+char tle[];
+} SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
  typedef union SysIB {
  SysIB_111 sysib_111;
  SysIB_121 sysib_121;
@@ -576,9 +599,62 @@ typedef union SysIB {
  SysIB_221 sysib_221;
  SysIB_222 sysib_222;
  SysIB_322 sysib_322;
+SysIB_151x sysib_151x;
  } SysIB;
  QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
  
+/*

+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology L

Re: [PATCH 1/2] block/meson.build: Restore alphabetical order of files

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 15:06, Kevin Wolf wrote:

When commit 5e5733e5999 created block/meson.build, the list of
unconditionally added files was in alphabetical order. Later commits
added new files in random places. Reorder the list to be alphabetical
again. (As for ordering foo.c against foo-*.c, there are both ways used
currently; standardise on having foo.c first, even though this is
different from the original commit 5e5733e5999.)

Signed-off-by: Kevin Wolf 
---
  block/meson.build | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [QEMU PATCH v4 07/13] softmmu/memory: enable automatic deallocation of memory regions

2023-09-05 Thread Huang Rui
On Tue, Sep 05, 2023 at 05:17:32PM +0800, Akihiko Odaki wrote:
> On 2023/09/05 18:07, Huang Rui wrote:
> > On Thu, Aug 31, 2023 at 06:10:08PM +0800, Akihiko Odaki wrote:
> >> On 2023/08/31 18:32, Huang Rui wrote:
> >>> From: Xenia Ragiadakou 
> >>>
> >>> When the memory region has a different life-cycle from that of its parent,
> >>> it can be automatically released, once it has been unparented and once all
> >>> of its references have gone away, via the object's free callback.
> >>>
> >>> However, currently, references to the memory region are held by its owner
> >>> without first incrementing the memory region object's reference count.
> >>> As a result, the automatic deallocation of the object, not taking into
> >>> account those references, results in use-after-free memory corruption.
> >>>
> >>> This patch increases the reference count of the memory region object on
> >>> each memory_region_ref() and decreases it on each memory_region_unref().
> >>>
> >>> Signed-off-by: Xenia Ragiadakou 
> >>> Signed-off-by: Huang Rui 
> >>> ---
> >>>
> >>> New patch
> >>>
> >>>softmmu/memory.c | 19 +--
> >>>1 file changed, 17 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/softmmu/memory.c b/softmmu/memory.c
> >>> index 7d9494ce70..0fdd5eebf9 100644
> >>> --- a/softmmu/memory.c
> >>> +++ b/softmmu/memory.c
> >>> @@ -1797,6 +1797,15 @@ Object *memory_region_owner(MemoryRegion *mr)
> >>>
> >>>void memory_region_ref(MemoryRegion *mr)
> >>>{
> >>> +if (!mr) {
> >>> +return;
> >>> +}
> >>> +
> >>> +/* Obtain a reference to prevent the memory region object
> >>> + * from being released under our feet.
> >>> + */
> >>> +object_ref(OBJECT(mr));
> >>> +
> >>>/* MMIO callbacks most likely will access data that belongs
> >>> * to the owner, hence the need to ref/unref the owner whenever
> >>> * the memory region is in use.
> >>> @@ -1807,16 +1816,22 @@ void memory_region_ref(MemoryRegion *mr)
> >>> * Memory regions without an owner are supposed to never go away;
> >>> * we do not ref/unref them because it slows down DMA sensibly.
> >>> */
> >>
> >> The collapsed comment says:
> >>   > The memory region is a child of its owner.  As long as the
> >>   > owner doesn't call unparent itself on the memory region,
> >>   > ref-ing the owner will also keep the memory region alive.
> >>   > Memory regions without an owner are supposed to never go away;
> >>   > we do not ref/unref them because it slows down DMA sensibly.
> >>
> >> It contradicts with this patch.
> > 
> > The reason we modify it is that we would like to address the memory
> > leak issue in the original code. Please see below: we find the memory
> > region is freed under our feet once we free (unref) the simple resource,
> > because the region will be freed in object_finalize() after unparent when
> > the ref count goes to 0. Then the VM crashes because of this.
> > 
> > In virgl_cmd_resource_map_blob():
> >  memory_region_init_ram_device_ptr(res->region, OBJECT(g), NULL, size, 
> > data);
> >  OBJECT(res->region)->free = g_free;
> >  memory_region_add_subregion(&b->hostmem, mblob.offset, res->region);
> >  memory_region_set_enabled(res->region, true);
> > 
> > In virtio_gpu_virgl_resource_unmap():
> >  memory_region_set_enabled(res->region, false);
> >  memory_region_del_subregion(&b->hostmem, res->region);
> >  object_unparent(OBJECT(res->region));
> >  res->region = NULL;
> > 
> > I spent a bit more time understanding your point. Do you want me to update
> > the corresponding comments, or do you have some concern about this change?
> 
> As the comment says ref-ing memory regions without an owner will slow 
> down DMA, you should avoid that. More concretely, you should check 
> mr->owner before doing object_ref(OBJECT(mr)).
> 

I get it, thanks for pointing this out exactly. Will update it in V5.
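
For reference, the direction agreed above would presumably end up looking
roughly like the sketch below. This is only an illustration of Akihiko's
suggestion, not the actual v5 patch; it reuses the existing object_ref() /
object_unref() API and the MemoryRegion 'owner' field from softmmu/memory.c:

/*
 * Sketch: take an extra reference on the MemoryRegion object itself, but
 * only when it has an owner, so owner-less regions keep the DMA fast path
 * described in the existing comment.
 */
void memory_region_ref(MemoryRegion *mr)
{
    if (!mr || !mr->owner) {
        return;
    }
    /* Keep the region alive even after object_unparent() on it ... */
    object_ref(OBJECT(mr));
    /* ... and keep the owner alive for MMIO callbacks, as before. */
    object_ref(mr->owner);
}

void memory_region_unref(MemoryRegion *mr)
{
    if (!mr || !mr->owner) {
        return;
    }
    object_unref(mr->owner);
    object_unref(OBJECT(mr));
}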

Thanks,
Ray



Re: [PATCH 4/5] cxl/type3: add an optional mhd validation function for memory accesses

2023-09-05 Thread Gregory Price
On Mon, Sep 04, 2023 at 06:02:14PM +0100, Jonathan Cameron wrote:
> On Thu, 31 Aug 2023 21:29:13 -0400
> Gregory Price  wrote:
> 
> > When memory accesses are made, some MHSLDs would validate that the address
> > is within the scope of allocated sections.  To do this, the base device
> > must call an optional function set by inheriting devices.
> > 
> > Signed-off-by: Gregory Price 
> 
> This sort of callback addition can be done via class initialization.
> E.g. get_lsa_size()
> https://elixir.bootlin.com/qemu/latest/source/hw/mem/cxl_type3.c#L1494
> as the callback is the same for all instances of the class which
> in next patch is CXLNiagraClass where you already set the
> PCIClass callbacks in cxl_niagara_class_init()
> 
> You can then use something like:
> CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> cvc->mhd_access_valid(ct3d, dpa_offset, size);
> 
> Jonathan
> 

Will make this change along with a few cleanups suggested elsewhere, and
fix a few boneheaded mistakes.
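
For anyone following along, the class-callback shape described above would
look something like this sketch (the hook name mhd_access_valid and the
Niagara function name are illustrative placeholders, not code from the
series):

/* Added member on the existing CXLType3Class, next to get_lsa_size():
 *
 *     bool (*mhd_access_valid)(CXLType3Dev *ct3d, uint64_t dpa_offset,
 *                              unsigned int size);
 */

/* The base type3 read/write path only consults the hook when it is set: */
static bool ct3d_access_ok(CXLType3Dev *ct3d, uint64_t dpa_offset,
                           unsigned int size)
{
    CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);

    return !cvc->mhd_access_valid ||
           cvc->mhd_access_valid(ct3d, dpa_offset, size);
}

/* The Niagara subclass fills it in at class init time: */
static void cxl_niagara_class_init(ObjectClass *oc, void *data)
{
    CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);

    cvc->mhd_access_valid = cxl_niagara_access_valid;  /* hypothetical impl */
}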

~Gregory



Re: [PATCH 2/2] block: Make more BlockDriver definitions static

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 15:06, Kevin Wolf wrote:

Most block driver implementations don't have any reason for their
BlockDriver to be public. The only exceptions are bdrv_file, bdrv_raw
and bdrv_qcow2, which are actually used in other source files.

Make all other BlockDriver definitions static if they aren't yet.

Signed-off-by: Kevin Wolf 
---
  block/copy-before-write.c | 2 +-
  block/preallocate.c   | 2 +-
  block/snapshot-access.c   | 2 +-
  3 files changed, 3 insertions(+), 3 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 3/5] cxl/type3: Expose ct3 functions so that inheriters can call them

2023-09-05 Thread Gregory Price
On Tue, Sep 05, 2023 at 10:59:15AM +0200, Philippe Mathieu-Daudé wrote:
> Hi Gregory,
> 
> On 1/9/23 03:29, Gregory Price wrote:
> > For devices built on top of ct3, we need the init, realize, and
> > exit functions exposed to correctly start up and tear down.
> 
> You shouldn't need this. Your device class can inherit from
> the CT3 base class by setting its .parent to TYPE_CXL_TYPE3:
> 

Realized this after the fact when testing and patching, and forgot to remove
it; good eye.  Still learning QEMU internals.

> 
> You can see some documentation about QOM here:
> https://qemu-project.gitlab.io/qemu/devel/qom.html
> But still you'll have to look at examples in the tree.
> 
> 

Thanks!
Gregory



Re: [PATCH v2 1/4] softmmu: Support concurrent bounce buffers

2023-09-05 Thread Peter Xu
On Tue, Sep 05, 2023 at 09:38:39AM +0200, Mattias Nissler wrote:
> It would be nice to use a property on the device that originates the
> DMA operation to configure this. However, I don't see how to do this
> in a reasonable way without bigger changes: A typical call path is
> pci_dma_map -> dma_memory_map -> address_space_map. While pci_dma_map
> has a PCIDevice*, address_space_map only receives the AddressSpace*.
> So, we'd probably have to pass through a new QObject parameter to
> address_space_map that indicates the originator and pass that through?
> Or is there a better alternative to supply context information to
> address_space map? Let me know if any of these approaches sound
> appropriate and I'll be happy to explore them further.

Should be possible to do. The pci address space is not shared but
per-device by default (even if there is no vIOMMU intervention).  See
do_pci_register_device():

address_space_init(&pci_dev->bus_master_as,
   &pci_dev->bus_master_container_region, pci_dev->name);
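
If that per-device address space gives enough context, one way to get back to
the originating device inside address_space_map() without any new parameters
could be along the lines of the sketch below (purely an illustration of the
idea, not code from the series; it only works when the AddressSpace really is
the one embedded in a PCIDevice, and the header name may differ by tree):

#include "hw/pci/pci_device.h"

/*
 * Sketch: recover the PCI device that owns a bus-master AddressSpace.
 * A real implementation would need a reliable way to know that 'as' is
 * indeed a PCIDevice's bus_master_as before doing this (or would attach
 * the bounce-buffer limit to the AddressSpace itself instead).
 */
static PCIDevice *pci_device_from_bus_master_as(AddressSpace *as)
{
    return container_of(as, PCIDevice, bus_master_as);
}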

Thanks,

-- 
Peter Xu




Re: [PATCH 0/5 v2] CXL: SK hynix Niagara MHSLD Device

2023-09-05 Thread Gregory Price
On Tue, Sep 05, 2023 at 11:04:56AM +0200, Philippe Mathieu-Daudé wrote:
> On 1/9/23 03:29, Gregory Price wrote:
> > v2:
> > - 5 patch series, first 4 are pull-aheads that can be merged separately
> > 
> > This patch set includes an emulation of the SK hynix Niagara MHSLD
> > platform with custom CCI commands that allow for isolation of memory
> > blocks between attached hosts.
> > 
> 
> Being at commit 17780edd81 I can't apply this series:
> 
> Applying: cxl/mailbox: move mailbox effect definitions to a header
> error: patch failed: hw/cxl/cxl-mailbox-utils.c:12
> error: hw/cxl/cxl-mailbox-utils.c: patch does not apply
> Patch failed at 0001 cxl/mailbox: move mailbox effect definitions to a
> header
> 
> On what is it based?

This is based on jonathan's branch here:
https://gitlab.com/jic23/qemu/-/tree/cxl-2023-08-30

I try to keep things on top of his working branch to avoid any rebasing
issues.  I should have included that with the cover letter, my bad.

~Gregory



Re: mips system emulation failure with virtio

2023-09-05 Thread Alex Bennée


Richard Purdie  writes:

> With qemu 8.1.0 we see boot hangs for x86-64 targets. 
>
> These are fixed by 0d58c660689f6da1e3feff8a997014003d928b3b (softmmu:
> Use async_run_on_cpu in tcg_commit) but if I add that commit, mips and
> mips64 break, hanging at boot unable to find a rootfs. 

(Widen CC list)

>
> We use virtio for network and disk and both of those change in the
> bootlog from messages like:
>
> [1.726118] virtio-pci :00:13.0: enabling device ( -> 0003)
> [1.728864] virtio-pci :00:14.0: enabling device ( -> 0003)
> [1.729948] virtio-pci :00:15.0: enabling device ( -> 0003)
> ...
> [2.162148] virtio_blk virtio2: 1/0/0 default/read/poll queues
> [2.168311] virtio_blk virtio2: [vda] 1184242 512-byte logical 
>
> to:
>
> [1.777051] virtio-pci :00:13.0: enabling device ( -> 0003)
> [1.779822] virtio-pci :00:14.0: enabling device ( -> 0003)
> [1.780926] virtio-pci :00:15.0: enabling device ( -> 0003)
> ...
> [1.894852] virtio_rng: probe of virtio1 failed with error -28
> ...
> [2.063553] virtio_blk virtio2: 1/0/0 default/read/poll queues
> [2.064260] virtio_blk: probe of virtio2 failed with error -28
> [2.069080] virtio_net: probe of virtio0 failed with error -28
>
>
> i.e. the virtio drivers no longer work.

Interesting, as you say this seems to be VirtIO specific as the baseline
tests (using IDE) work fine:

  ➜  ./tests/venv/bin/avocado run 
./tests/avocado/tuxrun_baselines.py:test_mips64
  JOB ID : 71f3e3b7080164b78ef1c8c1bb6bc880932d8c9b
  JOB LOG: 
/home/alex/avocado/job-results/job-2023-09-05T15.01-71f3e3b/job.log
   (1/2) ./tests/avocado/tuxrun_baselines.py:TuxRunBaselineTest.test_mips64: 
PASS (12.19 s)
   (2/2) ./tests/avocado/tuxrun_baselines.py:TuxRunBaselineTest.test_mips64el: 
PASS (11.78 s)
  RESULTS: PASS 2 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
CANCEL 0
  JOB TIME   : 24.79 s

> I tested with current qemu master
> (17780edd81d27fcfdb7a802efc870a99788bd2fc) and mips is still broken
> there.
>
> Is this issue known about?

Could you raise a bug at:

  https://gitlab.com/qemu-project/qemu/-/issues

I'm curious why MIPS VirtIO is affected but nothing else is...

>
> Cheers,
>
> Richard


-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH v3 07/20] virtio: add vhost-user-base and a generic vhost-user-device

2023-09-05 Thread Matias Ezequiel Vara Larsen
On Mon, Jul 10, 2023 at 04:35:09PM +0100, Alex Bennée wrote:
> In theory we shouldn't need to repeat so much boilerplate to support
> vhost-user backends. This provides a generic vhost-user-base QOM
> object and a derived vhost-user-device for which the user needs to
> provide the few bits of information that aren't currently provided by
> the vhost-user protocol. This should provide a baseline implementation
> from which the other vhost-user stub can specialise.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>   - split into vub and vud
> ---
>  include/hw/virtio/vhost-user-device.h |  45 
>  hw/virtio/vhost-user-device.c | 324 ++
>  hw/virtio/meson.build |   2 +
>  3 files changed, 371 insertions(+)
>  create mode 100644 include/hw/virtio/vhost-user-device.h
>  create mode 100644 hw/virtio/vhost-user-device.c
> 
> diff --git a/include/hw/virtio/vhost-user-device.h 
> b/include/hw/virtio/vhost-user-device.h
> new file mode 100644
> index 00..9105011e25
> --- /dev/null
> +++ b/include/hw/virtio/vhost-user-device.h
> @@ -0,0 +1,45 @@
> +/*
> + * Vhost-user generic virtio device
> + *
> + * Copyright (c) 2023 Linaro Ltd
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef QEMU_VHOST_USER_DEVICE_H
> +#define QEMU_VHOST_USER_DEVICE_H
> +
> +#include "hw/virtio/vhost.h"
> +#include "hw/virtio/vhost-user.h"
> +
> +#define TYPE_VHOST_USER_BASE "vhost-user-base"
> +
> +OBJECT_DECLARE_TYPE(VHostUserBase, VHostUserBaseClass, VHOST_USER_BASE)
> +
> +struct VHostUserBase {
> +VirtIODevice parent;
> +/* Properties */
> +CharBackend chardev;
> +uint16_t virtio_id;
> +uint32_t num_vqs;
> +/* State tracking */
> +VhostUserState vhost_user;
> +struct vhost_virtqueue *vhost_vq;
> +struct vhost_dev vhost_dev;
> +GPtrArray *vqs;
> +bool connected;
> +};
> +
> +/* needed so we can use the base realize after specialisation
> +   tweaks */
> +struct VHostUserBaseClass {
> +/*< private >*/
> +VirtioDeviceClass parent_class;
> +/*< public >*/
> +DeviceRealize parent_realize;
> +};
> +
> +/* shared for the benefit of the derived pci class */
> +#define TYPE_VHOST_USER_DEVICE "vhost-user-device"
> +
> +#endif /* QEMU_VHOST_USER_DEVICE_H */
> diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
> new file mode 100644
> index 00..b0239fa033
> --- /dev/null
> +++ b/hw/virtio/vhost-user-device.c
> @@ -0,0 +1,324 @@
> +/*
> + * Generic vhost-user stub. This can be used to connect to any
> + * vhost-user backend. All configuration details must be handled by
> + * the vhost-user daemon itself
> + *
> + * Copyright (c) 2023 Linaro Ltd
> + * Author: Alex Bennée 
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/virtio/virtio-bus.h"
> +#include "hw/virtio/vhost-user-device.h"
> +#include "qemu/error-report.h"
> +
> +static void vub_start(VirtIODevice *vdev)
> +{
> +BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> +VHostUserBase *vub = VHOST_USER_BASE(vdev);
> +int ret, i;
> +
> +if (!k->set_guest_notifiers) {
> +error_report("binding does not support guest notifiers");
> +return;
> +}
> +
> +ret = vhost_dev_enable_notifiers(&vub->vhost_dev, vdev);
> +if (ret < 0) {
> +error_report("Error enabling host notifiers: %d", -ret);
> +return;
> +}
> +
> +ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, true);
> +if (ret < 0) {
> +error_report("Error binding guest notifier: %d", -ret);
> +goto err_host_notifiers;
> +}
> +
> +vub->vhost_dev.acked_features = vdev->guest_features;
> +
> +ret = vhost_dev_start(&vub->vhost_dev, vdev, true);
> +if (ret < 0) {
> +error_report("Error starting vhost-user-device: %d", -ret);
> +goto err_guest_notifiers;
> +}
> +
> +/*
> + * guest_notifier_mask/pending not used yet, so just unmask
> + * everything here. virtio-pci will do the right thing by
> + * enabling/disabling irqfd.
> + */
> +for (i = 0; i < vub->vhost_dev.nvqs; i++) {
> +vhost_virtqueue_mask(&vub->vhost_dev, vdev, i, false);
> +}
> +
> +return;
> +
> +err_guest_notifiers:
> +k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false);
> +err_host_notifiers:
> +vhost_dev_disable_notifiers(&vub->vhost_dev, vdev);
> +}
> +
> +static void vub_stop(VirtIODevice *vdev)
> +{
> +VHostUserBase *vub = VHOST_USER_BASE(vdev);
> +BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> +int ret;
> +
> +if (!k->set_guest_notifiers) {
> +return;
> +}
> +
> +vhost_dev_stop(&vub->vhost_dev, vdev, true);
> +
> +ret = k->set_guest_noti

Re: [PATCH v6 04/17] nbd: Prepare for 64-bit request effect lengths

2023-09-05 Thread Eric Blake
On Mon, Sep 04, 2023 at 07:15:04PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 29.08.23 20:58, Eric Blake wrote:
> > Widen the length field of NBDRequest to 64-bits, although we can
> > assert that all current uses are still under 32 bits: either because
> > of NBD_MAX_BUFFER_SIZE which is even smaller (and where size_t can
> > still be appropriate, even on 32-bit platforms), or because nothing
> > ever puts us into NBD_MODE_EXTENDED yet (and while future patches will
> > allow larger transactions, the lengths in play here are still capped
> > at 32-bit).  There are no semantic changes, other than a typo fix in a
> > couple of error messages.
> > 
> > Signed-off-by: Eric Blake 
> > ---
> > 
> > v6: fix sign extension bug
> > 
> > v5: tweak commit message, adjust a few more spots [Vladimir]
> > 
> > v4: split off enum changes to earlier patches [Vladimir]
> 
> [..]
> 
> > --- a/nbd/server.c
> > +++ b/nbd/server.c
> > @@ -1165,7 +1165,7 @@ static int nbd_negotiate_options(NBDClient *client, 
> > Error **errp)
> >   client->optlen = length;
> > 
> >   if (length > NBD_MAX_BUFFER_SIZE) {
> > -error_setg(errp, "len (%" PRIu32" ) is larger than max len 
> > (%u)",
> > +error_setg(errp, "len (%" PRIu32 ") is larger than max len 
> > (%u)",
> >  length, NBD_MAX_BUFFER_SIZE);
> >   return -EINVAL;
> >   }
> > @@ -1441,7 +1441,7 @@ static int coroutine_fn nbd_receive_request(NBDClient 
> > *client, NBDRequest *reque
> >   request->type   = lduw_be_p(buf + 6);
> >   request->cookie = ldq_be_p(buf + 8);
> >   request->from   = ldq_be_p(buf + 16);
> > -request->len= ldl_be_p(buf + 24);
> > +request->len= (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits 
> > */
> 
> should it be "(uint64_t)" ?

No. ldl_be_p() returns an int.  Widening int to 64 bits sign-extends;
we have to first make it unsigned (by casting to uint32_t) so that we
then have an unsigned value that widens by zero-extension.
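
A tiny standalone example of the difference (plain C, independent of the QEMU
helpers; the hex value is just an arbitrary 32-bit number with the top bit
set):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int loaded = (int)0xfffffff0;         /* what an int-returning load hands back */
    uint64_t sign_ext = loaded;           /* widens to 0xfffffffffffffff0 */
    uint64_t zero_ext = (uint32_t)loaded; /* widens to 0x00000000fffffff0 */

    printf("%016" PRIx64 " vs %016" PRIx64 "\n", sign_ext, zero_ext);
    return 0;
}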

Maybe we should fix ldl_be_p() and friends to favor unsigned values.
'git grep ldul_be' has zero hits, even though Peter just touched the
docs patch claiming it exists:

https://lists.gnu.org/archive/html/qemu-devel/2023-09/msg00645.html


> 
> > 
> >   trace_nbd_receive_request(magic, request->flags, request->type,
> > request->from, request->len);
> > @@ -1899,7 +1899,7 @@ static int coroutine_fn 
> > nbd_co_send_simple_reply(NBDClient *client,
> >NBDRequest *request,
> >uint32_t error,
> >void *data,
> > - size_t len,
> > + uint64_t len,
> >Error **errp)
> >   {
> >   NBDSimpleReply reply;
> > @@ -1910,6 +1910,7 @@ static int coroutine_fn 
> > nbd_co_send_simple_reply(NBDClient *client,
> >   };
> > 
> >   assert(!len || !nbd_err);
> > +assert(len <= NBD_MAX_STRING_SIZE);
> 
> NBD_MAX_BUFFER_SIZE ?

No. MAX_STRING_SIZE is 4k, MAX_BUFFER_SIZE is 32M.  The smaller size
is the correct bound here (an error message has to be transmitted as a
single string, and the recipient does not expect more than a 4k error
message).

> 
> with that fixed:
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org




Re: [PATCH] docs/devel/loads-stores: Fix git grep regexes

2023-09-05 Thread Eric Blake
On Mon, Sep 04, 2023 at 05:17:03PM +0100, Peter Maydell wrote:
> The loads-and-stores documentation includes git grep regexes to find
> occurrences of the various functions.  Some of these regexes have
> errors, typically failing to escape the '?', '(' and ')' when they
> should be metacharacters (since these are POSIX basic REs). We also
> weren't consistent about whether to have a ':' on the end of the
> line introducing the list of regexes in each section.
> 
> Fix the errors.
> 
> The following shell rune will complain about any REs in the
> file which don't have any matches in the codebase:
>  for re in $(sed -ne 's/ - ``\(\\<.*\)``/\1/p' docs/devel/loads-stores.rst); 
> do git grep -q "$re" || echo "no matches for re $re"; done
> 
> Signed-off-by: Peter Maydell 
> ---
>  docs/devel/loads-stores.rst | 40 ++---
>  1 file changed, 20 insertions(+), 20 deletions(-)
> 
> diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
> index dab6dfa0acc..ec627aa9c06 100644
> --- a/docs/devel/loads-stores.rst
> +++ b/docs/devel/loads-stores.rst
> @@ -63,12 +63,12 @@ which stores ``val`` to ``ptr`` as an ``{endian}`` order 
> value
>  of size ``sz`` bytes.
>  
>  
> -Regexes for git grep
> +Regexes for git grep:
>   - ``\``

This claims that ldul_be_p() is a valid function name (which I would
expect to take a pointer to a 32-bit integer and produce an unsigned
result suitable for assigning into a 64-bit value).  But it does not
exist, and the fact that ldl_be_p() returns 'int' means I had to add a
cast to avoid unintended sign-extension:

https://lists.gnu.org/archive/html/qemu-devel/2023-08/msg05234.html

cast added in relation to v5 patch at
https://lists.gnu.org/archive/html/qemu-devel/2023-08/msg04923.html

>   - ``\``
>   - ``\``
> - - ``\``
> - - ``\``
> + - ``\``
> + - ``\``

So as long as we are touching the docs, is it worth considering the
larger task of auditing whether it is appropriate to have all of the
ld*_ functions return unsigned values, and/or implement ldu/lds
variants that guarantee zero or sign extension for widening 32-bit
values when assigning to 64-bit destinations?
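
(For illustration, such a zero-extending variant could be as small as the
sketch below; the name ldul_be_p is hypothetical, mirroring the non-existent
function discussed above:)

static inline uint32_t ldul_be_p(const void *ptr)
{
    /*
     * ldl_be_p() returns int; the uint32_t return type guarantees that
     * assigning the result to a uint64_t zero-extends instead of
     * sign-extending.
     */
    return (uint32_t)ldl_be_p(ptr);
}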

>  
>  ``cpu_{ld,st}*_mmu``
>  
> @@ -121,8 +121,8 @@ store: ``cpu_st{size}{end}_mmu(env, ptr, val, oi, 
> retaddr)``
>   - ``_le`` : little endian
>  
>  Regexes for git grep:
> - - ``\``

As a counterpoint, the cpu_ldl_* functions already return uint32_t.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org




Re: [PATCH 0/8] move softmmu options processing from os-posix.c to vl.c

2023-09-05 Thread Eric Blake
On Fri, Sep 01, 2023 at 11:44:42PM +0200, Paolo Bonzini wrote:
> Queued, thanks.
> 
> Paolo

I also had these on my plate for an upcoming NBD pull request; I'm
fine whichever tree they show up through.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org




Re: CXL Namespaces of ACPI disappearing in Qemu demo

2023-09-05 Thread Jonathan Cameron via
On Tue, 5 Sep 2023 18:45:02 +0800
Yuquan Wang  wrote:

> Hi, Jonathan
> On 2023-09-04 20:43,  jonathan.cameron wrote:
> > 
> > At the system design level, MMIO space of Root complex register space via 
> > RCRB
> > does not map in a similar fashion to PCIE MMIO space (which is handled via
> > address decoding in the PCIE fabric). It is much more similar to MMIO for 
> > platform
> > devices - as such the implementation handles in like a platform device 
> > (well 16 of
> > them which seemed enough for any sane usecase).
> > 
> >   
> 
> Oh, thanks! According to the above, the core factor is therefore the
> implementation of the RCRB.
> 
> > 
> > So in theory we could make some space for the CXL root bridge RCRB registers
> > but it would make various generic paths more complex.  In a real system
> > those registers are likely to be far from the PCI MMIO space anyway so the
> > way it's modeled is probably more realistic than pushing the RCRB into the
> > existing allocation.
> >   
> 
> This implies that all CXL root bridges will use RCRB registers.
> 
> From Table 8-17 and Figure 9-14 in the CXL 3.0 specification, I understood that
> only the RCH DP & RCD UP will use RCRBs, and CXL host bridges in VH mode will
> use another way to realize the CHBCR. I had tried to find more explanation in
> the CXL spec, but haven't found it. Hence this is why I am confused.

Ah. That distinction is a bit messy.  Both an RCRB and CHBCR (CXL Host Bridge 
Component
Registers) are similar in the sense that they are mapped in host memory space. 
As I understand it the distinction is more about the format / contents of that 
memory
than how you access them. As an aside, they are described by a static ACPI 
table,
so they can't be in the MMIO space used for BARs etc.

Jonathan


> 
> Many thanks
> Yuquan




Re: [PATCH v6 06/17] nbd/server: Support a request payload

2023-09-05 Thread Vladimir Sementsov-Ogievskiy

On 29.08.23 20:58, Eric Blake wrote:

Upcoming additions to support NBD 64-bit effect lengths allow for the
possibility to distinguish between payload length (capped at 32M) and
effect length (64 bits, although we generally assume 63 bits because
of off_t limitations).  Without that extension, only the NBD_CMD_WRITE
request has a payload; but with the extension, it makes sense to allow
at least NBD_CMD_BLOCK_STATUS to have both a payload and effect length
in a future patch (where the payload is a limited-size struct that in
turn gives the real effect length as well as a subset of known ids for
which status is requested).  Other future NBD commands may also have a
request payload, so the 64-bit extension introduces a new
NBD_CMD_FLAG_PAYLOAD_LEN that distinguishes between whether the header
length is a payload length or an effect length, rather than
hard-coding the decision based on the command; although a client
should never send a command with a payload without the negotiation
phase proving such extension is available, we are now able to
gracefully fail unexpected client payloads while keeping the
connection alive.  Note that we do not support the payload version of
BLOCK_STATUS yet.

Signed-off-by: Eric Blake 
---

v5: retitled from v4 13/24, rewrite on top of previous patch's switch
statement [Vladimir]

v4: less indentation on several 'if's [Vladimir]
---
  nbd/server.c | 33 -
  nbd/trace-events |  1 +
  2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index dd3ab59224c..adcfcdeacb7 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2334,7 +2334,8 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req,
 Error **errp)
  {
  NBDClient *client = req->client;
-bool check_length = false;
+bool extended_with_payload;
+bool check_length;
  bool check_rofs = false;
  bool allocate_buffer = false;
  unsigned payload_len = 0;
@@ -2350,6 +2351,9 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req,

  trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
   nbd_cmd_lookup(request->type));
+check_length = extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
+request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
+
  switch (request->type) {
  case NBD_CMD_DISC:
  /* Special case: we're going to disconnect without a reply,
@@ -2366,6 +2370,14 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req,
  break;

  case NBD_CMD_WRITE:
+if (client->mode >= NBD_MODE_EXTENDED) {
+if (!extended_with_payload) {
+/* The client is noncompliant. Trace it, but proceed. */
+trace_nbd_co_receive_ext_payload_compliance(request->from,
+request->len);
+}
+valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
+}
  payload_len = request->len;
  check_length = true;
  allocate_buffer = true;
@@ -2407,6 +2419,15 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req,


more context:

/* Payload and buffer handling. */
if (!payload_len) {
req->complete = true;
}
if (check_length && request->len > NBD_MAX_BUFFER_SIZE) {
/* READ, WRITE, CACHE */
error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
   request->len, NBD_MAX_BUFFER_SIZE);
return -EINVAL;
}



+if (extended_with_payload && !allocate_buffer) {


it's correct but strange, as allocate_buffer is (READ or WRITE), and READ is 
totally unrelated here.


+/*
+ * For now, we don't support payloads on other commands; but
+ * we can keep the connection alive by ignoring the payload.


Was it in the specification that we can safely ignore an unknown payload for
known commands?


+ */
+assert(request->type != NBD_CMD_WRITE);
+payload_len = request->len;


What I don't like here is that we already set req->complete to true for this
request when payload_len was zero.

Probably, for extended_with_payload requests we should initialize payload_len
at the top of the function?


+request->len = 0;
+}
   if (allocate_buffer) {
       /* READ, WRITE */
  req->data = blk_try_blockalign(client->exp->common.blk,
@@ -2417,10 +2438,12 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req,
  }
  }
  if (payload_len) {
-/* WRITE */
-assert(req->data);
-ret = nbd_read(client->ioc, req->data, payload_len,
-   "CMD_WRITE data", errp);
+if (req->data) {


and req->data is actually (READ or WRITE) (== allocate_buffer) as well.

I'd prefer "if (request->type == NBD_CMD_WRITE)" here


+ret =

Re: [PATCH v6 04/17] nbd: Prepare for 64-bit request effect lengths

2023-09-05 Thread Vladimir Sementsov-Ogievskiy

On 05.09.23 17:24, Eric Blake wrote:

On Mon, Sep 04, 2023 at 07:15:04PM +0300, Vladimir Sementsov-Ogievskiy wrote:

On 29.08.23 20:58, Eric Blake wrote:

Widen the length field of NBDRequest to 64-bits, although we can
assert that all current uses are still under 32 bits: either because
of NBD_MAX_BUFFER_SIZE which is even smaller (and where size_t can
still be appropriate, even on 32-bit platforms), or because nothing
ever puts us into NBD_MODE_EXTENDED yet (and while future patches will
allow larger transactions, the lengths in play here are still capped
at 32-bit).  There are no semantic changes, other than a typo fix in a
couple of error messages.

Signed-off-by: Eric Blake 
---

v6: fix sign extension bug

v5: tweak commit message, adjust a few more spots [Vladimir]

v4: split off enum changes to earlier patches [Vladimir]


[..]


--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1165,7 +1165,7 @@ static int nbd_negotiate_options(NBDClient *client, Error 
**errp)
   client->optlen = length;

   if (length > NBD_MAX_BUFFER_SIZE) {
-error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
+error_setg(errp, "len (%" PRIu32 ") is larger than max len (%u)",
  length, NBD_MAX_BUFFER_SIZE);
   return -EINVAL;
   }
@@ -1441,7 +1441,7 @@ static int coroutine_fn nbd_receive_request(NBDClient 
*client, NBDRequest *reque
   request->type   = lduw_be_p(buf + 6);
   request->cookie = ldq_be_p(buf + 8);
   request->from   = ldq_be_p(buf + 16);
-request->len= ldl_be_p(buf + 24);
+request->len= (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */


should it be "(uint64_t)" ?


No. ldl_be_p() returns an int.  Widening int to 64 bits sign-extends;
we have to first make it unsigned (by casting to uint32_t) so that we
then have an unsigned value that widens by zero-extension.

Maybe we should fix ldl_bd_p() and friends to favor unsigned values.
'git grep ldul_be' has zero hits; even though Peter just touched the
docs patch claiming it exists:

https://lists.gnu.org/archive/html/qemu-devel/2023-09/msg00645.html






   trace_nbd_receive_request(magic, request->flags, request->type,
 request->from, request->len);
@@ -1899,7 +1899,7 @@ static int coroutine_fn 
nbd_co_send_simple_reply(NBDClient *client,
NBDRequest *request,
uint32_t error,
void *data,
- size_t len,
+ uint64_t len,
Error **errp)
   {
   NBDSimpleReply reply;
@@ -1910,6 +1910,7 @@ static int coroutine_fn 
nbd_co_send_simple_reply(NBDClient *client,
   };

   assert(!len || !nbd_err);
+assert(len <= NBD_MAX_STRING_SIZE);


NBD_MAX_BUFFER_SIZE ?


No. MAX_STRING_SIZE is 4k, MAX_BUFFER_SIZE is 32M.  The smaller size
is the correct bound here (an error message has to be transmitted as a
single string, and the recipient does not expect more than a 4k error
message).



I am missing something... Why is it an error message? It may be a simple reply
for CMD_READ, where len is request->len.


--
Best regards,
Vladimir




Re: 'check-avocado' fails after c03f57fd5b ("Revert "tests: Use separate ...")

2023-09-05 Thread Philippe Mathieu-Daudé

On 5/9/23 15:13, Daniel Henrique Barboza wrote:

Hi,

I managed to work around it. I'll post here the debugs for future 
reference.



- I got suspicious after the above command failure, and noticed that 
'avocado' didn't work

even outside of the QEMU tree:


[danielhb@grind ~]$ avocado --help
Traceback (most recent call last):
   File "/usr/bin/avocado", line 33, in 
     sys.exit(load_entry_point('avocado-framework==92.0', 


92.0 should be fine...

python/setup.cfg:37:avocado-framework >= 90.0
python/tests/minreqs.txt:26:avocado-framework==90.0
pythondeps.toml:30:# avocado-framework, for example right now the limit 
is 92.x.
pythondeps.toml:31:avocado-framework = { accepted = "(>=88.1, <93.0)", 
installed = "88.1", canary = "avocado" }


- Turns out that I had 2 avocado versions installed: one from F38 and 
other from pip.

If I remove the 'pip' version I got a different error:

  (01/13) tests/avocado/empty_cpu_model.py:EmptyCPUModel.test: STARTED
  (06/13) 
tests/avocado/tuxrun_baselines.py:TuxRunBaselineTest.test_riscv64: 
ERROR: Test.__init__() got an unexpected keyword argument 
'run.results_dir' (0.04 s)

  (...)
  (01/13) tests/avocado/empty_cpu_model.py:EmptyCPUModel.test: ERROR: 
Test.__init__() got an unexpected keyword argument 'run.results_dir' 
(0.04 s)

  (...)

- Which seems to be related to a known bug according to:

https://avocado-framework.readthedocs.io/en/101.0/releases/100_1.html


In the end I don't need 'avocado' outside of testing QEMU, so my 
solution was to
remove all avocado packages from the system and let QEMU install 
whatever it is
needed inside pyvenv. 'check-avocado' now works in 'master'. I am still 
unsure
why this particular patch triggered all this problem here, but I don't 
believe
this is worth pursuing unless other people starts to see problems. For 
now we

can leave it as is IMO.





[PATCH 2/2] virtio: Drop out of coroutine context in virtio_load()

2023-09-05 Thread Kevin Wolf
virtio_load() as a whole should run in coroutine context because it
reads from the migration stream and we don't want this to block.

However, it calls virtio_set_features_nocheck() and devices don't
expect their .set_features callback to run in a coroutine and therefore
call functions that may not be called in coroutine context. To fix this,
drop out of coroutine context for calling virtio_set_features_nocheck().

Without this fix, the following crash was reported:

  #0  __pthread_kill_implementation (threadid=, 
signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
  #1  0x7efc738c05d3 in __pthread_kill_internal (signo=6, 
threadid=) at pthread_kill.c:78
  #2  0x7efc73873d26 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/posix/raise.c:26
  #3  0x7efc738477f3 in __GI_abort () at abort.c:79
  #4  0x7efc7384771b in __assert_fail_base (fmt=0x7efc739dbcb8 "", 
assertion=assertion@entry=0x560aebfbf5cf "!qemu_in_coroutine()",
 file=file@entry=0x560aebfcd2d4 "../block/graph-lock.c", 
line=line@entry=275, function=function@entry=0x560aebfcd34d "void 
bdrv_graph_rdlock_main_loop(void)") at assert.c:92
  #5  0x7efc7386ccc6 in __assert_fail (assertion=0x560aebfbf5cf 
"!qemu_in_coroutine()", file=0x560aebfcd2d4 "../block/graph-lock.c", line=275,
 function=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at 
assert.c:101
  #6  0x560aebcd8dd6 in bdrv_register_buf ()
  #7  0x560aeb97ed97 in ram_block_added.llvm ()
  #8  0x560aebb8303f in ram_block_add.llvm ()
  #9  0x560aebb834fa in qemu_ram_alloc_internal.llvm ()
  #10 0x560aebb2ac98 in vfio_region_mmap ()
  #11 0x560aebb3ea0f in vfio_bars_register ()
  #12 0x560aebb3c628 in vfio_realize ()
  #13 0x560aeb90f0c2 in pci_qdev_realize ()
  #14 0x560aebc40305 in device_set_realized ()
  #15 0x560aebc48e07 in property_set_bool.llvm ()
  #16 0x560aebc46582 in object_property_set ()
  #17 0x560aebc4cd58 in object_property_set_qobject ()
  #18 0x560aebc46ba7 in object_property_set_bool ()
  #19 0x560aeb98b3ca in qdev_device_add_from_qdict ()
  #20 0x560aebb1fbaf in virtio_net_set_features ()
  #21 0x560aebb46b51 in virtio_set_features_nocheck ()
  #22 0x560aebb47107 in virtio_load ()
  #23 0x560aeb9ae7ce in vmstate_load_state ()
  #24 0x560aeb9d2ee9 in qemu_loadvm_state_main ()
  #25 0x560aeb9d45e1 in qemu_loadvm_state ()
  #26 0x560aeb9bc32c in process_incoming_migration_co.llvm ()
  #27 0x560aebeace56 in coroutine_trampoline.llvm ()

Cc: qemu-sta...@nongnu.org
Buglink: https://issues.redhat.com/browse/RHEL-832
Signed-off-by: Kevin Wolf 
---
 hw/virtio/virtio.c | 45 -
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 309038fd46..969c25f4cf 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2825,8 +2825,9 @@ static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
 }
 
 /* A wrapper for use as a VMState .get function */
-static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
- const VMStateField *field)
+static int coroutine_mixed_fn
+virtio_device_get(QEMUFile *f, void *opaque, size_t size,
+  const VMStateField *field)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
@@ -2853,6 +2854,39 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
 return bad ? -1 : 0;
 }
 
+typedef struct VirtioSetFeaturesNocheckData {
+Coroutine *co;
+VirtIODevice *vdev;
+uint64_t val;
+int ret;
+} VirtioSetFeaturesNocheckData;
+
+static void virtio_set_features_nocheck_bh(void *opaque)
+{
+VirtioSetFeaturesNocheckData *data = opaque;
+
+data->ret = virtio_set_features_nocheck(data->vdev, data->val);
+aio_co_wake(data->co);
+}
+
+static int coroutine_mixed_fn
+virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
+{
+if (qemu_in_coroutine()) {
+VirtioSetFeaturesNocheckData data = {
+.co = qemu_coroutine_self(),
+.vdev = vdev,
+.val = val,
+};
+aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
+virtio_set_features_nocheck_bh, &data);
+qemu_coroutine_yield();
+return data.ret;
+} else {
+return virtio_set_features_nocheck(vdev, val);
+}
+}
+
 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
 {
 int ret;
@@ -2906,7 +2940,8 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
 return config_size;
 }
 
-int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
+int coroutine_mixed_fn
+virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
 {
 int i, ret;
 int32_t config_len;
@@ -3023,14 +3058,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_i
