date:20221013

From: Volker Rümelin 

It seems there is a demand [1] for low latency playback over
SPICE. Add a pcm_ops buffer_get_free function to reduce the
playback latency. The mixing engine buffer becomes a temporary
buffer.

[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-01/msg01644.html

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-6-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/spiceaudio.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c
index a8d370fe6f31..22892a7b9d42 100644
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -120,6 +120,13 @@ static void line_out_fini (HWVoiceOut *hw)
 spice_server_remove_interface (&out->sin.base);
 }
 
+static size_t line_out_get_free(HWVoiceOut *hw)
+{
+SpiceVoiceOut *out = container_of(hw, SpiceVoiceOut, hw);
+
+return audio_rate_peek_bytes(&out->rate, &hw->info);
+}
+
 static void *line_out_get_buffer(HWVoiceOut *hw, size_t *size)
 {
 SpiceVoiceOut *out = container_of(hw, SpiceVoiceOut, hw);
@@ -133,8 +140,6 @@ static void *line_out_get_buffer(HWVoiceOut *hw, size_t 
*size)
 *size = MIN((out->fsize - out->fpos) << 2, *size);
 }
 
-*size = audio_rate_get_bytes(&hw->info, &out->rate, *size);
-
 return out->frame + out->fpos;
 }
 
@@ -142,6 +147,8 @@ static size_t line_out_put_buffer(HWVoiceOut *hw, void 
*buf, size_t size)
 {
 SpiceVoiceOut *out = container_of(hw, SpiceVoiceOut, hw);
 
+audio_rate_add_bytes(&out->rate, size);
+
 if (buf) {
 assert(buf == out->frame + out->fpos && out->fpos <= out->fsize);
 out->fpos += size >> 2;
@@ -282,6 +289,7 @@ static struct audio_pcm_ops audio_callbacks = {
 .init_out = line_out_init,
 .fini_out = line_out_fini,
 .write= audio_generic_write,
+.buffer_get_free = line_out_get_free,
 .get_buffer_out = line_out_get_buffer,
 .put_buffer_out = line_out_put_buffer,
 .enable_out = line_out_enable,
-- 
2.37.3

[PULL 02/26] audio: fix GUS audio playback with out.mixing-engine=off

From: Volker Rümelin 

Fix GUS audio playback with out.mixing-engine=off.

The GUS audio device needs to know the number of samples to
produce in advance.

To reproduce start qemu with
-parallel none -device gus,audiodev=audio0
-audiodev pa,id=audio0,out.mixing-engine=off

and start the cartoon.exe demo in a FreeDOS guest. The demo file
is available on the download page of the GUSemu32 author.

Signed-off-by: Volker Rümelin 
Acked-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-2-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/audio/audio.c b/audio/audio.c
index 08aec51e7085..29da359b416b 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1139,7 +1139,8 @@ static void audio_run_out (AudioState *s)
 }
 
 if (sw->active) {
-sw->callback.fn(sw->callback.opaque, INT_MAX);
+sw->callback.fn(sw->callback.opaque,
+hw_free * sw->info.bytes_per_frame);
 }
 
 continue;
-- 
2.37.3

[PULL 15/26] cirrus_vga: fix potential memory overflow

From: lu zhipeng 

Signed-off-by: lu zhipeng 
Message-Id: <20220929122352.1891-1-luzhip...@cestc.cn>
Signed-off-by: Gerd Hoffmann 
---
 hw/display/cirrus_vga.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 3bb6a58698c1..2577005d03ce 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -834,7 +834,7 @@ static void cirrus_bitblt_cputovideo_next(CirrusVGAState * 
s)
word alignment, so we keep them for the next line */
 /* XXX: keep alignment to speed up transfer */
 end_ptr = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
-copy_count = s->cirrus_srcptr_end - end_ptr;
+copy_count = MIN(s->cirrus_srcptr_end - end_ptr, 
CIRRUS_BLTBUFSIZE);
 memmove(s->cirrus_bltbuf, end_ptr, copy_count);
 s->cirrus_srcptr = s->cirrus_bltbuf + copy_count;
 s->cirrus_srcptr_end = s->cirrus_bltbuf + 
s->cirrus_blt_srcpitch;
-- 
2.37.3

[PULL 11/26] audio: fix sw->buf size for audio recording

From: Volker Rümelin 

The calculation of the buffer size needed to store audio samples
after resampling is wrong for audio recording. For audio recording
sw->ratio is calculated as

sw->ratio = frontend sample rate / backend sample rate.

From this follows

frontend samples = frontend sample rate / backend sample rate
 * backend samples
frontend samples = sw->ratio * backend samples

In 2 of 3 places in the audio recording code where sw->ratio
is used in a calculation to get the number of frontend frames,
the calculation is wrong. Fix this. The 3rd formula in
audio_pcm_sw_read() is correct.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/71
Signed-off-by: Volker Rümelin 
Acked-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-11-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio_template.h | 4 
 audio/audio.c  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/audio/audio_template.h b/audio/audio_template.h
index 98ab557684d8..720a32e57e7d 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -110,7 +110,11 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW 
*sw)
 return 0;
 }
 
+#ifdef DAC
 samples = ((int64_t) sw->HWBUF->size << 32) / sw->ratio;
+#else
+samples = (int64_t)sw->HWBUF->size * sw->ratio >> 32;
+#endif
 
 sw->buf = audio_calloc(__func__, samples, sizeof(struct st_sample));
 if (!sw->buf) {
diff --git a/audio/audio.c b/audio/audio.c
index ed2b9d5f7e15..886725747bda 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -995,7 +995,7 @@ void AUD_set_active_in (SWVoiceIn *sw, int on)
  */
 static size_t audio_frontend_frames_in(SWVoiceIn *sw, size_t frames_in)
 {
-return ((int64_t)frames_in << 32) / sw->ratio;
+return (int64_t)frames_in * sw->ratio >> 32;
 }
 
 static size_t audio_get_avail (SWVoiceIn *sw)
-- 
2.37.3

[PULL 22/26] ui/gtk: Fix the implicit mouse ungrabbing logic

From: Akihiko Odaki 

Although the grab menu item represents the tabbed displays, the old
implicit mouse ungrabbing logic changes the grab menu item even for
an untabbed display.

Leave the grab menu item unchanged when implicitly ungrabbing the
mouse for an untabbed display. The new ungrabbing logic introduced in
gd_mouse_mode_change() strictly follows the corresponding grabbing
logic found in gd_button_event().

Signed-off-by: Akihiko Odaki 
Message-Id: <20221008140116.11473-1-akihiko.od...@daynix.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/gtk.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/ui/gtk.c b/ui/gtk.c
index 1467b8c7d7f0..6fc2e2396393 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -681,9 +681,13 @@ static void gd_mouse_mode_change(Notifier *notify, void 
*data)
 
 s = container_of(notify, GtkDisplayState, mouse_mode_notifier);
 /* release the grab at switching to absolute mode */
-if (qemu_input_is_absolute() && gd_is_grab_active(s)) {
-gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->grab_item),
-   FALSE);
+if (qemu_input_is_absolute() && s->ptr_owner) {
+if (!s->ptr_owner->window) {
+gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->grab_item),
+   FALSE);
+} else {
+gd_ungrab_pointer(s);
+}
 }
 for (i = 0; i < s->nb_vcs; i++) {
 VirtualConsole *vc = &s->vc[i];
-- 
2.37.3

[PULL 03/26] audio: run downstream playback queue unconditionally

From: Volker Rümelin 

Run the downstream playback queue even if the emulated audio
device didn't write new samples. There still may be buffered
audio samples downstream.

This is for the -audiodev out.mixing-engine=off case. Commit
a8a98cfd42 ("audio: run downstream playback queue
unconditionally") fixed the out.mixing-engine=on case.

Signed-off-by: Volker Rümelin 
Acked-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-3-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 29da359b416b..567f953e66f9 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1143,6 +1143,10 @@ static void audio_run_out (AudioState *s)
 hw_free * sw->info.bytes_per_frame);
 }
 
+if (hw->pcm_ops->run_buffer_out) {
+hw->pcm_ops->run_buffer_out(hw);
+}
+
 continue;
 }
 
@@ -1501,10 +1505,6 @@ size_t audio_generic_write(HWVoiceOut *hw, void *buf, 
size_t size)
 }
 }
 
-if (hw->pcm_ops->run_buffer_out) {
-hw->pcm_ops->run_buffer_out(hw);
-}
-
 return total;
 }
 
-- 
2.37.3

[PULL 05/26] audio: add more audio rate control functions

From: Volker Rümelin 

The next patch needs two new rate control functions. The first
one returns the bytes needed at call time to maintain the
selected rate. The second one adjusts the bytes actually sent.

Split the audio_rate_get_bytes() function into these two
functions and reintroduce audio_rate_get_bytes().

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-5-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio_int.h |  2 ++
 audio/audio.c | 35 ---
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/audio/audio_int.h b/audio/audio_int.h
index 2a6914d2aa65..97e20e842927 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -263,6 +263,8 @@ typedef struct RateCtl {
 } RateCtl;
 
 void audio_rate_start(RateCtl *rate);
+size_t audio_rate_peek_bytes(RateCtl *rate, struct audio_pcm_info *info);
+void audio_rate_add_bytes(RateCtl *rate, size_t bytes_used);
 size_t audio_rate_get_bytes(struct audio_pcm_info *info, RateCtl *rate,
 size_t bytes_avail);
 
diff --git a/audio/audio.c b/audio/audio.c
index 567f953e66f9..61cdd73df5aa 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2251,26 +2251,39 @@ void audio_rate_start(RateCtl *rate)
 rate->start_ticks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 }
 
-size_t audio_rate_get_bytes(struct audio_pcm_info *info, RateCtl *rate,
-size_t bytes_avail)
+size_t audio_rate_peek_bytes(RateCtl *rate, struct audio_pcm_info *info)
 {
 int64_t now;
 int64_t ticks;
 int64_t bytes;
-int64_t samples;
-size_t ret;
+int64_t frames;
 
 now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 ticks = now - rate->start_ticks;
 bytes = muldiv64(ticks, info->bytes_per_second, NANOSECONDS_PER_SECOND);
-samples = (bytes - rate->bytes_sent) / info->bytes_per_frame;
-if (samples < 0 || samples > 65536) {
-AUD_log(NULL, "Resetting rate control (%" PRId64 " samples)\n", 
samples);
+frames = (bytes - rate->bytes_sent) / info->bytes_per_frame;
+if (frames < 0 || frames > 65536) {
+AUD_log(NULL, "Resetting rate control (%" PRId64 " frames)\n", frames);
 audio_rate_start(rate);
-samples = 0;
+frames = 0;
 }
 
-ret = MIN(samples * info->bytes_per_frame, bytes_avail);
-rate->bytes_sent += ret;
-return ret;
+return frames * info->bytes_per_frame;
+}
+
+void audio_rate_add_bytes(RateCtl *rate, size_t bytes_used)
+{
+rate->bytes_sent += bytes_used;
+}
+
+size_t audio_rate_get_bytes(struct audio_pcm_info *info, RateCtl *rate,
+size_t bytes_avail)
+{
+size_t bytes;
+
+bytes = audio_rate_peek_bytes(rate, info);
+bytes = MIN(bytes, bytes_avail);
+audio_rate_add_bytes(rate, bytes);
+
+return bytes;
 }
-- 
2.37.3

[PULL 00/26] Kraxel 20221013 patches

The following changes since commit f1d33f55c47dfdaf8daacd618588ad3ae4c452d1:

  Merge tag 'pull-testing-gdbstub-plugins-gitdm-061022-3' of 
https://github.com/stsquad/qemu into staging (2022-10-06 07:11:56 -0400)

are available in the Git repository at:

  https://gitlab.com/kraxel/qemu.git tags/kraxel-20221013-pull-request

for you to fetch changes up to 61ddafbcfac4975ee245cd3453be86b0632a5605:

  audio: improve out.voices test (2022-10-12 20:36:17 +0200)


pci: cleanup virtio ids.
audio: bugfixes and latency improvements.
misc fixes for hw/display and ui



Akihiko Odaki (1):
  ui/gtk: Fix the implicit mouse ungrabbing logic

Bryce Mills (1):
  gtk: Add show_menubar=on|off command line option.

Dongwon Kim (1):
  ui/gtk-egl: egl context needs to be unbound in the end of
gd_egl_switch

Gerd Hoffmann (6):
  docs: add firmware feature flags
  pci-ids: drop PCI_DEVICE_ID_VIRTIO_IOMMU
  pci-ids: drop PCI_DEVICE_ID_VIRTIO_MEM
  pci-ids: drop PCI_DEVICE_ID_VIRTIO_PMEM
  pci-ids: drop list of modern virtio devices
  pci-ids: document modern virtio-pci ids in pci.h too

Helge Konetzka (2):
  audio: fix in.voices test
  audio: improve out.voices test

Mauro Matteo Cascella (1):
  ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext

Sebastian Mitterle (1):
  qemu-edid: Restrict input parameter -d to avoid division by zero

Volker Rümelin (12):
  audio: refactor code in audio_run_out()
  audio: fix GUS audio playback with out.mixing-engine=off
  audio: run downstream playback queue unconditionally
  alsaaudio: reduce playback latency
  audio: add more audio rate control functions
  spiceaudio: add a pcm_ops buffer_get_free function
  spiceaudio: update comment
  audio: swap audio_rate_get_bytes() function parameters
  audio: rename audio_sw_bytes_free()
  audio: refactor audio_get_avail()
  audio: fix sw->buf size for audio recording
  audio: prevent an integer overflow in resampling code

lu zhipeng (1):
  cirrus_vga: fix potential memory overflow

 docs/specs/pci-ids.txt   |  16 +++--
 audio/audio_int.h|   4 +-
 audio/audio_template.h   |   4 ++
 audio/rate_template.h|  11 ++--
 include/hw/pci/pci.h |  13 +++-
 audio/alsaaudio.c|  38 +++-
 audio/audio.c| 111 +++
 audio/dbusaudio.c|   4 +-
 audio/noaudio.c  |   4 +-
 audio/spiceaudio.c   |  19 --
 audio/wavaudio.c |   2 +-
 hw/display/cirrus_vga.c  |   2 +-
 hw/virtio/virtio-iommu-pci.c |   4 +-
 hw/virtio/virtio-mem-pci.c   |   2 -
 hw/virtio/virtio-pci.c   |   2 +-
 hw/virtio/virtio-pmem-pci.c  |   2 -
 qemu-edid.c  |   4 ++
 ui/gtk-egl.c |   3 +
 ui/gtk.c |  25 +---
 ui/vnc.c |  11 +++-
 docs/interop/firmware.json   |  21 +--
 qapi/ui.json |   5 +-
 qemu-options.hx  |   3 +
 23 files changed, 218 insertions(+), 92 deletions(-)

-- 
2.37.3

[PULL 19/26] pci-ids: drop PCI_DEVICE_ID_VIRTIO_PMEM

Explicit vendor and device IDs are not needed for a virtio 1.0 device.
virtio_pci_device_plugged() overrides them anyway (so no functional
change).

Signed-off-by: Gerd Hoffmann 
Reviewed-by: David Hildenbrand 
Reviewed-by: Pankaj Gupta 
Tested-by: Pankaj Gupta 
Message-Id: <20221004112100.301935-4-kra...@redhat.com>
---
 include/hw/pci/pci.h| 1 -
 hw/virtio/virtio-pmem-pci.c | 2 --
 2 files changed, 3 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b6aefb33fb17..42c83cb5ed00 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -84,7 +84,6 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_VIRTIO_RNG 0x1005
 #define PCI_DEVICE_ID_VIRTIO_9P  0x1009
 #define PCI_DEVICE_ID_VIRTIO_VSOCK   0x1012
-#define PCI_DEVICE_ID_VIRTIO_PMEM0x1013
 
 #define PCI_VENDOR_ID_REDHAT 0x1b36
 #define PCI_DEVICE_ID_REDHAT_BRIDGE  0x0001
diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c
index 2b2a0b1eae10..7d9f4ec189b9 100644
--- a/hw/virtio/virtio-pmem-pci.c
+++ b/hw/virtio/virtio-pmem-pci.c
@@ -90,8 +90,6 @@ static void virtio_pmem_pci_class_init(ObjectClass *klass, 
void *data)
 
 k->realize = virtio_pmem_pci_realize;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_PMEM;
 pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
 pcidev_k->class_id = PCI_CLASS_OTHERS;
 
-- 
2.37.3

[PULL 04/26] alsaaudio: reduce playback latency

From: Volker Rümelin 

Change the buffer_get_free pcm_ops function to report the free
ALSA playback buffer. The generic buffer becomes a temporary
buffer and is empty after a call to audio_run_out().

Signed-off-by: Volker Rümelin 
Acked-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-4-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/alsaaudio.c | 38 +-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c
index 4a61378cd757..7a2a94cd427d 100644
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -602,6 +602,42 @@ static int alsa_open(bool in, struct alsa_params_req *req,
 return -1;
 }
 
+static size_t alsa_buffer_get_free(HWVoiceOut *hw)
+{
+ALSAVoiceOut *alsa = (ALSAVoiceOut *)hw;
+snd_pcm_sframes_t avail;
+size_t alsa_free, generic_free, generic_in_use;
+
+avail = snd_pcm_avail_update(alsa->handle);
+if (avail < 0) {
+if (avail == -EPIPE) {
+if (!alsa_recover(alsa->handle)) {
+avail = snd_pcm_avail_update(alsa->handle);
+}
+}
+if (avail < 0) {
+alsa_logerr(avail,
+"Could not obtain number of available frames\n");
+avail = 0;
+}
+}
+
+alsa_free = avail * hw->info.bytes_per_frame;
+generic_free = audio_generic_buffer_get_free(hw);
+generic_in_use = hw->samples * hw->info.bytes_per_frame - generic_free;
+if (generic_in_use) {
+/*
+ * This code can only be reached in the unlikely case that
+ * snd_pcm_avail_update() returned a larger number of frames
+ * than snd_pcm_writei() could write. Make sure that all
+ * remaining bytes in the generic buffer can be written.
+ */
+alsa_free = alsa_free > generic_in_use ? alsa_free - generic_in_use : 
0;
+}
+
+return alsa_free;
+}
+
 static size_t alsa_write(HWVoiceOut *hw, void *buf, size_t len)
 {
 ALSAVoiceOut *alsa = (ALSAVoiceOut *) hw;
@@ -916,7 +952,7 @@ static struct audio_pcm_ops alsa_pcm_ops = {
 .init_out = alsa_init_out,
 .fini_out = alsa_fini_out,
 .write= alsa_write,
-.buffer_get_free = audio_generic_buffer_get_free,
+.buffer_get_free = alsa_buffer_get_free,
 .run_buffer_out = audio_generic_run_buffer_out,
 .enable_out = alsa_enable_out,
 
-- 
2.37.3

[PULL 16/26] docs: add firmware feature flags

Add new firmware feature flags for the recently added confidential
computing operating modes from AMD and Intel.

While at it, also fix the path to the AMD SEV documentation.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Kashyap Chamarthy 
Message-Id: <20220930133220.1771336-1-kra...@redhat.com>
---
 docs/interop/firmware.json | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json
index 4e049b1c7ca0..56814f02b3c0 100644
--- a/docs/interop/firmware.json
+++ b/docs/interop/firmware.json
@@ -113,13 +113,22 @@
 #   Virtualization, as specified in the AMD64 Architecture
 #   Programmer's Manual. QEMU command line options related to
 #   this feature are documented in
-#   "docs/amd-memory-encryption.txt".
+#   "docs/system/i386/amd-memory-encryption.rst".
 #
 # @amd-sev-es: The firmware supports running under AMD Secure Encrypted
 #  Virtualization - Encrypted State, as specified in the AMD64
 #  Architecture Programmer's Manual. QEMU command line options
 #  related to this feature are documented in
-#  "docs/amd-memory-encryption.txt".
+#  "docs/system/i386/amd-memory-encryption.rst".
+#
+# @amd-sev-snp: The firmware supports running under AMD Secure Encrypted
+#   Virtualization - Secure Nested Paging, as specified in the
+#   AMD64 Architecture Programmer's Manual. QEMU command line
+#   options related to this feature are documented in
+#   "docs/system/i386/amd-memory-encryption.rst".
+#
+# @intel-tdx: The firmware supports running under Intel Trust Domain
+# Extensions (TDX).
 #
 # @enrolled-keys: The variable store (NVRAM) template associated with
 # the firmware binary has the UEFI Secure Boot
@@ -185,9 +194,11 @@
 # Since: 3.0
 ##
 { 'enum' : 'FirmwareFeature',
-  'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'enrolled-keys',
- 'requires-smm', 'secure-boot', 'verbose-dynamic',
- 'verbose-static' ] }
+  'data' : [ 'acpi-s3', 'acpi-s4',
+ 'amd-sev', 'amd-sev-es', 'amd-sev-snp',
+ 'intel-tdx',
+ 'enrolled-keys', 'requires-smm', 'secure-boot',
+ 'verbose-dynamic', 'verbose-static' ] }
 
 ##
 # @FirmwareFlashFile:
-- 
2.37.3

[PULL 08/26] audio: swap audio_rate_get_bytes() function parameters

From: Volker Rümelin 

Swap the rate and info parameters of the audio_rate_get_bytes()
function to align the parameter order with the rest of the
audio_rate_*() functions.

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-8-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio_int.h  | 2 +-
 audio/audio.c  | 2 +-
 audio/dbusaudio.c  | 4 ++--
 audio/noaudio.c| 4 ++--
 audio/spiceaudio.c | 2 +-
 audio/wavaudio.c   | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/audio/audio_int.h b/audio/audio_int.h
index 97e20e842927..e87ce014a04b 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -265,7 +265,7 @@ typedef struct RateCtl {
 void audio_rate_start(RateCtl *rate);
 size_t audio_rate_peek_bytes(RateCtl *rate, struct audio_pcm_info *info);
 void audio_rate_add_bytes(RateCtl *rate, size_t bytes_used);
-size_t audio_rate_get_bytes(struct audio_pcm_info *info, RateCtl *rate,
+size_t audio_rate_get_bytes(RateCtl *rate, struct audio_pcm_info *info,
 size_t bytes_avail);
 
 static inline size_t audio_ring_dist(size_t dst, size_t src, size_t len)
diff --git a/audio/audio.c b/audio/audio.c
index 61cdd73df5aa..7213f8bf07ca 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2276,7 +2276,7 @@ void audio_rate_add_bytes(RateCtl *rate, size_t 
bytes_used)
 rate->bytes_sent += bytes_used;
 }
 
-size_t audio_rate_get_bytes(struct audio_pcm_info *info, RateCtl *rate,
+size_t audio_rate_get_bytes(RateCtl *rate, struct audio_pcm_info *info,
 size_t bytes_avail)
 {
 size_t bytes;
diff --git a/audio/dbusaudio.c b/audio/dbusaudio.c
index a3d656d3b017..722df0355e1e 100644
--- a/audio/dbusaudio.c
+++ b/audio/dbusaudio.c
@@ -82,7 +82,7 @@ static void *dbus_get_buffer_out(HWVoiceOut *hw, size_t *size)
 }
 
 *size = MIN(vo->buf_size - vo->buf_pos, *size);
-*size = audio_rate_get_bytes(&hw->info, &vo->rate, *size);
+*size = audio_rate_get_bytes(&vo->rate, &hw->info, *size);
 
 return vo->buf + vo->buf_pos;
 
@@ -343,7 +343,7 @@ dbus_read(HWVoiceIn *hw, void *buf, size_t size)
 
 trace_dbus_audio_read(size);
 
-/* size = audio_rate_get_bytes(&hw->info, &vo->rate, size); */
+/* size = audio_rate_get_bytes(&vo->rate, &hw->info, size); */
 
 g_hash_table_iter_init(&iter, da->in_listeners);
 while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
diff --git a/audio/noaudio.c b/audio/noaudio.c
index 84a6bfbb1c87..4fdee5adecff 100644
--- a/audio/noaudio.c
+++ b/audio/noaudio.c
@@ -44,7 +44,7 @@ typedef struct NoVoiceIn {
 static size_t no_write(HWVoiceOut *hw, void *buf, size_t len)
 {
 NoVoiceOut *no = (NoVoiceOut *) hw;
-return audio_rate_get_bytes(&hw->info, &no->rate, len);
+return audio_rate_get_bytes(&no->rate, &hw->info, len);
 }
 
 static int no_init_out(HWVoiceOut *hw, struct audsettings *as, void 
*drv_opaque)
@@ -89,7 +89,7 @@ static void no_fini_in (HWVoiceIn *hw)
 static size_t no_read(HWVoiceIn *hw, void *buf, size_t size)
 {
 NoVoiceIn *no = (NoVoiceIn *) hw;
-int64_t bytes = audio_rate_get_bytes(&hw->info, &no->rate, size);
+int64_t bytes = audio_rate_get_bytes(&no->rate, &hw->info, size);
 
 audio_pcm_info_clear_buf(&hw->info, buf, bytes / hw->info.bytes_per_frame);
 return bytes;
diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c
index f52f3a8bbb66..d17ef1a25efb 100644
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -239,7 +239,7 @@ static void line_in_fini (HWVoiceIn *hw)
 static size_t line_in_read(HWVoiceIn *hw, void *buf, size_t len)
 {
 SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
-uint64_t to_read = audio_rate_get_bytes(&hw->info, &in->rate, len) >> 2;
+uint64_t to_read = audio_rate_get_bytes(&in->rate, &hw->info, len) >> 2;
 size_t ready = spice_server_record_get_samples(&in->sin, buf, to_read);
 
 /*
diff --git a/audio/wavaudio.c b/audio/wavaudio.c
index ac666335c783..3e1d84db83ee 100644
--- a/audio/wavaudio.c
+++ b/audio/wavaudio.c
@@ -42,7 +42,7 @@ typedef struct WAVVoiceOut {
 static size_t wav_write_out(HWVoiceOut *hw, void *buf, size_t len)
 {
 WAVVoiceOut *wav = (WAVVoiceOut *) hw;
-int64_t bytes = audio_rate_get_bytes(&hw->info, &wav->rate, len);
+int64_t bytes = audio_rate_get_bytes(&wav->rate, &hw->info, len);
 assert(bytes % hw->info.bytes_per_frame == 0);
 
 if (bytes && fwrite(buf, bytes, 1, wav->f) != 1) {
-- 
2.37.3

[PULL 07/26] spiceaudio: update comment

From: Volker Rümelin 

Replace a comment that poses a question with the answer to it.

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-7-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/spiceaudio.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c
index 22892a7b9d42..f52f3a8bbb66 100644
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -242,7 +242,10 @@ static size_t line_in_read(HWVoiceIn *hw, void *buf, 
size_t len)
 uint64_t to_read = audio_rate_get_bytes(&hw->info, &in->rate, len) >> 2;
 size_t ready = spice_server_record_get_samples(&in->sin, buf, to_read);
 
-/* XXX: do we need this? */
+/*
+ * If the client didn't send new frames, it most likely disconnected.
+ * Generate silence in this case to avoid a stalled audio stream.
+ */
 if (ready == 0) {
 memset(buf, 0, to_read << 2);
 ready = to_read;
-- 
2.37.3

[PULL 14/26] ui/gtk-egl: egl context needs to be unbound in the end of gd_egl_switch

From: Dongwon Kim 

A thread often fails to bind an egl context to itself after the guest VM
is rebooted because the context is still owned by another thread. It is
not very clear what condition makes this happen, but this can be prevented
by unbinding the context from the thread at the end of gd_egl_switch.

Cc: Gerd Hoffmann 
Signed-off-by: Dongwon Kim 
Message-Id: <20220928215805.4661-1-dongwon@intel.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/gtk-egl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index b5bffbab2522..35f917ceb15e 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -195,6 +195,9 @@ void gd_egl_switch(DisplayChangeListener *dcl,
 if (resized) {
 gd_update_windowsize(vc);
 }
+
+eglMakeCurrent(qemu_egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE,
+   EGL_NO_CONTEXT);
 }

 QEMUGLContext gd_egl_create_context(DisplayGLCtx *dgc,
-- 
2.37.3

[PULL 24/26] gtk: Add show_menubar=on|off command line option.

From: Bryce Mills 

The patch adds a "show_menubar" command line option for the GTK UI, similar
to "show_tabs". This option allows hiding the menu bar initially; it can
still be toggled by its shortcut, and the other shortcuts still work.

Signed-off-by: Bryce Mills 
Acked-by: Markus Armbruster 
Message-Id: 

Signed-off-by: Gerd Hoffmann 
---
 ui/gtk.c| 15 ++-
 qapi/ui.json|  5 -
 qemu-options.hx |  3 +++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/ui/gtk.c b/ui/gtk.c
index 6fc2e2396393..92daaa6a6edb 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -2171,7 +2171,7 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, 
VirtualConsole *vc,
 return group;
 }
 
-static GtkWidget *gd_create_menu_view(GtkDisplayState *s)
+static GtkWidget *gd_create_menu_view(GtkDisplayState *s, DisplayOptions *opts)
 {
 GSList *group = NULL;
 GtkWidget *view_menu;
@@ -2269,7 +2269,8 @@ static GtkWidget *gd_create_menu_view(GtkDisplayState *s)
 s->show_menubar_item = gtk_check_menu_item_new_with_mnemonic(
 _("Show Menubar"));
 gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->show_menubar_item),
-   TRUE);
+   !opts->u.gtk.has_show_menubar ||
+   opts->u.gtk.show_menubar);
 gtk_accel_group_connect(s->accel_group, GDK_KEY_m, HOTKEY_MODIFIERS, 0,
 g_cclosure_new_swap(G_CALLBACK(gd_accel_show_menubar), s, NULL));
 gtk_accel_label_set_accel(
@@ -2280,13 +2281,13 @@ static GtkWidget *gd_create_menu_view(GtkDisplayState 
*s)
 return view_menu;
 }
 
-static void gd_create_menus(GtkDisplayState *s)
+static void gd_create_menus(GtkDisplayState *s, DisplayOptions *opts)
 {
 GtkSettings *settings;
 
 s->accel_group = gtk_accel_group_new();
 s->machine_menu = gd_create_menu_machine(s);
-s->view_menu = gd_create_menu_view(s);
+s->view_menu = gd_create_menu_view(s, opts);
 
 s->machine_menu_item = gtk_menu_item_new_with_mnemonic(_("_Machine"));
 gtk_menu_item_set_submenu(GTK_MENU_ITEM(s->machine_menu_item),
@@ -2363,7 +2364,7 @@ static void gtk_display_init(DisplayState *ds, 
DisplayOptions *opts)
 
 gtk_window_set_icon_name(GTK_WINDOW(s->window), "qemu");
 
-gd_create_menus(s);
+gd_create_menus(s, opts);
 
 gd_connect_signals(s);
 
@@ -2378,6 +2379,10 @@ static void gtk_display_init(DisplayState *ds, 
DisplayOptions *opts)
 gtk_container_add(GTK_CONTAINER(s->window), s->vbox);
 
 gtk_widget_show_all(s->window);
+if (opts->u.gtk.has_show_menubar &&
+!opts->u.gtk.show_menubar) {
+gtk_widget_hide(s->menu_bar);
+}
 
 vc = gd_vc_find_current(s);
 gtk_widget_set_sensitive(s->view_menu, vc != NULL);
diff --git a/qapi/ui.json b/qapi/ui.json
index 286c5731d1c3..0abba3e930a4 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1199,13 +1199,16 @@
 #   interfaces (e.g. VGA and virtual console character devices)
 #   by default.
 #   Since 7.1
+# @show-menubar: Display the main window menubar. Defaults to "on".
+#Since 8.0
 #
 # Since: 2.12
 ##
 { 'struct'  : 'DisplayGTK',
   'data': { '*grab-on-hover' : 'bool',
 '*zoom-to-fit'   : 'bool',
-'*show-tabs' : 'bool'  } }
+'*show-tabs' : 'bool',
+'*show-menubar'  : 'bool'  } }
 
 ##
 # @DisplayEGLHeadless:
diff --git a/qemu-options.hx b/qemu-options.hx
index 95b998a13b01..bb0979bef908 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1969,6 +1969,7 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
 #if defined(CONFIG_GTK)
 "-display gtk[,full-screen=on|off][,gl=on|off][,grab-on-hover=on|off]\n"
 "
[,show-tabs=on|off][,show-cursor=on|off][,window-close=on|off]\n"
+"[,show-menubar=on|off]\n"
 #endif
 #if defined(CONFIG_VNC)
 "-display vnc=[,]\n"
@@ -2061,6 +2062,8 @@ SRST
 
 ``window-close=on|off`` : Allow to quit qemu with window close button
 
+``show-menubar=on|off`` : Display the main window menubar, defaults to 
"on"
+
 ``curses[,charset=]``
 Display video output via curses. For graphics device models
 which support a text mode, QEMU can display this output using a
-- 
2.37.3

[PULL 10/26] audio: refactor audio_get_avail()

From: Volker Rümelin 

Split out the code in audio_get_avail() that calculates the
buffer size that the audio frontend can read. This is similar
to the code changes in audio_get_free().

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-10-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio.c | 24 +++-
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 28262ffd58a5..ed2b9d5f7e15 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -986,6 +986,18 @@ void AUD_set_active_in (SWVoiceIn *sw, int on)
 }
 }
 
+/**
+ * audio_frontend_frames_in() - returns the number of frames the resampling
+ * code generates from frames_in frames
+ *
+ * @sw: audio recording frontend
+ * @frames_in: number of frames
+ */
+static size_t audio_frontend_frames_in(SWVoiceIn *sw, size_t frames_in)
+{
+return ((int64_t)frames_in << 32) / sw->ratio;
+}
+
 static size_t audio_get_avail (SWVoiceIn *sw)
 {
 size_t live;
@@ -1002,12 +1014,12 @@ static size_t audio_get_avail (SWVoiceIn *sw)
 }
 
 ldebug (
-"%s: get_avail live %zu ret %" PRId64 "\n",
+"%s: get_avail live %zu frontend frames %zu\n",
 SW_NAME (sw),
-live, (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame
+live, audio_frontend_frames_in(sw, live)
 );
 
-return (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame;
+return live;
 }
 
 /**
@@ -1309,11 +1321,13 @@ static void audio_run_in (AudioState *s)
 sw->total_hw_samples_acquired -= min;
 
 if (sw->active) {
+size_t sw_avail = audio_get_avail(sw);
 size_t avail;
 
-avail = audio_get_avail (sw);
+avail = audio_frontend_frames_in(sw, sw_avail);
 if (avail > 0) {
-sw->callback.fn (sw->callback.opaque, avail);
+sw->callback.fn(sw->callback.opaque,
+avail * sw->info.bytes_per_frame);
 }
 }
 }
-- 
2.37.3

[PULL 09/26] audio: rename audio_sw_bytes_free()

From: Volker Rümelin 

Rename and refactor audio_sw_bytes_free(). This function is not
limited to calculating the free audio buffer size. The renamed
function returns the number of frames instead of bytes.

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220923183640.8314-9-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 7213f8bf07ca..28262ffd58a5 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1010,9 +1010,16 @@ static size_t audio_get_avail (SWVoiceIn *sw)
 return (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame;
 }
 
-static size_t audio_sw_bytes_free(SWVoiceOut *sw, size_t free)
+/**
+ * audio_frontend_frames_out() - returns the number of frames needed to
+ * get frames_out frames after resampling
+ *
+ * @sw: audio playback frontend
+ * @frames_out: number of frames
+ */
+static size_t audio_frontend_frames_out(SWVoiceOut *sw, size_t frames_out)
 {
-return (((int64_t)free << 32) / sw->ratio) * sw->info.bytes_per_frame;
+return ((int64_t)frames_out << 32) / sw->ratio;
 }
 
 static size_t audio_get_free(SWVoiceOut *sw)
@@ -1034,8 +1041,8 @@ static size_t audio_get_free(SWVoiceOut *sw)
 dead = sw->hw->mix_buf->size - live;
 
 #ifdef DEBUG_OUT
-dolog("%s: get_free live %zu dead %zu sw_bytes %zu\n",
-  SW_NAME(sw), live, dead, audio_sw_bytes_free(sw, dead));
+dolog("%s: get_free live %zu dead %zu frontend frames %zu\n",
+  SW_NAME(sw), live, dead, audio_frontend_frames_out(sw, dead));
 #endif
 
 return dead;
@@ -1156,13 +1163,14 @@ static void audio_run_out (AudioState *s)
 size_t free;
 
 if (hw_free > sw->total_hw_samples_mixed) {
-free = audio_sw_bytes_free(sw,
+free = audio_frontend_frames_out(sw,
 MIN(sw_free, hw_free - sw->total_hw_samples_mixed));
 } else {
 free = 0;
 }
 if (free > 0) {
-sw->callback.fn(sw->callback.opaque, free);
+sw->callback.fn(sw->callback.opaque,
+free * sw->info.bytes_per_frame);
 }
 }
 }
-- 
2.37.3

[PULL 26/26] audio: improve out.voices test

From: Helge Konetzka 

Improve readability of the audio out.voices test:
since 1 is the value that is logged and set when the test triggers,
the test should compare against 1 as well.

Signed-off-by: Helge Konetzka 
Reviewed-by: Marc-André Lureau 
Message-Id: <20221012114925.5084-3...@zapateado.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/audio/audio.c b/audio/audio.c
index 1ecdbc4191f2..cc664271ebb5 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1772,7 +1772,7 @@ static AudioState *audio_init(Audiodev *dev, const char 
*name)
 s->nb_hw_voices_out = audio_get_pdo_out(dev)->voices;
 s->nb_hw_voices_in = audio_get_pdo_in(dev)->voices;
 
-if (s->nb_hw_voices_out <= 0) {
+if (s->nb_hw_voices_out < 1) {
 dolog ("Bogus number of playback voices %d, setting to 1\n",
s->nb_hw_voices_out);
 s->nb_hw_voices_out = 1;
-- 
2.37.3

Re: [PATCH v3 2/3] block: introduce zone append write for zoned devices

2022-10-13 Thread Sam Li

Damien Le Moal  于2022年10月13日周四 13:55写道：
>
> On 10/10/22 11:33, Sam Li wrote:
> > A zone append command is a write operation that specifies the first
> > logical block of a zone as the write position. When writing to a zoned
> > block device using zone append, the byte offset of writes is pointing
> > to the write pointer of that zone. Upon completion the device will
> > respond with the position the data has been written in the zone.
> >
> > Signed-off-by: Sam Li 
> > ---
> >  block/block-backend.c | 64 +++
> >  block/file-posix.c| 64 ---
> >  block/io.c| 21 ++
> >  block/raw-format.c|  7 
> >  include/block/block-io.h  |  3 ++
> >  include/block/block_int-common.h  |  3 ++
> >  include/block/raw-aio.h   |  4 +-
> >  include/sysemu/block-backend-io.h |  9 +
> >  8 files changed, 168 insertions(+), 7 deletions(-)
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index ddc569e3ac..bfdb719bc8 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1439,6 +1439,9 @@ typedef struct BlkRwCo {
> >  struct {
> >  BlockZoneOp op;
> >  } zone_mgmt;
> > +struct {
> > +int64_t *append_sector;
>
> I would call this "sector", since it will always be referenced as
> "->zone_append.sector", you get the "append" for free :)
>
> That said, shouldn't this be a byte value, so called "offset" ? Not
> entirely sure...

Yes, it can be changed to "offset"(byte) following QEMU's convention.
Just need to add conversions to virtio_blk_zone_append/*_complete,
which is easily done.

>
> > +} zone_append;
> >  };
> >  } BlkRwCo;
> >
> > @@ -1869,6 +1872,46 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, 
> > BlockZoneOp op,
> >  return &acb->common;
> >  }
> >
> > +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) {
> > +BlkAioEmAIOCB *acb = opaque;
> > +BlkRwCo *rwco = &acb->rwco;
> > +
> > +rwco->ret = blk_co_zone_append(rwco->blk, 
> > rwco->zone_append.append_sector,
> > +   rwco->iobuf, rwco->flags);
> > +blk_aio_complete(acb);
> > +}
> > +
> > +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
> > +QEMUIOVector *qiov, BdrvRequestFlags flags,
> > +BlockCompletionFunc *cb, void *opaque) {
> > +BlkAioEmAIOCB *acb;
> > +Coroutine *co;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
> > +acb->rwco = (BlkRwCo) {
> > +.blk= blk,
> > +.ret= NOT_DONE,
> > +.flags  = flags,
> > +.iobuf  = qiov,
> > +.zone_append = {
> > +.append_sector = offset,
>
> See above comment. So since this is a byte value, this needs to be
> called "offset", no ?

Yes, same answers above.

>
> > +},
> > +};
> > +acb->has_returned = false;
> > +
> > +co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
> > +bdrv_coroutine_enter(blk_bs(blk), co);
> > +acb->has_returned = true;
> > +if (acb->rwco.ret != NOT_DONE) {
> > +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> > + blk_aio_complete_bh, acb);
> > +}
> > +
> > +return &acb->common;
> > +}
> > +
> >  /*
> >   * Send a zone_report command.
> >   * offset is a byte offset from the start of the device. No alignment
> > @@ -1921,6 +1964,27 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, 
> > BlockZoneOp op,
> >  return ret;
> >  }
> >
> > +/*
> > + * Send a zone_append command.
> > + */
> > +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
> > +QEMUIOVector *qiov, BdrvRequestFlags flags)
> > +{
> > +int ret;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +blk_wait_while_drained(blk);
> > +if (!blk_is_available(blk)) {
> > +blk_dec_in_flight(blk);
> > +return -ENOMEDIUM;
> > +}
> > +
> > +ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> >  void blk_drain(BlockBackend *blk)
> >  {
> >  BlockDriverState *bs = blk_bs(blk);
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 17c0b58158..08ab164df4 100755
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -1657,7 +1657,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData 
> > *aiocb)
> >  ssize_t len;
> >
> >  do {
> > -if (aiocb->aio_type & QEMU_AIO_WRITE)
> > +if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
> >  len = qemu_pwritev(aiocb->aio_fildes,
> > aiocb->io.iov,
> >

[PULL 17/26] pci-ids: drop PCI_DEVICE_ID_VIRTIO_IOMMU

Not needed for a virtio 1.0 device.  virtio_pci_device_plugged()
overrides them anyway (so no functional change).

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Message-Id: <20221004112100.301935-2-kra...@redhat.com>
---
 include/hw/pci/pci.h | 1 -
 hw/virtio/virtio-iommu-pci.c | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b54b6ef88fc3..89eaca429389 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -85,7 +85,6 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_VIRTIO_9P  0x1009
 #define PCI_DEVICE_ID_VIRTIO_VSOCK   0x1012
 #define PCI_DEVICE_ID_VIRTIO_PMEM0x1013
-#define PCI_DEVICE_ID_VIRTIO_IOMMU   0x1014
 #define PCI_DEVICE_ID_VIRTIO_MEM 0x1015
 
 #define PCI_VENDOR_ID_REDHAT 0x1b36
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
index 844d64770406..79ea8334f04e 100644
--- a/hw/virtio/virtio-iommu-pci.c
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -74,8 +74,6 @@ static void virtio_iommu_pci_class_init(ObjectClass *klass, 
void *data)
 k->realize = virtio_iommu_pci_realize;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 device_class_set_props(dc, virtio_iommu_pci_properties);
-pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_IOMMU;
 pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
 pcidev_k->class_id = PCI_CLASS_OTHERS;
 dc->hotpluggable = false;
@@ -90,7 +88,7 @@ static void virtio_iommu_pci_instance_init(Object *obj)
 }
 
 static const VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = {
-.generic_name  = TYPE_VIRTIO_IOMMU_PCI,
+.generic_name  = TYPE_VIRTIO_IOMMU_PCI,
 .instance_size = sizeof(VirtIOIOMMUPCI),
 .instance_init = virtio_iommu_pci_instance_init,
 .class_init= virtio_iommu_pci_class_init,
-- 
2.37.3

[PULL 12/26] audio: prevent an integer overflow in resampling code

From: Volker Rümelin 

There are corner cases where rate->opos can overflow. For
example, if QEMU is started with -audiodev pa,id=audio0,
out.frequency=11025 -device ich9-intel-hda -device hda-duplex,
audiodev=audio0 and the guest plays audio with a sampling
frequency of 44100Hz, rate->opos will overflow after 27.05h
and the audio stream will be silent for a long time.

To prevent a rate->opos and also a rate->ipos overflow, both
are wrapped around after a short time. The wrap around point
rate->ipos >= 0x10001 is an arbitrarily selected value and can
be any small value, 0 and 1 included.

The comment that an ipos overflow will result in an infinite
loop has been removed, because in this case the resampling code
simply stops generating output samples and the audio stream stalls.
However, there is no infinite loop.

Signed-off-by: Volker Rümelin 
Message-Id: <20220923183640.8314-12-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/rate_template.h | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/audio/rate_template.h b/audio/rate_template.h
index f94c940c61b1..b432719ebbaa 100644
--- a/audio/rate_template.h
+++ b/audio/rate_template.h
@@ -72,11 +72,6 @@ void NAME (void *opaque, struct st_sample *ibuf, struct 
st_sample *obuf,
 ilast = *ibuf++;
 rate->ipos++;
 
-/* if ipos overflow, there is  a infinite loop */
-if (rate->ipos == 0x) {
-rate->ipos = 1;
-rate->opos = rate->opos & 0x;
-}
 /* See if we finished the input buffer yet */
 if (ibuf >= iend) {
 goto the_end;
@@ -85,6 +80,12 @@ void NAME (void *opaque, struct st_sample *ibuf, struct 
st_sample *obuf,
 
 icur = *ibuf;
 
+/* wrap ipos and opos around long before they overflow */
+if (rate->ipos >= 0x10001) {
+rate->ipos = 1;
+rate->opos &= 0x;
+}
+
 /* interpolate */
 #ifdef FLOAT_MIXENG
 #ifdef RECIPROCAL
-- 
2.37.3

[PULL 25/26] audio: fix in.voices test

From: Helge Konetzka 

Calling qemu with valid -audiodev ...,in.voices=0 results in a spurious
warning:
  audio: Bogus number of capture voices 0, setting to 0
This patch fixes the in.voices test.

Signed-off-by: Helge Konetzka 
Reviewed-by: Marc-André Lureau 
Message-Id: <20221012114925.5084-2...@zapateado.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/audio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/audio/audio.c b/audio/audio.c
index 886725747bda..1ecdbc4191f2 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1778,7 +1778,7 @@ static AudioState *audio_init(Audiodev *dev, const char 
*name)
 s->nb_hw_voices_out = 1;
 }
 
-if (s->nb_hw_voices_in <= 0) {
+if (s->nb_hw_voices_in < 0) {
 dolog ("Bogus number of capture voices %d, setting to 0\n",
s->nb_hw_voices_in);
 s->nb_hw_voices_in = 0;
-- 
2.37.3

[PULL 21/26] pci-ids: document modern virtio-pci ids in pci.h too

While being at it add a #define for the magic 0x1040 number.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Eric Auger 
Message-Id: <20221004112100.301935-6-kra...@redhat.com>
---
 include/hw/pci/pci.h   | 10 ++
 hw/virtio/virtio-pci.c |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 42c83cb5ed00..d1ac308574f1 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -76,6 +76,7 @@ extern bool pci_available;
 #define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4
 #define PCI_SUBDEVICE_ID_QEMU0x1100
 
+/* legacy virtio-pci devices */
 #define PCI_DEVICE_ID_VIRTIO_NET 0x1000
 #define PCI_DEVICE_ID_VIRTIO_BLOCK   0x1001
 #define PCI_DEVICE_ID_VIRTIO_BALLOON 0x1002
@@ -85,6 +86,15 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_VIRTIO_9P  0x1009
 #define PCI_DEVICE_ID_VIRTIO_VSOCK   0x1012
 
+/*
+ * modern virtio-pci devices get their id assigned automatically,
+ * there is no need to add #defines here.  It gets calculated as
+ *
+ * PCI_DEVICE_ID = PCI_DEVICE_ID_VIRTIO_10_BASE +
+ * virtio_bus_get_vdev_id(bus)
+ */
+#define PCI_DEVICE_ID_VIRTIO_10_BASE 0x1040
+
 #define PCI_VENDOR_ID_REDHAT 0x1b36
 #define PCI_DEVICE_ID_REDHAT_BRIDGE  0x0001
 #define PCI_DEVICE_ID_REDHAT_SERIAL  0x0002
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index a50c5a57d7e5..e7d80242b73f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1688,7 +1688,7 @@ static void virtio_pci_device_plugged(DeviceState *d, 
Error **errp)
 pci_set_word(config + PCI_VENDOR_ID,
  PCI_VENDOR_ID_REDHAT_QUMRANET);
 pci_set_word(config + PCI_DEVICE_ID,
- 0x1040 + virtio_bus_get_vdev_id(bus));
+ PCI_DEVICE_ID_VIRTIO_10_BASE + 
virtio_bus_get_vdev_id(bus));
 pci_config_set_revision(config, 1);
 }
 config[PCI_INTERRUPT_PIN] = 1;
-- 
2.37.3

[PULL 20/26] pci-ids: drop list of modern virtio devices

Drop the list of modern virtio devices and explain how they
are calculated instead.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Eric Auger 
Message-Id: <20221004112100.301935-5-kra...@redhat.com>
---
 docs/specs/pci-ids.txt | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index dd6859d039d0..e463c4cb3a22 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -22,16 +22,14 @@ maintained as part of the virtio specification.
 1af4:1004  SCSI host bus adapter device (legacy)
 1af4:1005  entropy generator device (legacy)
 1af4:1009  9p filesystem device (legacy)
+1af4:1012  vsock device (bug compatibility)
 
-1af4:1041  network device (modern)
-1af4:1042  block device (modern)
-1af4:1043  console device (modern)
-1af4:1044  entropy generator device (modern)
-1af4:1045  balloon device (modern)
-1af4:1048  SCSI host bus adapter device (modern)
-1af4:1049  9p filesystem device (modern)
-1af4:1050  virtio gpu device (modern)
-1af4:1052  virtio input device (modern)
+1af4:1040  Start of ID range for modern virtio devices.  The PCI device
+   to  ID is calculated from the virtio device ID by adding the
+1af4:10ef  0x1040 offset.  The virtio IDs are defined in the virtio
+   specification.  The Linux kernel has a header file with
+   defines for all virtio IDs (linux/virtio_ids.h), qemu has a
+   copy in include/standard-headers/.
 
 1af4:10f0  Available for experimental usage without registration.  Must get
to  official ID when the code leaves the test lab (i.e. when seeking
-- 
2.37.3

[PULL 23/26] qemu-edid: Restrict input parameter -d to avoid division by zero

From: Sebastian Mitterle 

A zero value for dpi will lead to a division by zero in qemu_edid_dpi_to_mm().
Tested by running qemu-edid -dX, X = 0, 100.

Resolves: qemu-project/qemu#1249

Suggested-by: Thomas Huth 
Signed-off-by: Sebastian Mitterle 
Message-Id: <20221011151216.64897-1-smitt...@redhat.com>
Signed-off-by: Gerd Hoffmann 
---
 qemu-edid.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/qemu-edid.c b/qemu-edid.c
index 20c958d9c7eb..92e1a660a76b 100644
--- a/qemu-edid.c
+++ b/qemu-edid.c
@@ -92,6 +92,10 @@ int main(int argc, char *argv[])
 fprintf(stderr, "not a number: %s\n", optarg);
 exit(1);
 }
+if (dpi == 0) {
+fprintf(stderr, "cannot be zero: %s\n", optarg);
+exit(1);
+}
 break;
 case 'v':
 info.vendor = optarg;
-- 
2.37.3

[PULL 13/26] ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext

From: Mauro Matteo Cascella 

Extended ClientCutText messages start with a 4-byte header. If len < 4,
an integer underflow occurs in vnc_client_cut_text_ext. The result is
used to decompress data in a while loop in inflate_buffer, leading to
CPU consumption and denial of service. Prevent this by checking dlen in
protocol_client_msg.

Fixes: CVE-2022-3165
Fixes: 0bf41cab93e5 ("ui/vnc: clipboard support")
Reported-by: TangPeng 
Signed-off-by: Mauro Matteo Cascella 
Message-Id: <20220925204511.1103214-1-mcasc...@redhat.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/vnc.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 6a05d061479e..acb3629cd8e2 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2442,8 +2442,8 @@ static int protocol_client_msg(VncState *vs, uint8_t 
*data, size_t len)
 if (len == 1) {
 return 8;
 }
+uint32_t dlen = abs(read_s32(data, 4));
 if (len == 8) {
-uint32_t dlen = abs(read_s32(data, 4));
 if (dlen > (1 << 20)) {
 error_report("vnc: client_cut_text msg payload has %u bytes"
  " which exceeds our limit of 1MB.", dlen);
@@ -2456,8 +2456,13 @@ static int protocol_client_msg(VncState *vs, uint8_t 
*data, size_t len)
 }
 
 if (read_s32(data, 4) < 0) {
-vnc_client_cut_text_ext(vs, abs(read_s32(data, 4)),
-read_u32(data, 8), data + 12);
+if (dlen < 4) {
+error_report("vnc: malformed payload (header less than 4 
bytes)"
+ " in extended clipboard pseudo-encoding.");
+vnc_client_error(vs);
+break;
+}
+vnc_client_cut_text_ext(vs, dlen, read_u32(data, 8), data + 12);
 break;
 }
 vnc_client_cut_text(vs, read_u32(data, 4), data + 8);
-- 
2.37.3

Re: [PATCH v3 1/3] file-posix:add the tracking of the zones write pointers

2022-10-13 Thread Sam Li

Damien Le Moal  于2022年10月13日周四 15:30写道：
>
> On 2022/10/13 16:08, Sam Li wrote:
> > Damien Le Moal  于2022年10月13日周四 13:13写道：
> >>
> >> On 10/10/22 11:33, Sam Li wrote:
> >>> Since Linux doesn't have a user API to issue zone append operations to
> >>> zoned devices from user space, the file-posix driver is modified to add
> >>> zone append emulation using regular writes. To do this, the file-posix
> >>> driver tracks the wp location of all zones of the device. It uses an
> >>> array of uint64_t. The most significant bit of each wp location indicates
> >>> if the zone type is conventional zones.
> >>>
> >>> The zones wp can be changed due to the following operations issued:
> >>> - zone reset: change the wp to the start offset of that zone
> >>> - zone finish: change to the end location of that zone
> >>> - write to a zone
> >>> - zone append
> >>>
> >>> Signed-off-by: Sam Li 
> >>> ---
> >>>  block/file-posix.c   | 158 +++
> >>>  include/block/block-common.h |  14 +++
> >>>  include/block/block_int-common.h |   5 +
> >>>  3 files changed, 177 insertions(+)
> >>>
> >>> diff --git a/block/file-posix.c b/block/file-posix.c
> >>> index a9d347292e..17c0b58158 100755
> >>> --- a/block/file-posix.c
> >>> +++ b/block/file-posix.c
> >>> @@ -206,6 +206,7 @@ typedef struct RawPosixAIOData {
> >>>  struct {
> >>>  struct iovec *iov;
> >>>  int niov;
> >>> +int64_t *append_sector;
> >>
> >> This should be added as part of patch 2. You do not need this to track
> >> the wp of zones in this patch.
> >>
> >>>  } io;
> >>>  struct {
> >>>  uint64_t cmd;
> >>> @@ -226,6 +227,7 @@ typedef struct RawPosixAIOData {
> >>>  struct {
> >>>  unsigned long zone_op;
> >>>  const char *zone_op_name;
> >>> +bool all;
> >>>  } zone_mgmt;
> >>>  };
> >>>  } RawPosixAIOData;
> >>> @@ -1331,6 +1333,67 @@ static int hdev_get_max_segments(int fd, struct 
> >>> stat *st) {
> >>>  #endif
> >>>  }
> >>>
> >>> +#if defined(CONFIG_BLKZONED)
> >>> +static int get_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,
> >>
> >> Nit: It would seem more natural to have the fd argument first...
> >>
> >>> +unsigned int nrz) {
> >>> +struct blk_zone *blkz;
> >>> +int64_t rep_size;
> >>> +int64_t sector = offset >> BDRV_SECTOR_BITS;
> >>> +int ret, n = 0, i = 0;
> >>> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> >>> blk_zone);
> >>> +g_autofree struct blk_zone_report *rep = NULL;
> >>> +
> >>> +rep = g_malloc(rep_size);
> >>> +blkz = (struct blk_zone *)(rep + 1);
> >>> +while (n < nrz) {
> >>> +memset(rep, 0, rep_size);
> >>> +rep->sector = sector;
> >>> +rep->nr_zones = nrz - n;
> >>> +
> >>> +do {
> >>> +ret = ioctl(fd, BLKREPORTZONE, rep);
> >>> +} while (ret != 0 && errno == EINTR);
> >>> +if (ret != 0) {
> >>> +error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed 
> >>> %d",
> >>> +fd, offset, errno);
> >>> +return -errno;
> >>> +}
> >>> +
> >>> +if (!rep->nr_zones) {
> >>> +break;
> >>> +}
> >>> +
> >>> +for (i = 0; i < rep->nr_zones; i++, n++) {
> >>> +/*
> >>> + * The wp tracking cares only about sequential writes 
> >>> required and
> >>> + * sequential write preferred zones so that the wp can 
> >>> advance to
> >>> + * the right location.
> >>> + * Use the most significant bit of the wp location to 
> >>> indicate the
> >>> + * zone type: 0 for SWR/SWP zones and 1 for conventional 
> >>> zones.
> >>> + */
> >>> +if (!(blkz[i].type != BLK_ZONE_TYPE_CONVENTIONAL)) {
> >>
> >> Double negation... This can simply be:
> >>
> >> if (blkz[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
> >>
> >>> +wps->wp[i] += 1ULL << 63;
> >>
> >> No need for the += here. This can be "=".
> >>
> >>> +} else {
> >>> +wps->wp[i] = blkz[i].wp << BDRV_SECTOR_BITS;
> >>> +}
> >>> +}
> >>> +sector = blkz[i-1].start + blkz[i-1].len;
> >>
> >> spaces missing around the "-" in the "i-1" expressions.
> >>
> >>> +}
> >>> +
> >>> +return 0;
> >>> +}
> >>> +
> >>> +static void update_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,
> >>
> >> Same nit as above: fd being the first argument would be a little more
> >> natural in my opinion.
> >>
> >>> +unsigned int nrz) {
> >>> +qemu_mutex_lock(&wps->lock);
> >>> +if (get_zones_wp(offset, fd, wps, nrz) < 0) {
> >>> +error_report("report zone wp failed");
> >>> +return;
> >>
> >> You are leaking the lock here. Remove the return. Also, given that
> >> get_zones_wp() already prints a message if report fails, I do no

Re: [PATCH v3 1/3] file-posix:add the tracking of the zones write pointers

2022-10-13 Thread Damien Le Moal

On 2022/10/13 16:08, Sam Li wrote:
> Damien Le Moal  于2022年10月13日周四 13:13写道：
>>
>> On 10/10/22 11:33, Sam Li wrote:
>>> Since Linux doesn't have a user API to issue zone append operations to
>>> zoned devices from user space, the file-posix driver is modified to add
>>> zone append emulation using regular writes. To do this, the file-posix
>>> driver tracks the wp location of all zones of the device. It uses an
>>> array of uint64_t. The most significant bit of each wp location indicates
>>> if the zone type is conventional zones.
>>>
>>> The zones wp can be changed due to the following operations issued:
>>> - zone reset: change the wp to the start offset of that zone
>>> - zone finish: change to the end location of that zone
>>> - write to a zone
>>> - zone append
>>>
>>> Signed-off-by: Sam Li 
>>> ---
>>>  block/file-posix.c   | 158 +++
>>>  include/block/block-common.h |  14 +++
>>>  include/block/block_int-common.h |   5 +
>>>  3 files changed, 177 insertions(+)
>>>
>>> diff --git a/block/file-posix.c b/block/file-posix.c
>>> index a9d347292e..17c0b58158 100755
>>> --- a/block/file-posix.c
>>> +++ b/block/file-posix.c
>>> @@ -206,6 +206,7 @@ typedef struct RawPosixAIOData {
>>>  struct {
>>>  struct iovec *iov;
>>>  int niov;
>>> +int64_t *append_sector;
>>
>> This should be added as part of patch 2. You do not need this to track
>> the wp of zones in this patch.
>>
>>>  } io;
>>>  struct {
>>>  uint64_t cmd;
>>> @@ -226,6 +227,7 @@ typedef struct RawPosixAIOData {
>>>  struct {
>>>  unsigned long zone_op;
>>>  const char *zone_op_name;
>>> +bool all;
>>>  } zone_mgmt;
>>>  };
>>>  } RawPosixAIOData;
>>> @@ -1331,6 +1333,67 @@ static int hdev_get_max_segments(int fd, struct stat 
>>> *st) {
>>>  #endif
>>>  }
>>>
>>> +#if defined(CONFIG_BLKZONED)
>>> +static int get_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,
>>
>> Nit: It would seem more natural to have the fd argument first...
>>
>>> +unsigned int nrz) {
>>> +struct blk_zone *blkz;
>>> +int64_t rep_size;
>>> +int64_t sector = offset >> BDRV_SECTOR_BITS;
>>> +int ret, n = 0, i = 0;
>>> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
>>> blk_zone);
>>> +g_autofree struct blk_zone_report *rep = NULL;
>>> +
>>> +rep = g_malloc(rep_size);
>>> +blkz = (struct blk_zone *)(rep + 1);
>>> +while (n < nrz) {
>>> +memset(rep, 0, rep_size);
>>> +rep->sector = sector;
>>> +rep->nr_zones = nrz - n;
>>> +
>>> +do {
>>> +ret = ioctl(fd, BLKREPORTZONE, rep);
>>> +} while (ret != 0 && errno == EINTR);
>>> +if (ret != 0) {
>>> +error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed 
>>> %d",
>>> +fd, offset, errno);
>>> +return -errno;
>>> +}
>>> +
>>> +if (!rep->nr_zones) {
>>> +break;
>>> +}
>>> +
>>> +for (i = 0; i < rep->nr_zones; i++, n++) {
>>> +/*
>>> + * The wp tracking cares only about sequential writes required 
>>> and
>>> + * sequential write preferred zones so that the wp can advance 
>>> to
>>> + * the right location.
>>> + * Use the most significant bit of the wp location to indicate 
>>> the
>>> + * zone type: 0 for SWR/SWP zones and 1 for conventional zones.
>>> + */
>>> +if (!(blkz[i].type != BLK_ZONE_TYPE_CONVENTIONAL)) {
>>
>> Double negation... This can simply be:
>>
>> if (blkz[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
>>
>>> +wps->wp[i] += 1ULL << 63;
>>
>> No need for the += here. This can be "=".
>>
>>> +} else {
>>> +wps->wp[i] = blkz[i].wp << BDRV_SECTOR_BITS;
>>> +}
>>> +}
>>> +sector = blkz[i-1].start + blkz[i-1].len;
>>
>> spaces missing around the "-" in the "i-1" expressions.
>>
>>> +}
>>> +
>>> +return 0;
>>> +}
>>> +
>>> +static void update_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,
>>
>> Same nit as above: fd being the first argument would be a little more
>> natural in my opinion.
>>
>>> +unsigned int nrz) {
>>> +qemu_mutex_lock(&wps->lock);
>>> +if (get_zones_wp(offset, fd, wps, nrz) < 0) {
>>> +error_report("report zone wp failed");
>>> +return;
>>
>> You are leaking the lock here. Remove the return. Also, given that
>> get_zones_wp() already prints a message if report fails, I do not think
>> the message here is useful.
>>
>> Also, why is this function void typed ? How can the caller know if the
>> update succeeded or not ?
> 
> Update failures mean get_zones_wp() fails and that will be reported by
> error_report. The error message indicates updates fail not reports
> fail. Maybe modifying the m

[PULL 18/26] pci-ids: drop PCI_DEVICE_ID_VIRTIO_MEM

Not needed for a virtio 1.0 device.  virtio_pci_device_plugged()
overrides them anyway (so no functional change).

Signed-off-by: Gerd Hoffmann 
Reviewed-by: David Hildenbrand 
Message-Id: <20221004112100.301935-3-kra...@redhat.com>
---
 include/hw/pci/pci.h   | 1 -
 hw/virtio/virtio-mem-pci.c | 2 --
 2 files changed, 3 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 89eaca429389..b6aefb33fb17 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -85,7 +85,6 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_VIRTIO_9P  0x1009
 #define PCI_DEVICE_ID_VIRTIO_VSOCK   0x1012
 #define PCI_DEVICE_ID_VIRTIO_PMEM0x1013
-#define PCI_DEVICE_ID_VIRTIO_MEM 0x1015
 
 #define PCI_VENDOR_ID_REDHAT 0x1b36
 #define PCI_DEVICE_ID_REDHAT_BRIDGE  0x0001
diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c
index be2383b0c522..5c5c1e3ae3da 100644
--- a/hw/virtio/virtio-mem-pci.c
+++ b/hw/virtio/virtio-mem-pci.c
@@ -104,8 +104,6 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, 
void *data)
 
 k->realize = virtio_mem_pci_realize;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_MEM;
 pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
 pcidev_k->class_id = PCI_CLASS_OTHERS;
 
-- 
2.37.3

[RISU PATCH v2 5/5] loongarch: Add block 'safefloat' and nanbox_s()

Some LoongArch instructions don't care about the high 32 bits,
so use nanbox_s() to set the high 32 bits to 0xffffffff.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 loongarch64.risu   | 119 +++--
 risugen|   2 +-
 risugen_loongarch64.pm |  23 
 3 files changed, 103 insertions(+), 41 deletions(-)

diff --git a/loongarch64.risu b/loongarch64.risu
index d059811..d625a12 100644
--- a/loongarch64.risu
+++ b/loongarch64.risu
@@ -62,7 +62,7 @@ mulw_d_wu LA64  0001 1 rk:5 rj:5 rd:5 \
 !constraints { $rk != 2 && $rj != 2 && $rd != 2; }
 
 #div.{w[u]/d[u]} rd,rj,rk
-# the docement 2.2.13,  rk, rj, need in 32bit [0x0 ~0x7FFF]
+# div.w{u}, mod.w[u]  rk, rj, need in [0x0 ~0x7FFF]
 # use function set_reg_w($reg)
 div_w LA64  0010 0 rk:5 rj:5 rd:5 \
 !constraints { $rk != 2 && $rj != 2 && $rd != 2; } \
@@ -436,47 +436,68 @@ crcc_w_d_w LA64  0010 0 rk:5 rj:5 rd:5 \
 #
 # Floating point arithmetic operation instruction
 #
-fadd_s LA64  0001 1 fk:5 fj:5 fd:5
+fadd_s LA64  0001 1 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fadd_d LA64  0001 00010 fk:5 fj:5 fd:5
-fsub_s LA64  0001 00101 fk:5 fj:5 fd:5
+fsub_s LA64  0001 00101 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fsub_d LA64  0001 00110 fk:5 fj:5 fd:5
-fmul_s LA64  0001 01001 fk:5 fj:5 fd:5
+fmul_s LA64  0001 01001 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmul_d LA64  0001 01010 fk:5 fj:5 fd:5
-fdiv_s LA64  0001 01101 fk:5 fj:5 fd:5
+fdiv_s LA64  0001 01101 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fdiv_d LA64  0001 01110 fk:5 fj:5 fd:5
-fmadd_s LA64  1001 fa:5 fk:5 fj:5 fd:5
+fmadd_s LA64  1001 fa:5 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmadd_d LA64  1010 fa:5 fk:5 fj:5 fd:5
-fmsub_s LA64  1101 fa:5 fk:5 fj:5 fd:5
+fmsub_s LA64  1101 fa:5 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmsub_d LA64  1110 fa:5 fk:5 fj:5 fd:5
-fnmadd_s LA64  10001001 fa:5 fk:5 fj:5 fd:5
+fnmadd_s LA64  10001001 fa:5 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fnmadd_d LA64  10001010 fa:5 fk:5 fj:5 fd:5
-fnmsub_s LA64  10001101 fa:5 fk:5 fj:5 fd:5
+fnmsub_s LA64  10001101 fa:5 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fnmsub_d LA64  10001110 fa:5 fk:5 fj:5 fd:5
-fmax_s LA64  0001 10001 fk:5 fj:5 fd:5
+fmax_s LA64  0001 10001 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmax_d LA64  0001 10010 fk:5 fj:5 fd:5
-fmin_s LA64  0001 10101 fk:5 fj:5 fd:5
+fmin_s LA64  0001 10101 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmin_d LA64  0001 10110 fk:5 fj:5 fd:5
-fmaxa_s LA64  0001 11001 fk:5 fj:5 fd:5
+fmaxa_s LA64  0001 11001 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmaxa_d LA64  0001 11010 fk:5 fj:5 fd:5
-fmina_s LA64  0001 11101 fk:5 fj:5 fd:5
+fmina_s LA64  0001 11101 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fmina_d LA64  0001 0 fk:5 fj:5 fd:5
-fabs_s LA64  00010001 01000 1 fj:5 fd:5
+fabs_s LA64  00010001 01000 1 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fabs_d LA64  00010001 01000 00010 fj:5 fd:5
-fneg_s LA64  00010001 01000 00101 fj:5 fd:5
+fneg_s LA64  00010001 01000 00101 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fneg_d LA64  00010001 01000 00110 fj:5 fd:5
-fsqrt_s LA64  00010001 01000 10001 fj:5 fd:5
+fsqrt_s LA64  00010001 01000 10001 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fsqrt_d LA64  00010001 01000 10010 fj:5 fd:5
-frecip_s LA64  00010001 01000 10101 fj:5 fd:5
+frecip_s LA64  00010001 01000 10101 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 frecip_d LA64  00010001 01000 10110 fj:5 fd:5
-frsqrt_s LA64  00010001 01000 11001 fj:5 fd:5
+frsqrt_s LA64  00010001 01000 11001 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 frsqrt_d LA64  00010001 01000 11010 fj:5 fd:5
-fscaleb_s LA64  00010001 1 fk:5 fj:5 fd:5
+fscaleb_s LA64  00010001 1 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fscaleb_d LA64  00010001 00010 fk:5 fj:5 fd:5
-flogb_s LA64  00010001 01000 01001 fj:5 fd:5
+flogb_s LA64  00010001 01000 01001 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 flogb_d LA64  00010001 01000 01010 fj:5 fd:5
-fcopysign_s LA64  00010001 00101 fk:5 fj:5 fd:5
+fcopysign_s LA64  00010001 00101 fk:5 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fcopysign_d LA64  00010001 00110 fk:5 fj:5 fd:5
-fclass_s LA64  00010001 01000 01101 fj:5 fd:5
+fclass_s LA64  00010001 01000 01101 fj:5 fd:5 \
+!safefloat { nanbox_s($fd); }
 fclass_d LA64  00010001 01000 01110 fj:5 fd:5
 
 #
@@ -490,43 +511,59 @@ fcmp_cond_d LA64  1110 cond:5 fk:5 fj:5 00 cd

Re: [PATCH v3 1/3] file-posix:add the tracking of the zones write pointers

2022-10-13 Thread Sam Li

Damien Le Moal  于2022年10月13日周四 13:13写道：
>
> On 10/10/22 11:33, Sam Li wrote:
> > Since Linux doesn't have a user API to issue zone append operations to
> > zoned devices from user space, the file-posix driver is modified to add
> > zone append emulation using regular writes. To do this, the file-posix
> > driver tracks the wp location of all zones of the device. It uses an
> > array of uint64_t. The most significant bit of each wp location indicates
> > if the zone type is conventional zones.
> >
> > The zones wp can be changed due to the following operations issued:
> > - zone reset: change the wp to the start offset of that zone
> > - zone finish: change to the end location of that zone
> > - write to a zone
> > - zone append
> >
> > Signed-off-by: Sam Li 
> > ---
> >  block/file-posix.c   | 158 +++
> >  include/block/block-common.h |  14 +++
> >  include/block/block_int-common.h |   5 +
> >  3 files changed, 177 insertions(+)
> >
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index a9d347292e..17c0b58158 100755
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -206,6 +206,7 @@ typedef struct RawPosixAIOData {
> >  struct {
> >  struct iovec *iov;
> >  int niov;
> > +int64_t *append_sector;
>
> This should be added as part of patch 2. You do not need this to track
> the wp of zones in this patch.
>
> >  } io;
> >  struct {
> >  uint64_t cmd;
> > @@ -226,6 +227,7 @@ typedef struct RawPosixAIOData {
> >  struct {
> >  unsigned long zone_op;
> >  const char *zone_op_name;
> > +bool all;
> >  } zone_mgmt;
> >  };
> >  } RawPosixAIOData;
> > @@ -1331,6 +1333,67 @@ static int hdev_get_max_segments(int fd, struct stat 
> > *st) {
> >  #endif
> >  }
> >
> > +#if defined(CONFIG_BLKZONED)
> > +static int get_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,
>
> Nit: It would seem more natural to have the fd argument first...
>
> > +unsigned int nrz) {
> > +struct blk_zone *blkz;
> > +int64_t rep_size;
> > +int64_t sector = offset >> BDRV_SECTOR_BITS;
> > +int ret, n = 0, i = 0;
> > +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> > blk_zone);
> > +g_autofree struct blk_zone_report *rep = NULL;
> > +
> > +rep = g_malloc(rep_size);
> > +blkz = (struct blk_zone *)(rep + 1);
> > +while (n < nrz) {
> > +memset(rep, 0, rep_size);
> > +rep->sector = sector;
> > +rep->nr_zones = nrz - n;
> > +
> > +do {
> > +ret = ioctl(fd, BLKREPORTZONE, rep);
> > +} while (ret != 0 && errno == EINTR);
> > +if (ret != 0) {
> > +error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed 
> > %d",
> > +fd, offset, errno);
> > +return -errno;
> > +}
> > +
> > +if (!rep->nr_zones) {
> > +break;
> > +}
> > +
> > +for (i = 0; i < rep->nr_zones; i++, n++) {
> > +/*
> > + * The wp tracking cares only about sequential writes required 
> > and
> > + * sequential write preferred zones so that the wp can advance 
> > to
> > + * the right location.
> > + * Use the most significant bit of the wp location to indicate 
> > the
> > + * zone type: 0 for SWR/SWP zones and 1 for conventional zones.
> > + */
> > +if (!(blkz[i].type != BLK_ZONE_TYPE_CONVENTIONAL)) {
>
> Double negation... This can simply be:
>
> if (blkz[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
>
> > +wps->wp[i] += 1ULL << 63;
>
> No need for the += here. This can be "=".
>
> > +} else {
> > +wps->wp[i] = blkz[i].wp << BDRV_SECTOR_BITS;
> > +}
> > +}
> > +sector = blkz[i-1].start + blkz[i-1].len;
>
> spaces missing around the "-" in the "i-1" expressions.
>
> > +}
> > +
> > +return 0;
> > +}
> > +
> > +static void update_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,
>
> Same nit as above: fd being the first argument would be a little more
> natural in my opinion.
>
> > +unsigned int nrz) {
> > +qemu_mutex_lock(&wps->lock);
> > +if (get_zones_wp(offset, fd, wps, nrz) < 0) {
> > +error_report("report zone wp failed");
> > +return;
>
> You are leaking the lock here. Remove the return. Also, given that
> get_zones_wp() already prints a message if report fails, I do not think
> the message here is useful.
>
> Also, why is this function void typed ? How can the caller know if the
> update succeeded or not ?

An update failure means that get_zones_wp() failed, and that is already
reported by error_report(). The error message here is meant to indicate
that the update failed, not that the report failed. Maybe rewording the
message would suffice as error checking?

+qemu_mutex_lock(&wps->lock);
+if (

[PATCH] configure: Avoid using strings binary

2022-10-13 Thread Michal Privoznik

When determining the endianness of the target architecture we're
building for, a small program is compiled, which in an obfuscated
way declares two strings. Then, we look which string is in
correct order (using the strings binary) and deduce the endianness.
But using the strings binary is problematic, because it's part of
toolchain (strings is just a symlink to
x86_64-pc-linux-gnu-strings or llvm-strings). And when
(cross-)compiling, it requires users to set the symlink to the
correct toolchain.

Fortunately, we have a better alternative anyways. Since we
require either clang or gcc we can rely on macros they declare.

Bug: https://bugs.gentoo.org/876933
Signed-off-by: Michal Privoznik 
---
 configure | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/configure b/configure
index 45ee6f4eb3..91e04635cb 100755
--- a/configure
+++ b/configure
@@ -1426,27 +1426,30 @@ fi
 # ---
 # big/little endian test
 cat > $TMPC << EOF
-#include 
-short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, };
-short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, };
-int main(int argc, char *argv[])
-{
-return printf("%s %s\n", (char *)big_endian, (char *)little_endian);
-}
+#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN || \
+defined(__BIG_ENDIAN__)
+# error BIG
+#endif
+int main(void) { return 0; }
 EOF
 
 if compile_prog ; then
-if strings -a $TMPE | grep -q BiGeNdIaN ; then
-bigendian="yes"
-elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then
-bigendian="no"
-else
-echo big/little test failed
-exit 1
-fi
+  bigendian="yes"
 else
+  cat > $TMPC << EOF
+#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN || \
+defined(__LITTLE_ENDIAN__)
+# error LITTLE
+#endif
+int main(void) { return 0; }
+EOF
+
+  if compile_prog ; then
+bigendian="no"
+  else
 echo big/little test failed
 exit 1
+  fi
 fi
 
 ##
-- 
2.35.1

[RISU PATCH v2 1/5] risu: Use alternate stack

We can use an alternate stack, so that the sp register can be used as an
input/output register.
I have tested this on the aarch64 and LoongArch architectures.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 risu.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/risu.c b/risu.c
index 1c096a8..714074e 100644
--- a/risu.c
+++ b/risu.c
@@ -329,7 +329,7 @@ static void set_sigill_handler(void (*fn) (int, siginfo_t 
*, void *))
 memset(&sa, 0, sizeof(struct sigaction));
 
 sa.sa_sigaction = fn;
-sa.sa_flags = SA_SIGINFO;
+sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
 sigemptyset(&sa.sa_mask);
 if (sigaction(SIGILL, &sa, 0) != 0) {
 perror("sigaction");
@@ -550,6 +550,7 @@ int main(int argc, char **argv)
 char *trace_fn = NULL;
 struct option *longopts;
 char *shortopts;
+stack_t ss;
 
 longopts = setup_options(&shortopts);
 
@@ -617,6 +618,19 @@ int main(int argc, char **argv)
 
 load_image(imgfile);
 
+/* create alternate stack */
+ss.ss_sp = malloc(SIGSTKSZ);
+if (ss.ss_sp == NULL) {
+perror("malloc");
+exit(EXIT_FAILURE);
+}
+ss.ss_size = SIGSTKSZ;
+ss.ss_flags = 0;
+if (sigaltstack(&ss, NULL) == -1) {
+perror("sigaltstac");
+exit(EXIT_FAILURE);
+}
+
 /* E.g. select requested SVE vector length. */
 arch_init();
 
-- 
2.31.1

Re: [PATCH v3 2/3] block: introduce zone append write for zoned devices

2022-10-13 Thread Damien Le Moal

On 2022/10/13 16:27, Sam Li wrote:
> Damien Le Moal  于2022年10月13日周四 13:55写道：
>>
>> On 10/10/22 11:33, Sam Li wrote:
>>> A zone append command is a write operation that specifies the first
>>> logical block of a zone as the write position. When writing to a zoned
>>> block device using zone append, the byte offset of writes is pointing
>>> to the write pointer of that zone. Upon completion the device will
>>> respond with the position the data has been written in the zone.
>>>
>>> Signed-off-by: Sam Li 
>>> ---
>>>  block/block-backend.c | 64 +++
>>>  block/file-posix.c| 64 ---
>>>  block/io.c| 21 ++
>>>  block/raw-format.c|  7 
>>>  include/block/block-io.h  |  3 ++
>>>  include/block/block_int-common.h  |  3 ++
>>>  include/block/raw-aio.h   |  4 +-
>>>  include/sysemu/block-backend-io.h |  9 +
>>>  8 files changed, 168 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/block/block-backend.c b/block/block-backend.c
>>> index ddc569e3ac..bfdb719bc8 100644
>>> --- a/block/block-backend.c
>>> +++ b/block/block-backend.c
>>> @@ -1439,6 +1439,9 @@ typedef struct BlkRwCo {
>>>  struct {
>>>  BlockZoneOp op;
>>>  } zone_mgmt;
>>> +struct {
>>> +int64_t *append_sector;
>>
>> I would call this "sector", since it will always be referenced as
>> "->zone_append.sector", you get the "append" for free :)
>>
>> That said, shouldn't this be a byte value, so called "offset" ? Not
>> entirely sure...
> 
> Yes, it can be changed to "offset"(byte) following QEMU's convention.
> Just need to add conversions to virtio_blk_zone_append/*_complete,
> which is easily done.
> 
>>
>>> +} zone_append;
>>>  };
>>>  } BlkRwCo;
>>>
>>> @@ -1869,6 +1872,46 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, 
>>> BlockZoneOp op,
>>>  return &acb->common;
>>>  }
>>>
>>> +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) {
>>> +BlkAioEmAIOCB *acb = opaque;
>>> +BlkRwCo *rwco = &acb->rwco;
>>> +
>>> +rwco->ret = blk_co_zone_append(rwco->blk, 
>>> rwco->zone_append.append_sector,
>>> +   rwco->iobuf, rwco->flags);
>>> +blk_aio_complete(acb);
>>> +}
>>> +
>>> +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
>>> +QEMUIOVector *qiov, BdrvRequestFlags flags,
>>> +BlockCompletionFunc *cb, void *opaque) {
>>> +BlkAioEmAIOCB *acb;
>>> +Coroutine *co;
>>> +IO_CODE();
>>> +
>>> +blk_inc_in_flight(blk);
>>> +acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
>>> +acb->rwco = (BlkRwCo) {
>>> +.blk= blk,
>>> +.ret= NOT_DONE,
>>> +.flags  = flags,
>>> +.iobuf  = qiov,
>>> +.zone_append = {
>>> +.append_sector = offset,
>>
>> See above comment. So since this is a byte value, this needs to be
>> called "offset", no ?
> 
> Yes, same answers above.
> 
>>
>>> +},
>>> +};
>>> +acb->has_returned = false;
>>> +
>>> +co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
>>> +bdrv_coroutine_enter(blk_bs(blk), co);
>>> +acb->has_returned = true;
>>> +if (acb->rwco.ret != NOT_DONE) {
>>> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
>>> + blk_aio_complete_bh, acb);
>>> +}
>>> +
>>> +return &acb->common;
>>> +}
>>> +
>>>  /*
>>>   * Send a zone_report command.
>>>   * offset is a byte offset from the start of the device. No alignment
>>> @@ -1921,6 +1964,27 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, 
>>> BlockZoneOp op,
>>>  return ret;
>>>  }
>>>
>>> +/*
>>> + * Send a zone_append command.
>>> + */
>>> +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
>>> +QEMUIOVector *qiov, BdrvRequestFlags flags)
>>> +{
>>> +int ret;
>>> +IO_CODE();
>>> +
>>> +blk_inc_in_flight(blk);
>>> +blk_wait_while_drained(blk);
>>> +if (!blk_is_available(blk)) {
>>> +blk_dec_in_flight(blk);
>>> +return -ENOMEDIUM;
>>> +}
>>> +
>>> +ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
>>> +blk_dec_in_flight(blk);
>>> +return ret;
>>> +}
>>> +
>>>  void blk_drain(BlockBackend *blk)
>>>  {
>>>  BlockDriverState *bs = blk_bs(blk);
>>> diff --git a/block/file-posix.c b/block/file-posix.c
>>> index 17c0b58158..08ab164df4 100755
>>> --- a/block/file-posix.c
>>> +++ b/block/file-posix.c
>>> @@ -1657,7 +1657,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData 
>>> *aiocb)
>>>  ssize_t len;
>>>
>>>  do {
>>> -if (aiocb->aio_type & QEMU_AIO_WRITE)
>>> +if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
>>>  len = qemu_pwritev(aiocb->aio_fildes,
>>>

[RISU PATCH v2 2/5] loongarch: Add LoongArch basic test support

This patch adds LoongArch server, client support, and basic test file.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 risu_loongarch64.c |  50 ++
 risu_reginfo_loongarch64.c | 183 +
 risu_reginfo_loongarch64.h |  25 +
 test_loongarch64.s |  92 +++
 4 files changed, 350 insertions(+)
 create mode 100644 risu_loongarch64.c
 create mode 100644 risu_reginfo_loongarch64.c
 create mode 100644 risu_reginfo_loongarch64.h
 create mode 100644 test_loongarch64.s

diff --git a/risu_loongarch64.c b/risu_loongarch64.c
new file mode 100644
index 000..bda15d4
--- /dev/null
+++ b/risu_loongarch64.c
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2022 Loongson Technology Corporation Limited
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * based on Peter Maydell's risu_arm.c
+ */
+
+#include 
+#include 
+#include 
+
+#include "risu.h"
+
+void advance_pc(void *vuc)
+{
+struct ucontext *uc = vuc;
+uc->uc_mcontext.sc_pc += 4;
+}
+
+void set_ucontext_paramreg(void *vuc, uint64_t value)
+{
+struct ucontext *uc = vuc;
+uc->uc_mcontext.sc_regs[4] = value;
+}
+
+uint64_t get_reginfo_paramreg(struct reginfo *ri)
+{
+return ri->regs[4];
+}
+
+int get_risuop(struct reginfo *ri)
+{
+/* Return the risuop we have been asked to do
+ * (or OP_SIGILL if this was a SIGILL for a non-risuop insn)
+ */
+uint32_t insn = ri->faulting_insn;
+uint32_t op = insn & 0xf;
+uint32_t key = insn & ~0xf;
+uint32_t risukey = 0x01f0;
+return (key != risukey) ? OP_SIGILL : op;
+}
+
+uintptr_t get_pc(struct reginfo *ri)
+{
+   return ri->pc;
+}
diff --git a/risu_reginfo_loongarch64.c b/risu_reginfo_loongarch64.c
new file mode 100644
index 000..af6ab77
--- /dev/null
+++ b/risu_reginfo_loongarch64.c
@@ -0,0 +1,183 @@
+/**
+ * Copyright (c) 2022 Loongson Technology Corporation Limited
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * based on Peter Maydell's risu_reginfo_arm.c
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "risu.h"
+#include "risu_reginfo_loongarch64.h"
+
+const struct option * const arch_long_opts;
+const char * const arch_extra_help;
+
+struct _ctx_layout {
+struct sctx_info *addr;
+unsigned int size;
+};
+
+struct extctx_layout {
+unsigned long size;
+unsigned int flags;
+struct _ctx_layout fpu;
+struct _ctx_layout end;
+};
+
+void process_arch_opt(int opt, const char *arg)
+{
+abort();
+}
+
+void arch_init(void)
+{
+}
+
+int reginfo_size(struct reginfo *ri)
+{
+return sizeof(*ri);
+}
+
+static int parse_extcontext(struct sigcontext *sc, struct extctx_layout 
*extctx)
+{
+uint32_t magic, size;
+struct sctx_info *info = (struct sctx_info *)&sc->sc_extcontext;
+
+while(1) {
+magic = (uint32_t)info->magic;
+size =  (uint32_t)info->size;
+switch (magic) {
+case 0: /* END*/
+return 0;
+case FPU_CTX_MAGIC:
+if (size < (sizeof(struct sctx_info) +
+sizeof(struct fpu_context))) {
+return -1;
+}
+extctx->fpu.addr = info;
+break;
+default:
+return -1;
+   }
+   info = (struct sctx_info *)((char *)info +size);
+}
+return 0;
+}
+
+/* reginfo_init: initialize with a ucontext */
+void reginfo_init(struct reginfo *ri, ucontext_t *context)
+{
+int i;
+struct ucontext *uc = (struct ucontext *)context;
+struct extctx_layout extctx;
+
+memset(&extctx, 0, sizeof(struct extctx_layout));
+memset(ri, 0, sizeof(*ri));
+
+for (i = 1; i < 32; i++) {
+ri->regs[i] = uc->uc_mcontext.sc_regs[i]; //sp:r3, tp:r2
+}
+
+ri->regs[2] = 0xdeadbeefdeadbeef;
+ri->pc = uc->uc_mcontext.sc_pc - (unsigned long)image_start_address;
+ri->flags = uc->uc_mcontext.sc_flags;
+ri->faulting_insn = *(uint32_t *)uc->uc_mcontext.sc_pc;
+
+parse_extcontext(&uc->uc_mcontext, &extctx);
+if (extctx.fpu.addr) {
+struct sctx_info *info = extctx.fpu.addr;
+struct fpu_context *fpu_ctx = (struct fpu_c

[RISU PATCH v2 0/5] Add LoongArch architectures support

hi,

This series adds LoongArch architecture support. We have tested two
modes:
1. LoongArch host server +  LoongArch host client;
2. LoongArch host server  + qemu client.

You can find all LoongArch instructions at [1].
This series does not contain all LoongArch instructions;
it omits, for example, pcadd, syscalls, rdtime and jumps.

[1]:
https://github.com/loongson/LoongArch-Documentation/releases/download/2022.08.12/LoongArch-Vol1-v1.02-EN.pdf

V2:

- rewrite write_mov_ri();
- get_risuop return a RisuOp;
- test again with 1 million instructions.

Thanks.
Song Gao

Song Gao (5):
  risu: Use alternate stack
  loongarch: Add LoongArch basic test support
  loongarch: Implement risugen module
  loongarch: Add risufile with loongarch instructions
  loongarch: Add block 'safefloat' and nanbox_s()

 loongarch64.risu   | 612 +
 risu.c |  16 +-
 risu_loongarch64.c |  50 +++
 risu_reginfo_loongarch64.c | 183 +++
 risu_reginfo_loongarch64.h |  25 ++
 risugen|   2 +-
 risugen_loongarch64.pm | 509 ++
 test_loongarch64.s |  92 ++
 8 files changed, 1487 insertions(+), 2 deletions(-)
 create mode 100644 loongarch64.risu
 create mode 100644 risu_loongarch64.c
 create mode 100644 risu_reginfo_loongarch64.c
 create mode 100644 risu_reginfo_loongarch64.h
 create mode 100644 risugen_loongarch64.pm
 create mode 100644 test_loongarch64.s

-- 
2.31.1

Re: MultiFD and default channel out of order mapping on receive side.

2022-10-13 Thread Daniel P . Berrangé

On Thu, Oct 13, 2022 at 01:23:40AM +0530, manish.mishra wrote:
> Hi Everyone,
> Hope everyone is doing great. I have seen some live migration issues with 
> qemu-4.2 when using multiFD. Signature of issue is something like this.
> 2022-10-01T09:57:53.972864Z qemu-kvm: failed to receive packet via multifd 
> channel 0: multifd: received packet magic 5145564d expected 11223344
> 
> Basically default live migration channel packet is received on multiFD 
> channel. I see a older patch explaining potential reason for this behavior.
> https://lists.gnu.org/archive/html/qemu-devel/2019-10/msg05920.html
> > [PATCH 3/3] migration/multifd: fix potential wrong acception order of IO.
> 
> But i see this patch was not merged. By looking at qemu master code, i
> could not find any other patch too which can handle this issue. So as
> per my understanding this is still a potential issue even in qemu
> master. I mainly wanted to check why this patch was dropped?

See my replies in that message - it broke compatibility of data on
the wire, meaning old QEMU can't talk to new QEMU and vice versa.

We need a fix for this issue, but it needs to take into account
wire compatibility.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[RISU PATCH v2 4/5] loongarch: Add risufile with loongarch instructions

Acked-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 loongarch64.risu | 573 +++
 1 file changed, 573 insertions(+)
 create mode 100644 loongarch64.risu

diff --git a/loongarch64.risu b/loongarch64.risu
new file mode 100644
index 000..d059811
--- /dev/null
+++ b/loongarch64.risu
@@ -0,0 +1,573 @@
+###
+# Copyright (c) 2022 Loongson Technology Corporation Limited
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# based on aarch64.risu by Claudio Fontana
+# based on arm.risu by Peter Maydell
+###
+
+# Input file for risugen defining LoongArch64 instructions
+.mode loongarch64
+
+#
+# Fixed point arithmetic operation instruction
+#
+add_w LA64  0001 0 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+add_d LA64  0001 1 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+sub_w LA64  0001 00010 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+sub_d LA64  0001 00011 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+slt LA64  0001 00100 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+sltu LA64  0001 00101 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+slti LA64  001000 si12:12 rj:5 rd:5 \
+!constraints { $rj != 2 && $rd != 2; }
+sltui LA64  001001 si12:12 rj:5 rd:5 \
+!constraints { $rj != 2 && $rd != 2; }
+nor LA64  0001 01000 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+and LA64  0001 01001 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+or LA64  0001 01010 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+xor LA64  0001 01011 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+orn LA64  0001 01100 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+andn LA64  0001 01101 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mul_w LA64  0001 11000 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mul_d LA64  0001 11011 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mulh_w LA64  0001 11001 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mulh_d LA64  0001 11100 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mulh_wu LA64  0001 11010 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mulh_du LA64  0001 11101 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mulw_d_w LA64  0001 0 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mulw_d_wu LA64  0001 1 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+
+#div.{w[u]/d[u]} rd,rj,rk
+# the docement 2.2.13,  rk, rj, need in 32bit [0x0 ~0x7FFF]
+# use function set_reg_w($reg)
+div_w LA64  0010 0 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; } \
+!memory { set_reg_w($rj); set_reg_w($rk); }
+div_wu LA64  0010 00010 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; } \
+!memory { set_reg_w($rj); set_reg_w($rk); }
+div_d LA64  0010 00100 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+div_du LA64  0010 00110 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mod_w LA64  0010 1 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; } \
+!memory { set_reg_w($rj); set_reg_w($rk); }
+mod_wu LA64  0010 00011 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; } \
+!memory { set_reg_w($rj); set_reg_w($rk); }
+mod_d LA64  0010 00101 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+mod_du LA64  0010 00111 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+
+alsl_w LA64   010 sa2:2 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+alsl_wu LA64   011 sa2:2 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+alsl_d LA64  0010 110 sa2:2 rk:5 rj:5 rd:5 \
+!constraints { $rk != 2 && $rj != 2 && $rd != 2; }
+lu12i_w LA64 0001 010 si20:20 rd:5 \
+!constraints { $rd != 2; }
+lu32i_d LA64 0001 011 si20:20 rd:5 \
+!constraints { $rd != 2; }
+lu52i_d LA

[RISU PATCH v2 3/5] loongarch: Implement risugen module

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 risugen_loongarch64.pm | 486 +
 1 file changed, 486 insertions(+)
 create mode 100644 risugen_loongarch64.pm

diff --git a/risugen_loongarch64.pm b/risugen_loongarch64.pm
new file mode 100644
index 000..98948a2
--- /dev/null
+++ b/risugen_loongarch64.pm
@@ -0,0 +1,486 @@
+#!/usr/bin/perl -w
+###
+# Copyright (c) 2022 Loongson Technology Corporation Limited
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# based on Peter Maydell (Linaro) - initial implementation
+###
+
+# risugen -- generate a test binary file for use with risu
+# See 'risugen --help' for usage information.
+package risugen_loongarch64;
+
+use strict;
+use warnings;
+
+use risugen_common;
+
+require Exporter;
+
+our @ISA= qw(Exporter);
+our @EXPORT = qw(write_test_code);
+
+my $periodic_reg_random = 1;
+
+# Maximum alignment restriction permitted for a memory op.
+my $MAXALIGN = 64;
+
+my $OP_COMPARE = 0;# compare registers
+my $OP_TESTEND = 1;# end of test, stop
+my $OP_SETMEMBLOCK = 2;# r4 is address of memory block (8192 bytes)
+my $OP_GETMEMBLOCK = 3;# add the address of memory block to r4
+my $OP_COMPAREMEM = 4; # compare memory block
+
+sub write_risuop($)
+{
+my ($op) = @_;
+insn32(0x01f0 | $op);
+}
+
+sub write_set_fcsr($)
+{
+my ($fcsr) = @_;
+# movgr2fcsr r0, r0
+insn32(0x0114c000);
+}
+
+# Global used to communicate between align(x) and reg() etc.
+my $alignment_restriction;
+
+sub set_reg_w($)
+{
+my($reg)=@_;
+# Set reg [0x0, 0x7FFF]
+
+# $reg << 33
+# slli.d  $reg, $reg, 33
+insn32(0x41 | 33 << 10 | $reg << 5 | $reg);
+# $reg >> 33
+# srli.d  $reg, $reg, 33
+insn32(0x45 | 33 << 10 | $reg << 5 | $reg);
+
+return $reg;
+}
+
+sub align($)
+{
+my ($a) = @_;
+if (!is_pow_of_2($a) || ($a < 0) || ($a > $MAXALIGN)) {
+die "bad align() value $a\n";
+}
+$alignment_restriction = $a;
+}
+
+sub write_sub_rrr($$$)
+{
+my ($rd, $rj, $rk) = @_;
+# sub.d rd, rj, rk
+insn32(0x00118000 | $rk << 10 | $rj << 5 | $rd);
+}
+
+sub write_mov_rr($$$)
+{
+my($rd, $rj, $rk) = @_;
+# add.d rd, rj, r0
+insn32(0x00108000 | 0 << 10 | $rj << 5 | $rd);
+}
+
+sub write_mov_ri($$)
+{
+my ($rd, $imm) = @_;
+
+if ($imm >= -0x8000 && $imm <= 0x7fff) {
+# lu12i.w rd, si20
+insn32(0x1400 | (($imm >> 12) & 0xf) << 5 | $rd);
+# ori rd, rd, ui12
+insn32(0x0380 | ($imm & 0xfff) << 10 | $rd << 5 | $rd);
+} else {
+   die "unhandled immediate load";
+}
+}
+
+sub write_get_offset()
+{
+# Emit code to get a random offset within the memory block, of the
+# right alignment, into r4
+# We require the offset to not be within 256 bytes of either
+# end, to (more than) allow for the worst case data transfer, which is
+# 16 * 64 bit regs
+my $offset = (rand(2048 - 512) + 256) & ~($alignment_restriction - 1);
+write_mov_ri(4, $offset);
+write_risuop($OP_GETMEMBLOCK);
+}
+
+sub reg_plus_reg($$@)
+{
+my ($base, $idx, @trashed) = @_;
+my $savedidx = 0;
+if ($idx == 4) {
+# Save the index into some other register for the
+# moment, because the risuop will trash r4.
+$idx = 5;
+$idx++ if $idx == $base;
+$savedidx = 1;
+write_mov_rr($idx, 4, 0);
+}
+# Get a random offset within the memory block, of the
+# right alignment.
+write_get_offset();
+
+write_sub_rrr($base, 4, $idx);
+if ($base != 4) {
+if ($savedidx) {
+write_mov_rr(4, $idx, 0);
+write_mov_ri($idx, 0);
+} else {
+write_mov_ri(4, 0);
+}
+} else {
+   if ($savedidx) {
+write_mov_ri($idx, 0);
+   }
+}
+
+if (grep $_ == $base, @trashed) {
+return -1;
+}
+return $base;
+}
+
+sub reg_plus_imm($$@)
+{
+# Handle reg + immediate addressing mode
+my ($base, $imm, @trashed) = @_;
+
+write_get_offset();
+# Now r4 is the address we want to do the access to,
+# so set the basereg by doing the inverse of the
+# addressing mode calculation, ie base = r4 - imm
+# We could do this more cleverly with a sub immediate.
+if ($base != 4) {
+write_mov_ri($base, $imm);
+write_sub_rrr($base, 4, $base);
+# Clear r4 to avoid register compare mismatches
+# when the memory block location differs between machines.
+ write_mov_ri(4, 0);
+}else {
+# We borrow r1 as a

Re: [PATCH] target/i386: Switch back XFRM value

2022-10-13 Thread Huang, Kai

On Thu, 2022-10-13 at 02:23 -0400, Yang Zhong wrote:
> > > enclave only supported SSE and x87 feature(xfrm=0x3).
> > 
> > Is this true?  Perhaps I am missing something, but it seems env-
> > > features[FEAT_XSAVE_XCR0_LO] only includes LBR bit, which is bit 15.
> 
>   We printed the XFRM value from SGX SDK to find this issue.

I don't know how you added the print, but the exact value that is set to SGX CPUID
is irrelevant, as it is wrong anyway.  The value can also differ when you run on
different machines, etc.  IMHO in changelog we just need to point out the fact
that the XSAVE enabling patch wrongly messed up with SGX CPUID and this patch
fixes that.

Re: [RFC PATCH 1/4] docs/devel: add a maintainers section to development process

2022-10-13 Thread Markus Armbruster

Alex Bennée  writes:

> We don't currently have a clear place in the documentation to describe
> the rolls and responsibilities of a maintainer. Lets create one so we
> can. I've moved a few small bits out of other files to try and keep
> everything in one place.
>
> Signed-off-by: Alex Bennée 
> ---
>  docs/devel/code-of-conduct.rst   |  2 +
>  docs/devel/index-process.rst |  1 +
>  docs/devel/maintainers.rst   | 84 
>  docs/devel/submitting-a-pull-request.rst | 12 ++--
>  4 files changed, 91 insertions(+), 8 deletions(-)
>  create mode 100644 docs/devel/maintainers.rst
>
> diff --git a/docs/devel/code-of-conduct.rst b/docs/devel/code-of-conduct.rst
> index 195444d1b4..f734ed0317 100644
> --- a/docs/devel/code-of-conduct.rst
> +++ b/docs/devel/code-of-conduct.rst
> @@ -1,3 +1,5 @@
> +.. _code_of_conduct:
> +
>  Code of Conduct
>  ===
>  
> diff --git a/docs/devel/index-process.rst b/docs/devel/index-process.rst
> index d0d7a200fd..d50dd74c3e 100644
> --- a/docs/devel/index-process.rst
> +++ b/docs/devel/index-process.rst
> @@ -8,6 +8,7 @@ Notes about how to interact with the community and how and 
> where to submit patch
>  
> code-of-conduct
> conflict-resolution
> +   maintainers
> style
> submitting-a-patch
> trivial-patches
> diff --git a/docs/devel/maintainers.rst b/docs/devel/maintainers.rst
> new file mode 100644
> index 00..e3c7003bfa
> --- /dev/null
> +++ b/docs/devel/maintainers.rst
> @@ -0,0 +1,84 @@
> +.. _maintainers:
> +
> +The Roll of Maintainers

Do you mean "Role"?

> +===
> +
> +Maintainers are a critical part of the projects contributor ecosystem.
> +They come from a wide range of backgrounds from unpaid hobbyists
> +working in their spare time to employees who work on the project as
> +part of their job. Maintainer activities include:
> +
> +  - reviewing patches and suggesting changes
> +  - preparing pull requests for their subsystems
> +  - participating other project activities

participating in

I think this doesn't quite do justice to what we expect maintainers to
do.

Besides shepherding patches, we expect maintainers to guard the
integrity of their subsystem and the "health" of the developer
community.

We generally defer to a maintainer's reasoned judgement.  This means a
maintainer has a certain power to say no.  With power comes
responsibility.

> +
> +They are also human and subject to the same pressures as everyone else
> +including overload and burn out. Like everyone else they are subject

burnout

> +to projects :ref:`code_of_conduct`.

Arguably even more so than "ordinary" contributors, because by their
visibility they necessarily serve as role models.  With power comes
responsibility.

Should we add something on how a maintainer could get advice?  Say when
they have to deal with bad behavior.

> +
> +The MAINTAINERS file
> +
> +
> +The `MAINTAINERS
> +`__
> +file contains the canonical list of who is a maintainer. The file
> +is machine readable so an appropriately configured git (see
> +:ref:`cc_the_relevant_maintainer`) can automatically Cc them on
> +patches that touch their area of code.
> +
> +The file also describes the status of the area of code to give an idea
> +of how actively that section is maintained.
> +
> +.. list-table:: Meaning of support status in MAINTAINERS
> +   :widths: 25 75
> +   :header-rows: 1
> +
> +   * - Status
> + - Meaning
> +   * - Supported
> + - Someone is actually paid to look after this.
> +   * - Maintained
> + - Someone actually looks after it.
> +   * - Odd Fixes
> + - It has a maintainer but they don't have time to do
> +   much other than throw the odd patch in.
> +   * - Orphan
> + - No current maintainer.
> +   * - Obsolete
> + - Old obsolete code, should use something else.
> +
> +Please bare in mind that even if someone is paid to support something

bear in mind

> +it does not mean they are paid to support you. This is open source and
> +the code comes with no warranty and the project makes no guarantees
> +about dealing with bugs or features requests.
> +
> +Becoming a maintainer
> +-
> +
> +Maintainers are volunteers who put themselves forward to keep an eye
> +on an area of code.

"Volunteers who put themselves forward"...  The press gangs wielding
clubs are a figment of your drunken imagination!

>  They are generally accepted by the community to
> +have a good understanding of the subsystem and able to make a positive
> +contribution to the project.
> +
> +The process is simple - simply sent a patch to the list that updates
> +the ``MAINTAINERS`` file. Sometimes this is done as part of a larger
> +series when a new sub-system is being added to the code base. This can
> +also be done by a retiring maintainer who nominates their replacement
> +after discuss

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

On Wed, 12 Oct 2022 14:21:15 -0400
Gregory Price  wrote:

> Included in this response is a recommended patch set on top of this
> patch that resolves a number of issues, including style and a heap
> corruption bug.
> 
> The purpose of this patch set is to refactor the CDAT initialization
> code to support future patch sets that will introduce multi-region
> support in CXL Type3 devices.
> 
> 1) Checkpatch errors in the immediately prior patch
> 2) Flatting of code in cdat initialization
> 3) Changes in allocation and error checking for cleanliness
> 4) Change in the allocation/free strategy of CDAT sub-tables to simplify
>multi-region allocation in the future.  Also resolves a heap
>corruption bug
> 5) Refactor of CDAT initialization code into a function that initializes
>sub-tables per memory-region.
> 
> Gregory Price (5):
>   hw/mem/cxl_type3: fix checkpatch errors
>   hw/mem/cxl_type3: Pull validation checks ahead of functional code
>   hw/mem/cxl_type3: CDAT pre-allocate and check resources prior to work
>   hw/mem/cxl_type3: Change the CDAT allocation/free strategy
>   hw/mem/cxl_type3: Refactor CDAT sub-table entry initialization into a
> function
> 
>  hw/mem/cxl_type3.c | 240 +++--
>  1 file changed, 122 insertions(+), 118 deletions(-)
> 

Thanks, I'm going to roll this stuff into the original patch set for v8.
Some of this I already have (like the check patch stuff).
Some I may disagree with in which case  I'll reply to the patches - note
I haven't looked at them in detail yet!

Jonathan

Re: MultiFD and default channel out of order mapping on receive side.

2022-10-13 Thread manish.mishra

On 13/10/22 1:45 pm, Daniel P. Berrangé wrote:

On Thu, Oct 13, 2022 at 01:23:40AM +0530, manish.mishra wrote:

Hi Everyone,
Hope everyone is doing great. I have seen some live migration issues with
qemu-4.2 when using multiFD. Signature of issue is something like this.
2022-10-01T09:57:53.972864Z qemu-kvm: failed to receive packet via multifd
channel 0: multifd: received packet magic 5145564d expected 11223344

Basically default live migration channel packet is received on multiFD channel.
I see an older patch explaining a potential reason for this behavior.
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.gnu.org_archive_html_qemu-2Ddevel_2019-2D10_msg05920.html&d=DwIBaQ&c=s883GpUCOChKOHiocYtGcg&r=c4KON2DiMd-szjwjggQcuUvTsPWblztAL0gVzaHnNmc&m=LZBcU_C3HMbpUCFZgqxkS-pV8C2mHOjqUTzt45LlLwa26DA0pCAjJVDoamnX8vnC&s=B-b_HMnn_ee6JeA87-PVNBrBqxzdWYgo5PpaP91dqT8&e=

[PATCH 3/3] migration/multifd: fix potential wrong acception order of IO.

But i see this patch was not merged. By looking at qemu master code, i
could not find any other patch too which can handle this issue. So as
per my understanding this is still a potential issue even in qemu
master. I mainly wanted to check why this patch was dropped?

See my replies in that message - it broke compatibility of data on
the wire, meaning old QEMU can't talk to new QEMU and vice versa.

We need a fix for this issue, but it needs to take into account
wire compatibility.

With regards,
Daniel

ok got it, thank you so much Daniel, in that case i will try to create some
patch considering backward compatibility and send for review. Mainly i wanted
to understand if it is handled somehow differently in upstream master, but
manually looking code it did not look like that, so just wanted to confirm.

Thanks

Manish Mishra

Re: [RFC PATCH 4/4] docs/devel: try and improve the language around patch review

2022-10-13 Thread Markus Armbruster

Alex Bennée  writes:

> It is important that contributors take the review process seriously
> and we collaborate in a respectful way while avoiding personal
> attacks. Try and make this clear in the language.
>
> Signed-off-by: Alex Bennée 

Reviewed-by: Markus Armbruster

Re: [PATCH 2/5] hw/mem/cxl_type3: Pull validation checks ahead of functional code

On Wed, 12 Oct 2022 14:21:17 -0400
Gregory Price  wrote:

> For style - pulling these validations ahead flattens the code.

True, but at the cost of separating the check from where it is
obvious why we have the check.  I'd prefer to see it next to the
use. 

Inverting the hostmem check is reasonable so I'll make that change.

My original thinking is that doing so would make adding non volatile
support messier but given you plan to factor out most of this the
change won't be too bad anyway.


> 
> Signed-off-by: Gregory Price 
> ---
>  hw/mem/cxl_type3.c | 193 ++---
>  1 file changed, 96 insertions(+), 97 deletions(-)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 94bc439d89..43b2b9e041 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -32,107 +32,106 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  int dslbis_nonvolatile_num = 4;
>  MemoryRegion *mr;
>  
> +if (!ct3d->hostmem) {
> +return len;
> +}
> +
> +mr = host_memory_backend_get_memory(ct3d->hostmem);
> +if (!mr) {
> +return -EINVAL;
> +}
> +
>  /* Non volatile aspects */
> -if (ct3d->hostmem) {
> -dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> -if (!dsmas_nonvolatile) {
> -return -ENOMEM;
> -}
> -nonvolatile_dsmad = next_dsmad_handle++;
> -mr = host_memory_backend_get_memory(ct3d->hostmem);
> -if (!mr) {
> -return -EINVAL;
> -}
> -*dsmas_nonvolatile = (CDATDsmas) {
> -.header = {
> -.type = CDAT_TYPE_DSMAS,
> -.length = sizeof(*dsmas_nonvolatile),
> -},
> -.DSMADhandle = nonvolatile_dsmad,
> -.flags = CDAT_DSMAS_FLAG_NV,
> -.DPA_base = 0,
> -.DPA_length = int128_get64(mr->size),
> -};
> -len++;
> -
> -/* For now, no memory side cache, plausiblish numbers */
> -dslbis_nonvolatile =
> -g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
> -if (!dslbis_nonvolatile) {
> -return -ENOMEM;
> -}
> +dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> +if (!dsmas_nonvolatile) {
> +return -ENOMEM;
> +}
> +nonvolatile_dsmad = next_dsmad_handle++;
> +*dsmas_nonvolatile = (CDATDsmas) {
> +.header = {
> +.type = CDAT_TYPE_DSMAS,
> +.length = sizeof(*dsmas_nonvolatile),
> +},
> +.DSMADhandle = nonvolatile_dsmad,
> +.flags = CDAT_DSMAS_FLAG_NV,
> +.DPA_base = 0,
> +.DPA_length = int128_get64(mr->size),
> +};
> +len++;
>  
> -dslbis_nonvolatile[0] = (CDATDslbis) {
> -.header = {
> -.type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> -},
> -.handle = nonvolatile_dsmad,
> -.flags = HMAT_LB_MEM_MEMORY,
> -.data_type = HMAT_LB_DATA_READ_LATENCY,
> -.entry_base_unit = 1, /* 10ns base */
> -.entry[0] = 15, /* 150ns */
> -};
> -len++;
> -
> -dslbis_nonvolatile[1] = (CDATDslbis) {
> -.header = {
> -.type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> -},
> -.handle = nonvolatile_dsmad,
> -.flags = HMAT_LB_MEM_MEMORY,
> -.data_type = HMAT_LB_DATA_WRITE_LATENCY,
> -.entry_base_unit = 1,
> -.entry[0] = 25, /* 250ns */
> -};
> -len++;
> -
> -dslbis_nonvolatile[2] = (CDATDslbis) {
> -.header = {
> -.type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> -},
> -.handle = nonvolatile_dsmad,
> -.flags = HMAT_LB_MEM_MEMORY,
> -.data_type = HMAT_LB_DATA_READ_BANDWIDTH,
> -.entry_base_unit = 1000, /* GB/s */
> -.entry[0] = 16,
> -};
> -len++;
> -
> -dslbis_nonvolatile[3] = (CDATDslbis) {
> -.header = {
> -.type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> -},
> -.handle = nonvolatile_dsmad,
> -.flags = HMAT_LB_MEM_MEMORY,
> -.data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
> -.entry_base_unit = 1000, /* GB/s */
> -.entry[0] = 16,
> -};
> -len++;
> -
> -mr = host_memory_backend_get_memory(ct3d->hostmem);
> -if (!mr) {
> -return -EINVAL;
> -}
> -dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
> -*dsemts_nonvolatile = (CDATDsemts) {
> -.header = {
> -.type = CDAT_TYPE_DSEMTS,
> -.length = sizeof(*dsemts_nonvolatile),
> -

[PATCH v4 7/7] qga: Add HW address getting for FreeBSD

Replace the stub function in commands-bsd.c with code that actually gets
the HW address.

Reviewed-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 qga/commands-bsd.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/qga/commands-bsd.c b/qga/commands-bsd.c
index ebf0fb8b0f..15cade2d4c 100644
--- a/qga/commands-bsd.c
+++ b/qga/commands-bsd.c
@@ -20,6 +20,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
@@ -179,7 +181,20 @@ GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
 bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
bool *obtained, Error **errp)
 {
+struct sockaddr_dl *sdp;
+
 *obtained = false;
+
+if (ifa->ifa_addr->sa_family != AF_LINK) {
+/* We can get HW address only for AF_LINK family. */
+g_debug("failed to get MAC address of %s", ifa->ifa_name);
+return true;
+}
+
+sdp = (struct sockaddr_dl *)ifa->ifa_addr;
+memcpy(buf, sdp->sdl_data + sdp->sdl_nlen, ETHER_ADDR_LEN);
+*obtained = true;
+
 return true;
 }
 #endif /* HAVE_GETIFADDRS */
-- 
2.34.1

[PATCH v2] migration/channel-block: fix return value for qio_channel_block_{readv, writev}

2022-10-13 Thread Fiona Ebner

in the error case. The documentation in include/io/channel.h states
that -1 or QIO_CHANNEL_ERR_BLOCK should be returned upon error. Simply
passing along the return value from the bdrv-functions has the
potential to confuse the call sites. Non-blocking mode is not
implemented currently, so -1 it is.

Signed-off-by: Fiona Ebner 
---

v1 -> v2:
* Use error_setg_errno() instead of error_setg().
* Use "failed" instead of "returned error" in error message. Now
  that no numerical error code is used, this sounds a bit nicer
  IMHO.

 migration/channel-block.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/channel-block.c b/migration/channel-block.c
index c55c8c93ce..f4ab53acdb 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -62,7 +62,8 @@ qio_channel_block_readv(QIOChannel *ioc,
 qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
 ret = bdrv_readv_vmstate(bioc->bs, &qiov, bioc->offset);
 if (ret < 0) {
-return ret;
+error_setg_errno(errp, -ret, "bdrv_readv_vmstate failed");
+return -1;
 }
 
 bioc->offset += qiov.size;
@@ -86,7 +87,8 @@ qio_channel_block_writev(QIOChannel *ioc,
 qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
 ret = bdrv_writev_vmstate(bioc->bs, &qiov, bioc->offset);
 if (ret < 0) {
-return ret;
+error_setg_errno(errp, -ret, "bdrv_writev_vmstate failed");
+return -1;
 }
 
 bioc->offset += qiov.size;
-- 
2.30.2

Re: [PATCH] configure: Avoid using strings binary

2022-10-13 Thread Michal Prívozník

On 10/13/22 10:37, Michal Privoznik wrote:
> When determining the endiandness of the target architecture we're
> building for a small program is compiled, which in an obfuscated
> way declares two strings. Then, we look which string is in
> correct order (using strings binary) and deduct the endiandness.
> But using the strings binary is problematic, because it's part of
> toolchain (strings is just a symlink to
> x86_64-pc-linux-gnu-strings or llvm-strings). And when
> (cross-)compiling, it requires users to set the symlink to the
> correct toolchain.
> 
> Fortunately, we have a better alternative anyways. Since we
> require either clang or gcc we can rely on macros they declare.
> 
> Bug: https://bugs.gentoo.org/876933
> Signed-off-by: Michal Privoznik 
> ---
>  configure | 33 ++---
>  1 file changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/configure b/configure
> index 45ee6f4eb3..91e04635cb 100755
> --- a/configure
> +++ b/configure
> @@ -1426,27 +1426,30 @@ fi
>  # ---
>  # big/little endian test
>  cat > $TMPC << EOF
> -#include 
> -short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, };
> -short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, 
> };
> -int main(int argc, char *argv[])
> -{
> -return printf("%s %s\n", (char *)big_endian, (char *)little_endian);
> -}
> +#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN || \

Actually, this needs to be __BYTE_ORDER__ (missing those two underscores
at the end).

> +defined(__BIG_ENDIAN__)
> +# error BIG
> +#endif
> +int main(void) { return 0; }
>  EOF
>  
>  if compile_prog ; then
> -if strings -a $TMPE | grep -q BiGeNdIaN ; then
> -bigendian="yes"
> -elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then
> -bigendian="no"
> -else
> -echo big/little test failed
> -exit 1
> -fi
> +  bigendian="yes"

And this needs to be no. Will post v2 shortly.

Michal

[PATCH v4 3/7] qga: Add UFS freeze/thaw support for FreeBSD

UFS supports FS freezing through ioctl UFSSUSPEND on /dev/ufssuspend.
A frozen FS can be thawed by closing the /dev/ufssuspend file descriptor.

Use getmntinfo to get a list of mounted FS.

Reviewed-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 qga/commands-bsd.c| 169 +++
 qga/commands-common.h |  11 ++
 qga/commands-posix.c  | 308 --
 qga/main.c|   7 +-
 qga/meson.build   |   3 +
 5 files changed, 334 insertions(+), 164 deletions(-)
 create mode 100644 qga/commands-bsd.c

diff --git a/qga/commands-bsd.c b/qga/commands-bsd.c
new file mode 100644
index 00..ca06692179
--- /dev/null
+++ b/qga/commands-bsd.c
@@ -0,0 +1,169 @@
+/*
+ * QEMU Guest Agent BSD-specific command implementations
+ *
+ * Copyright (c) Virtuozzo International GmbH.
+ *
+ * Authors:
+ *  Alexander Ivanov  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qga-qapi-commands.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/error.h"
+#include "qemu/queue.h"
+#include "commands-common.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
+bool build_fs_mount_list(FsMountList *mounts, Error **errp)
+{
+FsMount *mount;
+struct statfs *mntbuf, *mntp;
+struct stat statbuf;
+int i, count, ret;
+
+count = getmntinfo(&mntbuf, MNT_NOWAIT);
+if (count == 0) {
+error_setg_errno(errp, errno, "getmntinfo failed");
+return false;
+}
+
+for (i = 0; i < count; i++) {
+mntp = &mntbuf[i];
+ret = stat(mntp->f_mntonname, &statbuf);
+if (ret != 0) {
+error_setg_errno(errp, errno, "stat failed on %s",
+ mntp->f_mntonname);
+return false;
+}
+
+mount = g_new0(FsMount, 1);
+
+mount->dirname = g_strdup(mntp->f_mntonname);
+mount->devtype = g_strdup(mntp->f_fstypename);
+mount->devmajor = major(mount->dev);
+mount->devminor = minor(mount->dev);
+mount->fsid = mntp->f_fsid;
+mount->dev = statbuf.st_dev;
+
+QTAILQ_INSERT_TAIL(mounts, mount, next);
+}
+return true;
+}
+#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
+
+#if defined(CONFIG_FSFREEZE)
+static int ufssuspend_fd = -1;
+static int ufssuspend_cnt;
+
+int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
+  strList *mountpoints,
+  FsMountList mounts,
+  Error **errp)
+{
+int ret;
+strList *list;
+struct FsMount *mount;
+
+if (ufssuspend_fd != -1) {
+error_setg(errp, "filesystems have already frozen");
+return -1;
+}
+
+ufssuspend_cnt = 0;
+ufssuspend_fd = qemu_open(_PATH_UFSSUSPEND, O_RDWR, errp);
+if (ufssuspend_fd == -1) {
+return -1;
+}
+
+QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
+/*
+ * To issue fsfreeze in the reverse order of mounts, check if the
+ * mount is listed in the list here
+ */
+if (has_mountpoints) {
+for (list = mountpoints; list; list = list->next) {
+if (g_str_equal(list->value, mount->dirname)) {
+break;
+}
+}
+if (!list) {
+continue;
+}
+}
+
+/* Only UFS supports suspend */
+if (!g_str_equal(mount->devtype, "ufs")) {
+continue;
+}
+
+ret = ioctl(ufssuspend_fd, UFSSUSPEND, &mount->fsid);
+if (ret == -1) {
+/*
+ * ioctl returns EBUSY for all the FS except the first one
+ * that was suspended
+ */
+if (errno == EBUSY) {
+continue;
+}
+error_setg_errno(errp, errno, "failed to freeze %s",
+ mount->dirname);
+goto error;
+}
+ufssuspend_cnt++;
+}
+return ufssuspend_cnt;
+error:
+close(ufssuspend_fd);
+ufssuspend_fd = -1;
+return -1;
+
+}
+
+/*
+ * We don't need to call UFSRESUME ioctl because all the frozen FS
+ * are thawed on /dev/ufssuspend closing.
+ */
+int qmp_guest_fsfreeze_do_thaw(Error **errp)
+{
+int ret = ufssuspend_cnt;
+ufssuspend_cnt = 0;
+if (ufssuspend_fd != -1) {
+close(ufssuspend_fd);
+ufssuspend_fd = -1;
+}
+return ret;
+}
+
+GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
+{
+error_setg(errp, QERR_UNSUPPORTED);
+return NULL;
+}
+
+GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
+{
+error_setg(errp, QERR_UNSUPPORTED);
+return NULL;
+}
+
+GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
+{
+error_setg

[PATCH v4 6/7] qga: Move HW address getting to a separate function

In the next patch FreeBSD support for guest-network-get-interfaces will be
added. As preparation, move the Linux-specific code for getting the HW
address into a separate function and add a stub function to commands-bsd.c.

Reviewed-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 qga/commands-bsd.c| 16 +++
 qga/commands-common.h |  6 +++
 qga/commands-posix.c  | 98 ---
 3 files changed, 78 insertions(+), 42 deletions(-)

diff --git a/qga/commands-bsd.c b/qga/commands-bsd.c
index ca06692179..ebf0fb8b0f 100644
--- a/qga/commands-bsd.c
+++ b/qga/commands-bsd.c
@@ -167,3 +167,19 @@ GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
 return NULL;
 }
 #endif /* CONFIG_FSFREEZE */
+
+#ifdef HAVE_GETIFADDRS
+/*
+ * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
+ * buffer with ETHER_ADDR_LEN length at least.
+ *
+ * Returns false in case of an error, otherwise true. "obtained" argument
+ * is true if a MAC address was obtained successful, otherwise false.
+ */
+bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
+   bool *obtained, Error **errp)
+{
+*obtained = false;
+return true;
+}
+#endif /* HAVE_GETIFADDRS */
diff --git a/qga/commands-common.h b/qga/commands-common.h
index 2d9878a634..05d1f7ccdd 100644
--- a/qga/commands-common.h
+++ b/qga/commands-common.h
@@ -56,6 +56,12 @@ int64_t qmp_guest_fsfreeze_do_freeze_list(bool 
has_mountpoints,
 int qmp_guest_fsfreeze_do_thaw(Error **errp);
 #endif /* CONFIG_FSFREEZE */
 
+#ifdef HAVE_GETIFADDRS
+#include 
+bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
+   bool *obtained, Error **errp);
+#endif
+
 typedef struct GuestFileHandle GuestFileHandle;
 
 GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp);
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index f5b9e5c530..787ffb1562 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -41,20 +41,12 @@
 #endif
 #endif
 
-#ifdef __FreeBSD__
-/*
- * The code under HAVE_GETIFADDRS condition can't be compiled in FreeBSD.
- * Fix it in one of the following patches.
- */
-#undef HAVE_GETIFADDRS
-#endif
-
 #ifdef HAVE_GETIFADDRS
 #include 
 #include 
 #include 
+#include 
 #include 
-#include 
 #ifdef CONFIG_SOLARIS
 #include 
 #endif
@@ -2889,6 +2881,57 @@ static int guest_get_network_stats(const char *name,
 return -1;
 }
 
+#ifndef __FreeBSD__
+/*
+ * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
+ * buffer with ETHER_ADDR_LEN length at least.
+ *
+ * Returns false in case of an error, otherwise true. "obtained" argument
+ * is true if a MAC address was obtained successful, otherwise false.
+ */
+bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
+   bool *obtained, Error **errp)
+{
+struct ifreq ifr;
+int sock;
+
+*obtained = false;
+
+/* we haven't obtained HW address yet */
+sock = socket(PF_INET, SOCK_STREAM, 0);
+if (sock == -1) {
+error_setg_errno(errp, errno, "failed to create socket");
+return false;
+}
+
+memset(&ifr, 0, sizeof(ifr));
+pstrcpy(ifr.ifr_name, IF_NAMESIZE, ifa->ifa_name);
+if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
+/*
+ * We can't get the hw addr of this interface, but that's not a
+ * fatal error.
+ */
+if (errno == EADDRNOTAVAIL) {
+/* The interface doesn't have a hw addr (e.g. loopback). */
+g_debug("failed to get MAC address of %s: %s",
+ifa->ifa_name, strerror(errno));
+} else{
+g_warning("failed to get MAC address of %s: %s",
+  ifa->ifa_name, strerror(errno));
+}
+} else {
+#ifdef CONFIG_SOLARIS
+memcpy(buf, &ifr.ifr_addr.sa_data, ETHER_ADDR_LEN);
+#else
+memcpy(buf, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+#endif
+*obtained = true;
+}
+close(sock);
+return true;
+}
+#endif /* __FreeBSD__ */
+
 /*
  * Build information about guest interfaces
  */
@@ -2909,9 +2952,8 @@ GuestNetworkInterfaceList 
*qmp_guest_network_get_interfaces(Error **errp)
 GuestNetworkInterfaceStat *interface_stat = NULL;
 char addr4[INET_ADDRSTRLEN];
 char addr6[INET6_ADDRSTRLEN];
-int sock;
-struct ifreq ifr;
-unsigned char *mac_addr;
+unsigned char mac_addr[ETHER_ADDR_LEN];
+bool obtained;
 void *p;
 
 g_debug("Processing %s interface", ifa->ifa_name);
@@ -2926,45 +2968,17 @@ GuestNetworkInterfaceList 
*qmp_guest_network_get_interfaces(Error **errp)
 }
 
 if (!info->has_hardware_address) {
-/* we haven't obtained HW address yet */
-sock = socket(PF_INET, SOCK_STREAM, 0);
-if (sock == -1) {
-error_setg_errno(errp, errno, "failed to create socket");
+if (!guest_get_

[PATCH] qapi-gen: mark coroutine QMP command functions as coroutine_fn

Coroutine commands have to be declared as coroutine_fn, but the
marker does not show up in the qapi-commands-* headers; likewise, the
marshaling function calls the command and therefore must be coroutine_fn.
Static analysis would want coroutine_fn to match between prototype and
declaration, because in principle coroutines might be compiled to a
completely different calling convention.  So we would like to add the
marker to the header.

Unfortunately, doing so causes lots of files to fail to compile because
they do not include qemu/coroutine.h; which in principle is legitimate
because the files could be only dealing with non-coroutine commands.
There are three ways to deal with this:

- include qemu/coroutine.h in all the files that include the qapi-commands-*
  headers.  This would be a large change and in many cases unnecessary,
  because only very few files deal with coroutine commands

- include qemu/coroutine.h from the headers themselves.  This is
  ugly for the same reason, and also because headers-including-headers
  make it harder to avoid world rebuilds

- only define the affected prototypes if coroutine_fn is defined,
  meaning that the .c file has already included qemu/coroutine.h.
  This is what the patch goes for.

Signed-off-by: Paolo Bonzini 
---
 scripts/qapi/commands.py | 38 ++
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 38ca38a7b9..31833f172f 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py
@@ -41,11 +41,13 @@
 def gen_command_decl(name: str,
  arg_type: Optional[QAPISchemaObjectType],
  boxed: bool,
- ret_type: Optional[QAPISchemaType]) -> str:
+ ret_type: Optional[QAPISchemaType],
+ coroutine: bool) -> str:
 return mcgen('''
-%(c_type)s qmp_%(c_name)s(%(params)s);
+%(c_type)s %(coroutine_fn)sqmp_%(c_name)s(%(params)s);
 ''',
  c_type=(ret_type and ret_type.c_type()) or 'void',
+ coroutine_fn='coroutine_fn ' if coroutine else '',
  c_name=c_name(name),
  params=build_params(arg_type, boxed, 'Error **errp'))
 
@@ -157,16 +159,21 @@ def gen_marshal_output(ret_type: QAPISchemaType) -> str:
  c_type=ret_type.c_type(), c_name=ret_type.c_name())
 
 
-def build_marshal_proto(name: str) -> str:
-return ('void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)'
-% c_name(name))
+def build_marshal_proto(name: str,
+coroutine: bool) -> str:
+return ('void %(coroutine_fn)sqmp_marshal_%(c_name)s(%(params)s)' % {
+'coroutine_fn': 'coroutine_fn ' if coroutine else '',
+'c_name': c_name(name),
+'params': 'QDict *args, QObject **ret, Error **errp',
+})
 
 
-def gen_marshal_decl(name: str) -> str:
+def gen_marshal_decl(name: str,
+ coroutine: bool) -> str:
 return mcgen('''
 %(proto)s;
 ''',
- proto=build_marshal_proto(name))
+ proto=build_marshal_proto(name, coroutine))
 
 
 def gen_trace(name: str) -> str:
@@ -181,7 +188,8 @@ def gen_marshal(name: str,
 arg_type: Optional[QAPISchemaObjectType],
 boxed: bool,
 ret_type: Optional[QAPISchemaType],
-gen_tracing: bool) -> str:
+gen_tracing: bool,
+coroutine: bool) -> str:
 have_args = boxed or (arg_type and not arg_type.is_empty())
 if have_args:
 assert arg_type is not None
@@ -195,7 +203,7 @@ def gen_marshal(name: str,
 bool ok = false;
 Visitor *v;
 ''',
-proto=build_marshal_proto(name))
+proto=build_marshal_proto(name, coroutine))
 
 if ret_type:
 ret += mcgen('''
@@ -314,6 +322,7 @@ def _begin_user_module(self, name: str) -> None:
 #include "qapi/qmp/qdict.h"
 #include "qapi/dealloc-visitor.h"
 #include "qapi/error.h"
+#include "qemu/coroutine.h"
 #include "%(visit)s.h"
 #include "%(commands)s.h"
 
@@ -388,10 +397,15 @@ def visit_command(self,
self._genh, self._genc):
 self._genc.add(gen_marshal_output(ret_type))
 with ifcontext(ifcond, self._genh, self._genc):
-self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type))
-self._genh.add(gen_marshal_decl(name))
+if coroutine:
+self._genh.add('#ifdef coroutine_fn\n')
+self._genh.add(gen_command_decl(name, arg_type, boxed,
+ret_type, coroutine))
+self._genh.add(gen_marshal_decl(name, coroutine))
+if coroutine:
+self._genh.add('#endif\n')
 self._genc.add(gen_marshal(name, arg_type, boxed, ret_type,
-   self._gen_tracing))
+

[PATCH v4 2/7] qga: Move Linux-specific FS freeze/thaw code to a separate file

In the next patches we are going to add FreeBSD support for QEMU Guest
Agent. As a result, the code in commands-posix.c would become too cumbersome.

Move Linux-specific FS freeze/thaw code to a separate file commands-linux.c
keeping common POSIX code in commands-posix.c.

Reviewed-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 qga/commands-common.h |  35 +
 qga/commands-linux.c  | 286 +
 qga/commands-posix.c  | 289 +++---
 qga/meson.build   |   3 +
 4 files changed, 340 insertions(+), 273 deletions(-)
 create mode 100644 qga/commands-linux.c

diff --git a/qga/commands-common.h b/qga/commands-common.h
index d0e4a9696f..181fc330aa 100644
--- a/qga/commands-common.h
+++ b/qga/commands-common.h
@@ -10,6 +10,40 @@
 #define QGA_COMMANDS_COMMON_H
 
 #include "qga-qapi-types.h"
+#include "guest-agent-core.h"
+#include "qemu/queue.h"
+
+#if defined(__linux__)
+#include 
+#ifdef FIFREEZE
+#define CONFIG_FSFREEZE
+#endif
+#ifdef FITRIM
+#define CONFIG_FSTRIM
+#endif
+#endif /* __linux__ */
+
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
+typedef struct FsMount {
+char *dirname;
+char *devtype;
+unsigned int devmajor, devminor;
+QTAILQ_ENTRY(FsMount) next;
+} FsMount;
+
+typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
+
+bool build_fs_mount_list(FsMountList *mounts, Error **errp);
+void free_fs_mount_list(FsMountList *mounts);
+#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
+
+#if defined(CONFIG_FSFREEZE)
+int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
+  strList *mountpoints,
+  FsMountList mounts,
+  Error **errp);
+int qmp_guest_fsfreeze_do_thaw(Error **errp);
+#endif /* CONFIG_FSFREEZE */
 
 typedef struct GuestFileHandle GuestFileHandle;
 
@@ -29,4 +63,5 @@ GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
  */
 char *qga_get_host_name(Error **errp);
 
+void ga_wait_child(pid_t pid, int *status, Error **errp);
 #endif
diff --git a/qga/commands-linux.c b/qga/commands-linux.c
new file mode 100644
index 00..214e408fcd
--- /dev/null
+++ b/qga/commands-linux.c
@@ -0,0 +1,286 @@
+/*
+ * QEMU Guest Agent Linux-specific command implementations
+ *
+ * Copyright IBM Corp. 2011
+ *
+ * Authors:
+ *  Michael Roth  
+ *  Michal Privoznik  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "commands-common.h"
+#include "cutils.h"
+#include 
+#include 
+
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
+static int dev_major_minor(const char *devpath,
+   unsigned int *devmajor, unsigned int *devminor)
+{
+struct stat st;
+
+*devmajor = 0;
+*devminor = 0;
+
+if (stat(devpath, &st) < 0) {
+slog("failed to stat device file '%s': %s", devpath, strerror(errno));
+return -1;
+}
+if (S_ISDIR(st.st_mode)) {
+/* It is bind mount */
+return -2;
+}
+if (S_ISBLK(st.st_mode)) {
+*devmajor = major(st.st_rdev);
+*devminor = minor(st.st_rdev);
+return 0;
+}
+return -1;
+}
+
+static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
+{
+struct mntent *ment;
+FsMount *mount;
+char const *mtab = "/proc/self/mounts";
+FILE *fp;
+unsigned int devmajor, devminor;
+
+fp = setmntent(mtab, "r");
+if (!fp) {
+error_setg(errp, "failed to open mtab file: '%s'", mtab);
+return false;
+}
+
+while ((ment = getmntent(fp))) {
+/*
+ * An entry which device name doesn't start with a '/' is
+ * either a dummy file system or a network file system.
+ * Add special handling for smbfs and cifs as is done by
+ * coreutils as well.
+ */
+if ((ment->mnt_fsname[0] != '/') ||
+(strcmp(ment->mnt_type, "smbfs") == 0) ||
+(strcmp(ment->mnt_type, "cifs") == 0)) {
+continue;
+}
+if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
+/* Skip bind mounts */
+continue;
+}
+
+mount = g_new0(FsMount, 1);
+mount->dirname = g_strdup(ment->mnt_dir);
+mount->devtype = g_strdup(ment->mnt_type);
+mount->devmajor = devmajor;
+mount->devminor = devminor;
+
+QTAILQ_INSERT_TAIL(mounts, mount, next);
+}
+
+endmntent(fp);
+return true;
+}
+
+static void decode_mntname(char *name, int len)
+{
+int i, j = 0;
+for (i = 0; i <= len; i++) {
+if (name[i] != '\\') {
+name[j++] = name[i];
+} else if (name[i + 1] == '\\') {
+name[j++] = '\\';
+i++;
+} else if (name[i + 1

[PATCH] blkdebug: ignore invalid rules in non-coroutine context

blkdebug events can be called from either non-coroutine or coroutine
contexts.  However, some actions (specifically suspend actions and
errors reported with immediately=off) only make sense from within
a coroutine.

Currently, using those actions would lead to an abort() in
qemu_coroutine_yield() ("Co-routine is yielding to no one").
Catch them and print an error instead.

Signed-off-by: Paolo Bonzini 
---
 block/blkdebug.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index bbf2948703..bf0aedb17d 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -31,6 +31,7 @@
 #include "block/qdict.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/error-report.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
@@ -623,8 +624,13 @@ static int rule_check(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 
 qemu_mutex_unlock(&s->lock);
 if (!immediately) {
-aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
-qemu_coroutine_yield();
+if (qemu_in_coroutine()) {
+aio_co_schedule(qemu_get_current_aio_context(), 
qemu_coroutine_self());
+qemu_coroutine_yield();
+} else {
+error_report("Non-coroutine event %s needs immediately = off\n",
+ BlkdebugEvent_lookup.array[rule->event]);
+}
 }
 
 return -error;
@@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, 
BlkdebugEvent event)
 }
 
 while (actions_count[ACTION_SUSPEND] > 0) {
-qemu_coroutine_yield();
+if (qemu_in_coroutine()) {
+qemu_coroutine_yield();
+} else {
+error_report("Non-coroutine event %s cannot suspend\n",
+ BlkdebugEvent_lookup.array[event]);
+}
 actions_count[ACTION_SUSPEND]--;
 }
 }
-- 
2.37.3

[PATCH v4 1/7] qga: Add initial FreeBSD support

- Fix device path.
- Fix virtio-serial channel initialization.
- Make the code buildable in FreeBSD.

Reviewed-by: Konstantin Kostiuk 
Acked-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 meson.build  |  2 +-
 qga/channel-posix.c  | 19 +++
 qga/commands-posix.c |  8 
 qga/main.c   |  6 +-
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/meson.build b/meson.build
index b686dfef75..71fe72ea06 100644
--- a/meson.build
+++ b/meson.build
@@ -75,7 +75,7 @@ have_tools = get_option('tools') \
   .allowed()
 have_ga = get_option('guest_agent') \
   .disable_auto_if(not have_system and not have_tools) \
-  .require(targetos in ['sunos', 'linux', 'windows'],
+  .require(targetos in ['sunos', 'linux', 'windows', 'freebsd'],
error_message: 'unsupported OS for QEMU guest agent') \
   .allowed()
 have_block = have_system or have_tools
diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index 6796a02cff..568350ded4 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -149,6 +149,25 @@ static gboolean ga_channel_open(GAChannel *c, const gchar 
*path,
 return false;
 }
 #endif
+#ifdef __FreeBSD__
+/*
+ * In the default state channel sends echo of every command to a
+ * client. The client programm doesn't expect this and raises an
+ * error. Suppress echo by resetting ECHO terminal flag.
+ */
+struct termios tio;
+if (tcgetattr(fd, &tio) < 0) {
+error_setg_errno(errp, errno, "error getting channel termios 
attrs");
+close(fd);
+return false;
+}
+tio.c_lflag &= ~ECHO;
+if (tcsetattr(fd, TCSAFLUSH, &tio) < 0) {
+error_setg_errno(errp, errno, "error setting channel termios 
attrs");
+close(fd);
+return false;
+}
+#endif /* __FreeBSD__ */
 ret = ga_channel_client_add(c, fd);
 if (ret) {
 error_setg(errp, "error adding channel to main loop");
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index eea819cff0..16d67e9f6d 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -51,6 +51,14 @@
 #endif
 #endif
 
+#ifdef __FreeBSD__
+/*
+ * The code under HAVE_GETIFADDRS condition can't be compiled in FreeBSD.
+ * Fix it in one of the following patches.
+ */
+#undef HAVE_GETIFADDRS
+#endif
+
 #ifdef HAVE_GETIFADDRS
 #include 
 #include 
diff --git a/qga/main.c b/qga/main.c
index 5a9d8252e0..0d27c97d38 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -45,9 +45,13 @@
 #endif
 
 #ifndef _WIN32
+#ifdef __FreeBSD__
+#define QGA_VIRTIO_PATH_DEFAULT "/dev/vtcon/org.qemu.guest_agent.0"
+#else /* __FreeBSD__ */
 #define QGA_VIRTIO_PATH_DEFAULT "/dev/virtio-ports/org.qemu.guest_agent.0"
-#define QGA_STATE_RELATIVE_DIR  "run"
+#endif /* __FreeBSD__ */
 #define QGA_SERIAL_PATH_DEFAULT "/dev/ttyS0"
+#define QGA_STATE_RELATIVE_DIR  "run"
 #else
 #define QGA_VIRTIO_PATH_DEFAULT ".\\Global\\org.qemu.guest_agent.0"
 #define QGA_STATE_RELATIVE_DIR  "qemu-ga"
-- 
2.34.1

[PATCH v4 4/7] qga: Add shutdown/halt/reboot support for FreeBSD

Add appropriate shutdown command arguments to qmp_guest_shutdown()
for FreeBSD.

Reviewed-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 qga/commands-posix.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 49f9996a9c..88e0d0fe24 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -90,6 +90,10 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, 
Error **errp)
 const char *powerdown_flag = "-i5";
 const char *halt_flag = "-i0";
 const char *reboot_flag = "-i6";
+#elif defined(CONFIG_BSD)
+const char *powerdown_flag = "-p";
+const char *halt_flag = "-h";
+const char *reboot_flag = "-r";
 #else
 const char *powerdown_flag = "-P";
 const char *halt_flag = "-H";
@@ -120,6 +124,9 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, 
Error **errp)
 #ifdef CONFIG_SOLARIS
 execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y",
   "hypervisor initiated shutdown", (char *)NULL);
+#elif defined(CONFIG_BSD)
+execl("/sbin/shutdown", "shutdown", shutdown_flag, "+0",
+   "hypervisor initiated shutdown", (char *)NULL);
 #else
 execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
"hypervisor initiated shutdown", (char *)NULL);
-- 
2.34.1

[PATCH v4 5/7] qga: Add support for user password setting in FreeBSD

Move qmp_guest_set_user_password() from __linux__ condition to
(__linux__ || __FreeBSD__) condition. Add command and arguments
for password setting in FreeBSD.

Reviewed-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Alexander Ivanov 
---
 qga/commands-posix.c | 35 +--
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 88e0d0fe24..f5b9e5c530 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -2122,7 +2122,9 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList 
*vcpus, Error **errp)
 
 return processed;
 }
+#endif /* __linux__ */
 
+#if defined(__linux__) || defined(__FreeBSD__)
 void qmp_guest_set_user_password(const char *username,
  const char *password,
  bool crypted,
@@ -2156,10 +2158,15 @@ void qmp_guest_set_user_password(const char *username,
 goto out;
 }
 
+#ifdef __FreeBSD__
+chpasswddata = g_strdup(rawpasswddata);
+passwd_path = g_find_program_in_path("pw");
+#else
 chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata);
-chpasswdlen = strlen(chpasswddata);
-
 passwd_path = g_find_program_in_path("chpasswd");
+#endif
+
+chpasswdlen = strlen(chpasswddata);
 
 if (!passwd_path) {
 error_setg(errp, "cannot find 'passwd' program in PATH");
@@ -2180,11 +2187,17 @@ void qmp_guest_set_user_password(const char *username,
 reopen_fd_to_null(1);
 reopen_fd_to_null(2);
 
+#ifdef __FreeBSD__
+const char *h_arg;
+h_arg = (crypted) ? "-H" : "-h";
+execl(passwd_path, "pw", "usermod", "-n", username, h_arg, "0", NULL);
+#else
 if (crypted) {
 execl(passwd_path, "chpasswd", "-e", NULL);
 } else {
 execl(passwd_path, "chpasswd", NULL);
 }
+#endif
 _exit(EXIT_FAILURE);
 } else if (pid < 0) {
 error_setg_errno(errp, errno, "failed to create child process");
@@ -2227,7 +2240,17 @@ out:
 close(datafd[1]);
 }
 }
+#else /* __linux__ || __FreeBSD__ */
+void qmp_guest_set_user_password(const char *username,
+ const char *password,
+ bool crypted,
+ Error **errp)
+{
+error_setg(errp, QERR_UNSUPPORTED);
+}
+#endif /* __linux__ || __FreeBSD__ */
 
+#ifdef __linux__
 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
int size, Error **errp)
 {
@@ -2764,14 +2787,6 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList 
*vcpus, Error **errp)
 return -1;
 }
 
-void qmp_guest_set_user_password(const char *username,
- const char *password,
- bool crypted,
- Error **errp)
-{
-error_setg(errp, QERR_UNSUPPORTED);
-}
-
 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
 {
 error_setg(errp, QERR_UNSUPPORTED);
-- 
2.34.1

[PATCH v2] qapi-gen: mark coroutine QMP command functions as coroutine_fn

Coroutine commands have to be declared as coroutine_fn, but the
marker does not show up in the qapi-commands-* headers; likewise, the
marshaling function calls the command and therefore must be coroutine_fn.
Static analysis would want coroutine_fn to match between prototype and
declaration, because in principle coroutines might be compiled to a
completely different calling convention.  So we would like to add the
marker to the header.

Unfortunately, doing so causes lots of files to fail to compile because
they do not include qemu/coroutine.h; which in principle is legitimate
because the files could be only dealing with non-coroutine commands.
There are three ways to deal with this:

- include qemu/coroutine.h in all the files that include the qapi-commands-*
  headers.  This would be a large change and in many cases unnecessary,
  because only very few files deal with coroutine commands

- include qemu/coroutine.h from the headers themselves.  This is
  ugly for the same reason, and also because headers-including-headers
  make it harder to avoid world rebuilds

- only define the affected prototypes if coroutine_fn is defined,
  meaning that the .c file has already included qemu/coroutine.h.
  This is what the patch goes for.

Signed-off-by: Paolo Bonzini 
---
 scripts/qapi/commands.py | 39 +++
 ui/console.c |  1 +
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 38ca38a7b9..956a0d968f 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py
@@ -41,11 +41,13 @@
 def gen_command_decl(name: str,
  arg_type: Optional[QAPISchemaObjectType],
  boxed: bool,
- ret_type: Optional[QAPISchemaType]) -> str:
+ ret_type: Optional[QAPISchemaType],
+ coroutine: bool) -> str:
 return mcgen('''
-%(c_type)s qmp_%(c_name)s(%(params)s);
+%(c_type)s %(coroutine_fn)sqmp_%(c_name)s(%(params)s);
 ''',
  c_type=(ret_type and ret_type.c_type()) or 'void',
+ coroutine_fn='coroutine_fn ' if coroutine else '',
  c_name=c_name(name),
  params=build_params(arg_type, boxed, 'Error **errp'))
 
@@ -157,16 +159,21 @@ def gen_marshal_output(ret_type: QAPISchemaType) -> str:
  c_type=ret_type.c_type(), c_name=ret_type.c_name())
 
 
-def build_marshal_proto(name: str) -> str:
-return ('void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)'
-% c_name(name))
+def build_marshal_proto(name: str,
+coroutine: bool) -> str:
+return ('void %(coroutine_fn)sqmp_marshal_%(c_name)s(%(params)s)' % {
+'coroutine_fn': 'coroutine_fn ' if coroutine else '',
+'c_name': c_name(name),
+'params': 'QDict *args, QObject **ret, Error **errp',
+})
 
 
-def gen_marshal_decl(name: str) -> str:
+def gen_marshal_decl(name: str,
+ coroutine: bool) -> str:
 return mcgen('''
 %(proto)s;
 ''',
- proto=build_marshal_proto(name))
+ proto=build_marshal_proto(name, coroutine))
 
 
 def gen_trace(name: str) -> str:
@@ -181,7 +188,8 @@ def gen_marshal(name: str,
 arg_type: Optional[QAPISchemaObjectType],
 boxed: bool,
 ret_type: Optional[QAPISchemaType],
-gen_tracing: bool) -> str:
+gen_tracing: bool,
+coroutine: bool) -> str:
 have_args = boxed or (arg_type and not arg_type.is_empty())
 if have_args:
 assert arg_type is not None
@@ -195,7 +203,7 @@ def gen_marshal(name: str,
 bool ok = false;
 Visitor *v;
 ''',
-proto=build_marshal_proto(name))
+proto=build_marshal_proto(name, coroutine))
 
 if ret_type:
 ret += mcgen('''
@@ -314,6 +322,7 @@ def _begin_user_module(self, name: str) -> None:
 #include "qapi/qmp/qdict.h"
 #include "qapi/dealloc-visitor.h"
 #include "qapi/error.h"
+#include "qemu/coroutine.h"
 #include "%(visit)s.h"
 #include "%(commands)s.h"
 
@@ -345,6 +354,7 @@ def visit_begin(self, schema: QAPISchema) -> None:
  c_prefix=c_name(self._prefix, protect=False)))
 self._genc.add(mcgen('''
 #include "qemu/osdep.h"
+#include "qemu/coroutine.h"
 #include "%(prefix)sqapi-commands.h"
 #include "%(prefix)sqapi-init-commands.h"
 
@@ -388,10 +398,15 @@ def visit_command(self,
self._genh, self._genc):
 self._genc.add(gen_marshal_output(ret_type))
 with ifcontext(ifcond, self._genh, self._genc):
-self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type))
-self._genh.add(gen_marshal_decl(name))
+if coroutine:
+self._genh.add('#ifdef coroutine_fn\n')
+self._genh.add(gen_command_decl(name, arg_type, boxed,
+

Re: [PATCH] RISC-V: Add support for Ztso

2022-10-13 Thread Dr. David Alan Gilbert

* Andrea Parri (and...@rivosinc.com) wrote:
> > > > Is x86's brand of memory ordering strong enough for Ztso?
> > > > I thought x86 had an optimisation where it was allowed to store forward
> > > > within the current CPU causing stores not to be quite strictly ordered.
> 
> [...]
> 
> > then a bit further down, '8.2.3.5 Intra-Processor Forwarding Is Allowed'
> > has an example and says
> > 
> > 'The memory-ordering model allows concurrent stores by two processors 
> > to be seen in
> > different orders by those two processors; specifically, each processor 
> > may perceive
> > its own store occurring before that of the other.'
> > 
> > Having said that, I remember it's really difficult to trigger; it's ~10
> > years since I saw an example to trigger it, and can't remember it.
> 
> AFAICT, Ztso allows the forwarding in question too.  Simulations with
> the axiomatic formalization confirm such expectation:

OK that seems to be what it says in:
https://five-embeddev.com/riscv-isa-manual/latest/ztso.html
  'In both of these memory models, it is the Load Value Axiom that allows a hart to
forward a value from its store buffer to a subsequent (in program order)
load—that is to say that stores can be forwarded locally before they are
visible to other harts'

> RISCV intra-processor-forwarding
> {
> 0:x5=1; 0:x6=x; 0:x8=y;
> 1:x5=1; 1:x6=y; 1:x8=x;
> }
>  P0  | P1  ;
>  sw x5,0(x6) | sw x5,0(x6) ;
>  lw x9,0(x6) | lw x9,0(x6) ;
>  lw x7,0(x8) | lw x7,0(x8) ;
> exists
> (0:x7=0 /\ 1:x7=0 /\ 0:x9=1 /\ 1:x9=1)

(I'm a bit fuzzy reading this...)
So is that the interesting case - where x7 is saying neither processor
saw the other processor's write yet, but they did see their own?


So from a qemu patch perspective, I think the important thing is that
the flag that's defined, is defined and commented in such a way that
it's obvious that local forwarding is allowed; we wouldn't want someone
emulating a stricter CPU (that doesn't allow local forwarding) to go and
use this flag as an indication that the host cpu is that strict.

Dave

> Test intra-processor-forwarding Allowed
> States 4
> 0:x7=0; 0:x9=1; 1:x7=0; 1:x9=1;
> 0:x7=0; 0:x9=1; 1:x7=1; 1:x9=1;
> 0:x7=1; 0:x9=1; 1:x7=0; 1:x9=1;
> 0:x7=1; 0:x9=1; 1:x7=1; 1:x9=1;
> Ok
> Witnesses
> Positive: 1 Negative: 3
> Condition exists (0:x7=0 /\ 1:x7=0 /\ 0:x9=1 /\ 1:x9=1)
> Observation intra-processor-forwarding Sometimes 1 3
> Time intra-processor-forwarding 0.00
> Hash=518e4b9b2f0770c94918ac5d7e311ba5
> 
>   Andrea
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH v4 0/7] qga: Add FreeBSD support

Add freeze/thaw, shutdown/halt/reboot, password setting and
guest-network-get-interfaces command support for FreeBSD.

v4:
6,7: Return bool instead int in guest_get_hw_addr().

v3:
1: Add a comment about echo suppressing.
5: Replace code moving by splitting the code into a few blocks under
   architecture conditions.
5,6: Move actions with dumb qmp_guest_set_user_password() to
 the appropriate patch.
6: Fix error/obtained return.

v2:
1: Reject the idea to move all the Linux-specific code to a separate file.
   First commit now adds initial support of FreeBSD. Fixed device paths
   and fixed virtio device initialization (disable echo). Add comment why
   we should disable the code under HAVE_GETIFADDRS in FreeBSD.
2: Replace the second commit (which now is the first) by moving
   Linux-specific freeze/thaw code to a separate file commands-linux.c.
3: Add error raising if stat() returns error. Replaced strcmp() calls by
   g_str_equal(). Add a comment explaining why UFSRESUME isn't necessary.
4: Replace #elifdef by #elif defined().
5: Now the code doesn't move from one file to another but still is
   moving inside file so the patch doesn't become easier to review. =(
   Fixed typos.
6,7: New patches. Add guest-network-get-interfaces command support.

Alexander Ivanov (7):
  qga: Add initial FreeBSD support
  qga: Move Linux-specific FS freeze/thaw code to a separate file
  qga: Add UFS freeze/thaw support for FreeBSD
  qga: Add shutdown/halt/reboot support for FreeBSD
  qga: Add support for user password setting in FreeBSD
  qga: Move HW address getting to a separate function
  qga: Add HW address getting for FreeBSD

 meson.build   |   2 +-
 qga/channel-posix.c   |  19 ++
 qga/commands-bsd.c| 200 +
 qga/commands-common.h |  52 
 qga/commands-linux.c  | 286 +++
 qga/commands-posix.c  | 641 ++
 qga/main.c|  13 +-
 qga/meson.build   |   6 +
 8 files changed, 780 insertions(+), 439 deletions(-)
 create mode 100644 qga/commands-bsd.c
 create mode 100644 qga/commands-linux.c

-- 
2.34.1

Re: [PATCH 2/5] hw/mem/cxl_type3: Pull validation checks ahead of functional code

On Thu, 13 Oct 2022 10:07:40 +0100
Jonathan Cameron  wrote:

> On Wed, 12 Oct 2022 14:21:17 -0400
> Gregory Price  wrote:
> 
> > For style - pulling these validations ahead flattens the code.  
> 
> True, but at the cost of separating the check from where it is
> obvious why we have the check.  I'd prefer to see it next to the
> use. 
That separation made a bit more sense after factoring out the code
as then we want to pass the mr in rather than the HostMemBackend.

So in the end I did what you suggested :)

Jonathan

> 
> Inverting the hostmem check is reasonable so I'll make that change.
> 
> My original thinking is that doing so would make adding non volatile
> support messier but given you plan to factor out most of this the
> change won't be too bad anyway.
> 
> 
> > 
> > Signed-off-by: Gregory Price 
> > ---
> >  hw/mem/cxl_type3.c | 193 ++---
> >  1 file changed, 96 insertions(+), 97 deletions(-)
> > 
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 94bc439d89..43b2b9e041 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -32,107 +32,106 @@ static int ct3_build_cdat_table(CDATSubHeader 
> > ***cdat_table,
> >  int dslbis_nonvolatile_num = 4;
> >  MemoryRegion *mr;
> >  
> > +if (!ct3d->hostmem) {
> > +return len;
> > +}
> > +
> > +mr = host_memory_backend_get_memory(ct3d->hostmem);
> > +if (!mr) {
> > +return -EINVAL;
> > +}
> > +
> >  /* Non volatile aspects */
> > -if (ct3d->hostmem) {
> > -dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> > -if (!dsmas_nonvolatile) {
> > -return -ENOMEM;
> > -}
> > -nonvolatile_dsmad = next_dsmad_handle++;
> > -mr = host_memory_backend_get_memory(ct3d->hostmem);
> > -if (!mr) {
> > -return -EINVAL;
> > -}
> > -*dsmas_nonvolatile = (CDATDsmas) {
> > -.header = {
> > -.type = CDAT_TYPE_DSMAS,
> > -.length = sizeof(*dsmas_nonvolatile),
> > -},
> > -.DSMADhandle = nonvolatile_dsmad,
> > -.flags = CDAT_DSMAS_FLAG_NV,
> > -.DPA_base = 0,
> > -.DPA_length = int128_get64(mr->size),
> > -};
> > -len++;
> > -
> > -/* For now, no memory side cache, plausiblish numbers */
> > -dslbis_nonvolatile =
> > -g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
> > -if (!dslbis_nonvolatile) {
> > -return -ENOMEM;
> > -}
> > +dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> > +if (!dsmas_nonvolatile) {
> > +return -ENOMEM;
> > +}
> > +nonvolatile_dsmad = next_dsmad_handle++;
> > +*dsmas_nonvolatile = (CDATDsmas) {
> > +.header = {
> > +.type = CDAT_TYPE_DSMAS,
> > +.length = sizeof(*dsmas_nonvolatile),
> > +},
> > +.DSMADhandle = nonvolatile_dsmad,
> > +.flags = CDAT_DSMAS_FLAG_NV,
> > +.DPA_base = 0,
> > +.DPA_length = int128_get64(mr->size),
> > +};
> > +len++;
> >  
> > -dslbis_nonvolatile[0] = (CDATDslbis) {
> > -.header = {
> > -.type = CDAT_TYPE_DSLBIS,
> > -.length = sizeof(*dslbis_nonvolatile),
> > -},
> > -.handle = nonvolatile_dsmad,
> > -.flags = HMAT_LB_MEM_MEMORY,
> > -.data_type = HMAT_LB_DATA_READ_LATENCY,
> > -.entry_base_unit = 1, /* 10ns base */
> > -.entry[0] = 15, /* 150ns */
> > -};
> > -len++;
> > -
> > -dslbis_nonvolatile[1] = (CDATDslbis) {
> > -.header = {
> > -.type = CDAT_TYPE_DSLBIS,
> > -.length = sizeof(*dslbis_nonvolatile),
> > -},
> > -.handle = nonvolatile_dsmad,
> > -.flags = HMAT_LB_MEM_MEMORY,
> > -.data_type = HMAT_LB_DATA_WRITE_LATENCY,
> > -.entry_base_unit = 1,
> > -.entry[0] = 25, /* 250ns */
> > -};
> > -len++;
> > -
> > -dslbis_nonvolatile[2] = (CDATDslbis) {
> > -.header = {
> > -.type = CDAT_TYPE_DSLBIS,
> > -.length = sizeof(*dslbis_nonvolatile),
> > -},
> > -.handle = nonvolatile_dsmad,
> > -.flags = HMAT_LB_MEM_MEMORY,
> > -.data_type = HMAT_LB_DATA_READ_BANDWIDTH,
> > -.entry_base_unit = 1000, /* GB/s */
> > -.entry[0] = 16,
> > -};
> > -len++;
> > -
> > -dslbis_nonvolatile[3] = (CDATDslbis) {
> > -.header = {
> > -.type = CDAT_TYPE_DSLBIS,
> > -.length = sizeof(*dslbis_nonvolatile),
> > -},
> > -.handle = nonvolatile_dsmad,
> > -.flags = HMAT_LB_MEM_MEMORY,
> > -.data_type = HMAT_LB_DATA_WRIT

Re: [PATCH] net: print a more actionable error when slirp is not found

2022-10-13 Thread Jakob Bohm


On 02/10/2022 15:49, Marc-André Lureau wrote:

Hi

On Fri, Sep 30, 2022 at 11:49 PM Christian Schoenebeck
 wrote:

On Donnerstag, 29. September 2022 18:32:37 CEST Marc-André Lureau wrote:

From: Marc-André Lureau 

If slirp is not found during compile-time, and not manually disabled,
print a friendly error message, as suggested in the "If your networking
is failing after updating to the latest git version of QEMU..." thread
by various people.

Signed-off-by: Marc-André Lureau 
---
  meson.build |  4 
  net/net.c   | 19 +--
  2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/meson.build b/meson.build
index 8dc661363f..4f69d7d0b4 100644
--- a/meson.build
+++ b/meson.build
@@ -657,6 +657,10 @@ if not get_option('slirp').auto() or have_system
endif
  endif

+if get_option('slirp').disabled()
+  config_host_data.set('CONFIG_SLIRP_DISABLED', true)
+endif
+
  vde = not_found
  if not get_option('vde').auto() or have_system or have_tools
vde = cc.find_library('vdeplug', has_headers: ['libvdeplug.h'],
diff --git a/net/net.c b/net/net.c
index 2db160e063..e6072a5ddd 100644
--- a/net/net.c
+++ b/net/net.c
@@ -990,14 +990,29 @@ static int net_init_nic(const Netdev *netdev, const
char *name, return idx;
  }

+#if (defined(CONFIG_SLIRP) || !defined(CONFIG_SLIRP_DISABLED))
+static int net_init_user(const Netdev *netdev, const char *name,
+ NetClientState *peer, Error **errp)
+{
+#ifdef CONFIG_SLIRP
+return net_init_slirp(netdev, name, peer, errp);
+#else
+error_setg(errp,
+   "Type 'user' is not a supported netdev backend by this QEMU
build " +   "because the libslirp development files were not
found during build " +   "of QEMU.");
+#endif
+return -1;
+}
+#endif

I just tried this, but somehow it is not working for me. net_init_user() is
never called and therefore I don't get the error message. That should be
working if the user launched QEMU without any networking arg, right?


That's because vl.c has:
if (default_net) {
...
#ifdef CONFIG_SLIRP
 qemu_opts_parse(net, "user", true, &error_abort);
#endif

Iow, it doesn't try to use slirp by default if it's not found at
compile time. We can eventually change that, but that might break
existing users who don't build with slirp.

Alternatively, it could error out only if slirp was not explicitly
disabled at configure time.


And still, I would find it better if there was also a clear build-time error
if there was no libslirp and slirp feature was not explicitly disabled.

That's not the typical way we deal with dependencies, but I can try to
do that as well.
Maybe change that ifdef section to report the error early instead of 
introducing the new

helper function, something like

 #ifdef CONFIG_SLIRP
 qemu_opts_parse(net, "user", true, &error_abort);
+// Explicit error messages, because it is not obvious to users that
+// "user" networking is based on code from libslirp.
+#elif !defined(CONFIG_SLIRP_DISABLED))
+some_error_function(
+"Type 'user' is not a supported netdev backend by this QEMU build "
+"because the libslirp development files were not found during build 
"
+"of QEMU.");
+#else
+some_error_function(
+"Type 'user' is not a supported netdev backend by this QEMU build "
+"because QEMU was explicitly built without libslirp");
 #endif

Also output these messages when the user backend is explicitly requested 
and not CONFIG_SLIRP.



  static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
  const Netdev *netdev,
  const char *name,
  NetClientState *peer, Error **errp) = {
  [NET_CLIENT_DRIVER_NIC]   = net_init_nic,
-#ifdef CONFIG_SLIRP
-[NET_CLIENT_DRIVER_USER]  = net_init_slirp,
+#if (defined(CONFIG_SLIRP) || !defined(CONFIG_SLIRP_DISABLED))
+[NET_CLIENT_DRIVER_USER]  = net_init_user,
  #endif
  [NET_CLIENT_DRIVER_TAP]   = net_init_tap,
  [NET_CLIENT_DRIVER_SOCKET]= net_init_socket,




Enjoy

Jakob
--
Jakob Bohm, CIO, Partner, WiseMo A/S.  http://www.wisemo.com
Transformervej 29, 2860 Soborg, Denmark.  Direct +45 31 13 16 10
This public discussion message is non-binding and may contain errors.
WiseMo - Remote Service Management for PCs, Phones and Embedded

Re: [PATCH] configure: Avoid using strings binary

2022-10-13 Thread Peter Maydell

On Thu, 13 Oct 2022 at 09:47, Michal Privoznik  wrote:
>
> When determining the endianness of the target architecture we're
> building for a small program is compiled, which in an obfuscated
> way declares two strings. Then, we look which string is in
> correct order (using strings binary) and deduce the endianness.
> But using the strings binary is problematic, because it's part of
> toolchain (strings is just a symlink to
> x86_64-pc-linux-gnu-strings or llvm-strings). And when
> (cross-)compiling, it requires users to set the symlink to the
> correct toolchain.
>
> Fortunately, we have a better alternative anyways. Since we
> require either clang or gcc we can rely on macros they declare.
>
> Bug: https://bugs.gentoo.org/876933
> Signed-off-by: Michal Privoznik 

If we can determine this just by looking at C macros, does
this really need to be a configure test at all ? Paolo?



> ---
>  configure | 33 ++---
>  1 file changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/configure b/configure
> index 45ee6f4eb3..91e04635cb 100755
> --- a/configure
> +++ b/configure
> @@ -1426,27 +1426,30 @@ fi
>  # ---
>  # big/little endian test
>  cat > $TMPC << EOF
> -#include 
> -short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, };
> -short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, 
> };
> -int main(int argc, char *argv[])
> -{
> -return printf("%s %s\n", (char *)big_endian, (char *)little_endian);
> -}
> +#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN || \
> +defined(__BIG_ENDIAN__)
> +# error BIG
> +#endif
> +int main(void) { return 0; }
>  EOF
>
>  if compile_prog ; then
> -if strings -a $TMPE | grep -q BiGeNdIaN ; then
> -bigendian="yes"
> -elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then
> -bigendian="no"
> -else
> -echo big/little test failed
> -exit 1
> -fi
> +  bigendian="yes"
>  else
> +  cat > $TMPC << EOF
> +#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN || \
> +defined(__LITTLE_ENDIAN__)
> +# error LITTLE
> +#endif
> +int main(void) { return 0; }
> +EOF
> +
> +  if compile_prog ; then
> +bigendian="no"
> +  else
>  echo big/little test failed
>  exit 1
> +  fi
>  fi

thanks
-- PMM

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

On Wed, 12 Oct 2022 12:01:54 -0400
Gregory Price  wrote:

> This code contains heap corruption on free, and I think should be
> refactored to pre-allocate all the entries we're interested in putting
> into the table.  This would flatten the code and simplify the error
> handling steps.
> 
> Also, should we consider making a union with all the possible entries to
> make entry allocation easier?  It may eat a few extra bytes of memory,
> but it would simplify the allocation/cleanup code here further.
> 
> Given that every allocation has to be checked, i'm also not convinced
> the use of g_autofree is worth the potential footguns associated with
> it.
> 
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 568c9d62f5..3fa5d70662 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -12,9 +12,218 @@
> > +static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
> > +void *priv)
> > +{  
> (snip)
> > +/* For now, no memory side cache, plausiblish numbers */
> > +dslbis_nonvolatile = g_malloc(sizeof(*dslbis_nonvolatile) * 
> > dslbis_nonvolatile_num);
> > +if (!dslbis_nonvolatile)
> > +return -ENOMEM;  
> 
> this allocation creates a table of entries, which is later freed
> incorrectly
> 
> > +
> > +*cdat_table = g_malloc0(len * sizeof(*cdat_table));  
> 
> this allocation needs to be checked
I just realized that sizeof should be sizeof(**cdat_table)

I've moved to a local autofree pointer after factoring out the
guts of the code so this gets simpler anyway (and was more obviously wrong!)

Jonathan

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

On Wed, 12 Oct 2022 12:01:54 -0400
Gregory Price  wrote:

> This code contains heap corruption on free, and I think should be
> refactored to pre-allocate all the entries we're interested in putting
> into the table.

Good point on the heap corruption.. (oops. Particularly as I raised
that I didn't like the complexity of your free in your previous version
and still failed to notice the current code was wrong...)

>  This would flatten the code and simplify the error
> handling steps.

I'm not so keen on this.  Error handling is pretty trivial because of
the autofree magic.  It will get a tiny bit harder once we have
two calls to the factored out function, but not too bad - we just
need to free the handed off pointers in reverse from wherever we
got to before the error.

> 
> Also, should we consider making a union with all the possible entries to
> make entry allocation easier?  It may eat a few extra bytes of memory,
> but it would simplify the allocation/cleanup code here further.

An interesting point, though gets trickier once we have variable numbers
of elements.  I'm not sure it's worth the effort to save a few lines
of code.

> 
> Given that every allocation has to be checked, i'm also not convinced
> the use of g_autofree is worth the potential footguns associated with
> it.

After rolling a version with some of your suggested changes incorporated
the autofree logic is all nice and localized so I think it's well worth
having. Only slightly messy bit is we end up with 4 separate pointers
for the bandwidth and latency elements.

Re: [PATCH] configure: Avoid using strings binary

2022-10-13 Thread Michal Prívozník

On 10/13/22 12:39, Peter Maydell wrote:
> On Thu, 13 Oct 2022 at 09:47, Michal Privoznik  wrote:
>>
>> When determining the endianness of the target architecture we're
>> building for a small program is compiled, which in an obfuscated
>> way declares two strings. Then, we look which string is in
>> correct order (using strings binary) and deduce the endianness.
>> But using the strings binary is problematic, because it's part of
>> toolchain (strings is just a symlink to
>> x86_64-pc-linux-gnu-strings or llvm-strings). And when
>> (cross-)compiling, it requires users to set the symlink to the
>> correct toolchain.
>>
>> Fortunately, we have a better alternative anyways. Since we
>> require either clang or gcc we can rely on macros they declare.
>>
>> Bug: https://bugs.gentoo.org/876933
>> Signed-off-by: Michal Privoznik 
> 
> If we can determine this just by looking at C macros, does
> this really need to be a configure test at all ? Paolo?

Yes, because we're using this information to generate a file for meson
that's later used during cross compilation.

Michal

Re: [PATCH] configure: Avoid using strings binary

2022-10-13 Thread Daniel P . Berrangé

On Thu, Oct 13, 2022 at 11:39:34AM +0100, Peter Maydell wrote:
> On Thu, 13 Oct 2022 at 09:47, Michal Privoznik  wrote:
> >
> > When determining the endianness of the target architecture we're
> > building for a small program is compiled, which in an obfuscated
> > way declares two strings. Then, we look which string is in
> > correct order (using strings binary) and deduce the endianness.
> > But using the strings binary is problematic, because it's part of
> > toolchain (strings is just a symlink to
> > x86_64-pc-linux-gnu-strings or llvm-strings). And when
> > (cross-)compiling, it requires users to set the symlink to the
> > correct toolchain.
> >
> > Fortunately, we have a better alternative anyways. Since we
> > require either clang or gcc we can rely on macros they declare.
> >
> > Bug: https://bugs.gentoo.org/876933
> > Signed-off-by: Michal Privoznik 
> 
> If we can determine this just by looking at C macros, does
> this really need to be a configure test at all ? Paolo?

We don't need to rely on Clang / GCC macros either, as this
is exposed by GLib 

$ grep BYTE_ORDER /usr/lib64/glib-2.0/include/glibconfig.h
#define G_BYTE_ORDER G_LITTLE_ENDIAN

IOW, any code that needs to know can do one of:

  #if G_BYTE_ORDER == G_LITTLE_ENDIAN

  #if G_BYTE_ORDER == G_BIG_ENDIAN


The only thing 'configure' seems to be doing with the 'bigendian'
env var it sets, is to construct a meson cross compiler spec

  if test "$cross_compile" = "yes"; then
cross_arg="--cross-file config-meson.cross"
echo "[host_machine]" >> $cross
echo "system = '$targetos'" >> $cross
case "$cpu" in
i386)
echo "cpu_family = 'x86'" >> $cross
;;
*)
echo "cpu_family = '$cpu'" >> $cross
;;
esac
echo "cpu = '$cpu'" >> $cross
if test "$bigendian" = "yes" ; then
echo "endian = 'big'" >> $cross
else
echo "endian = 'little'" >> $cross
fi

so we do need a compile time test in configure, but I'd suggest
using G_BYTE_ORDER

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 4/5] hw/mem/cxl_type3: Change the CDAT allocation/free strategy

On Wed, 12 Oct 2022 14:21:19 -0400
Gregory Price  wrote:

> The existing code allocates a subtable for SLBIS entries, uses a
> local variable to avoid a g_autofree footgun, and the cleanup code
> causes heap corruption.

Ah good point (particularly given I moaned about how you were handling
the frees and still failed to notice the current code was broken!)


> 
> Rather than allocate a table, explicitly allocate each individual entry
> and make the sub-table size static.
> 
> Signed-off-by: Gregory Price 

I'll integrate a change in the spirit of what you have here, but
without aggregating the error handling paths.

> ---
>  hw/mem/cxl_type3.c | 49 --
>  1 file changed, 26 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 0e0ea70387..220b9f09a9 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -23,13 +23,14 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  void *priv)
>  {
>  g_autofree CDATDsmas *dsmas_nonvolatile = NULL;
> -g_autofree CDATDslbis *dslbis_nonvolatile = NULL;
> +g_autofree CDATDslbis *dslbis_nonvolatile1 = NULL;
> +g_autofree CDATDslbis *dslbis_nonvolatile2 = NULL;
> +g_autofree CDATDslbis *dslbis_nonvolatile3 = NULL;
> +g_autofree CDATDslbis *dslbis_nonvolatile4 = NULL;
>  g_autofree CDATDsemts *dsemts_nonvolatile = NULL;
>  CXLType3Dev *ct3d = priv;
> -int i = 0;
>  int next_dsmad_handle = 0;
>  int nonvolatile_dsmad = -1;
> -int dslbis_nonvolatile_num = 4;
>  MemoryRegion *mr;
>  
>  if (!ct3d->hostmem) {
> @@ -48,10 +49,15 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  
>  /* Non volatile aspects */
>  dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> -dslbis_nonvolatile =
> -g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
> +dslbis_nonvolatile1 = g_malloc(sizeof(*dslbis_nonvolatile1));
> +dslbis_nonvolatile2 = g_malloc(sizeof(*dslbis_nonvolatile2));
> +dslbis_nonvolatile3 = g_malloc(sizeof(*dslbis_nonvolatile3));
> +dslbis_nonvolatile4 = g_malloc(sizeof(*dslbis_nonvolatile4));
>  dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
> -if (!dsmas_nonvolatile || !dslbis_nonvolatile || !dsemts_nonvolatile) {
> +
> +if (!dsmas_nonvolatile || !dsemts_nonvolatile ||
> +!dslbis_nonvolatile1 || !dslbis_nonvolatile2 ||
> +!dslbis_nonvolatile3 || !dslbis_nonvolatile4) {
>  g_free(*cdat_table);
>  *cdat_table = NULL;
>  return -ENOMEM;
> @@ -70,10 +76,10 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  };
>  
>  /* For now, no memory side cache, plausiblish numbers */
> -dslbis_nonvolatile[0] = (CDATDslbis) {
> +*dslbis_nonvolatile1 = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> +.length = sizeof(*dslbis_nonvolatile1),
>  },
>  .handle = nonvolatile_dsmad,
>  .flags = HMAT_LB_MEM_MEMORY,
> @@ -82,10 +88,10 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  .entry[0] = 15, /* 150ns */
>  };
>  
> -dslbis_nonvolatile[1] = (CDATDslbis) {
> +*dslbis_nonvolatile2 = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> +.length = sizeof(*dslbis_nonvolatile2),
>  },
>  .handle = nonvolatile_dsmad,
>  .flags = HMAT_LB_MEM_MEMORY,
> @@ -94,10 +100,10 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  .entry[0] = 25, /* 250ns */
>  };
>  
> -dslbis_nonvolatile[2] = (CDATDslbis) {
> +*dslbis_nonvolatile3 = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> +.length = sizeof(*dslbis_nonvolatile3),
>  },
>  .handle = nonvolatile_dsmad,
>  .flags = HMAT_LB_MEM_MEMORY,
> @@ -106,10 +112,10 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  .entry[0] = 16,
>  };
>  
> -dslbis_nonvolatile[3] = (CDATDslbis) {
> +*dslbis_nonvolatile4 = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile),
> +.length = sizeof(*dslbis_nonvolatile4),
>  },
>  .handle = nonvolatile_dsmad,
>  .flags = HMAT_LB_MEM_MEMORY,
> @@ -131,15 +137,12 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  };
>  
>  /* Header always at start of structure */
> -(*cdat_table)[i++] = g_steal_pointer(&dsmas_nonvolatile);
> -
> -CDATDslbis *dslbis = g_steal_pointer(&dslbis_nonvolatile);
> -int j;
> -for (j = 0; j < dslbis_nonvolatile_num; j++

Re: [PATCH 3/5] hw/mem/cxl_type3: CDAT pre-allocate and check resources prior to work

On Wed, 12 Oct 2022 14:21:18 -0400
Gregory Price  wrote:

> Makes the size of the allocated cdat table static (6 entries),
> flattens the code, and reduces the number of exit conditions
> 
> Signed-off-by: Gregory Price 

Hmm. I don't entirely like this as it stands because it leads to more
fragile code, as we don't have a clear association between the number
of entries and the actual assignments.

So, what I've done (inspired by this) is moved to a local enum
in the factored out building function that has an element for
each of the entries (used ultimately to assign them) and
a trailing NUM_ENTRIES element we can then use in place of
the CT3_CDAT_SUBTABLE_SIZE define you have here.

I went with the 2 pass approach mentioned in a later patch, so
if cdat_table passed to the factored out code is NULL, we just
return NUM_ENTRIES directly.

> ---
>  hw/mem/cxl_type3.c | 52 --
>  1 file changed, 22 insertions(+), 30 deletions(-)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 43b2b9e041..0e0ea70387 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -17,6 +17,7 @@
>  #include "hw/pci/msix.h"
>  
>  #define DWORD_BYTE 4
> +#define CT3_CDAT_SUBTABLE_SIZE 6

>  
>  static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
>  void *priv)
> @@ -25,7 +26,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
>  g_autofree CDATDslbis *dslbis_nonvolatile = NULL;
>  g_autofree CDATDsemts *dsemts_nonvolatile = NULL;
>  CXLType3Dev *ct3d = priv;
> -int len = 0;
>  int i = 0;
>  int next_dsmad_handle = 0;
>  int nonvolatile_dsmad = -1;
> @@ -33,7 +33,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
>  MemoryRegion *mr;
>  
>  if (!ct3d->hostmem) {
> -return len;
> +return 0;
>  }
>  
>  mr = host_memory_backend_get_memory(ct3d->hostmem);
> @@ -41,11 +41,22 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  return -EINVAL;
>  }
>  
> +*cdat_table = g_malloc0(CT3_CDAT_SUBTABLE_SIZE * sizeof(*cdat_table));
> +if (!*cdat_table) {
> +return -ENOMEM;
> +}
> +
>  /* Non volatile aspects */
>  dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> -if (!dsmas_nonvolatile) {
> +dslbis_nonvolatile =
> +g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
> +dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
> +if (!dsmas_nonvolatile || !dslbis_nonvolatile || !dsemts_nonvolatile) {

I don't like aggregated error checking. It saves lines of code, but leads
to generally less maintainable code.  I prefer to do one thing, check it and
handle necessary errors - this provides a small localized chunk of code that
is easy to review and maintain.
1. Allocate structure
2. Fill structure.

We have to leave the assignment till later as only want to steal the pointers
once we know there are no error paths.

> +g_free(*cdat_table);

We have auto free to clean this up. So if this did make sense, use a local
g_autofree CDATSubHeader **cdat_table = NULL;
and steal the pointer when assigning *cdat_table at the end of this function
after all the failure paths.

This code all ends up in the caller of the factored out code anyway so
that comment becomes irrelevant on the version I've ended up with.

Jonathan

> +*cdat_table = NULL;
>  return -ENOMEM;
>  }
> +
>  nonvolatile_dsmad = next_dsmad_handle++;
>  *dsmas_nonvolatile = (CDATDsmas) {
>  .header = {
> @@ -57,15 +68,8 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  .DPA_base = 0,
>  .DPA_length = int128_get64(mr->size),
>  };
> -len++;
>  
>  /* For now, no memory side cache, plausiblish numbers */
> -dslbis_nonvolatile =
> -g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
> -if (!dslbis_nonvolatile) {
> -return -ENOMEM;
> -}
> -
>  dslbis_nonvolatile[0] = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> @@ -77,7 +81,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
>  .entry_base_unit = 1, /* 10ns base */
>  .entry[0] = 15, /* 150ns */
>  };
> -len++;
>  
>  dslbis_nonvolatile[1] = (CDATDslbis) {
>  .header = {
> @@ -90,7 +93,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
>  .entry_base_unit = 1,
>  .entry[0] = 25, /* 250ns */
>  };
> -len++;
>  
>  dslbis_nonvolatile[2] = (CDATDslbis) {
>  .header = {
> @@ -103,7 +105,6 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cdat_table,
>  .entry_base_unit = 1000, /* GB/s */
>  .entry[0] = 16,
>  };
> -len++;
>  
>  dslbis_nonvolatile[3] = (CDATDslbis) {
>  .header = {
> @@ -116,9 +117,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
> ***cd

Re: [PATCH RFC 0/2] qemu-thread: Strict unlock check

2022-10-13 Thread Peter Maydell

On Wed, 12 Oct 2022 at 19:16, Peter Xu  wrote:
>
> On Tue, Oct 11, 2022 at 06:41:52PM -0400, Peter Xu wrote:
> > NOTE: mark patchset RFC because "make check" will easily fail; but I didn't
> > yet dig into why as I'm not familiar with the code paths that triggers, it
> > can be bugs hidden or something I missed.  So RFC to just have some 
> > thoughts.
>
> I just noticed (after reminded from Dave) that the reclock was actually the
> recursive lock, which definitely won't work with patch 2 at all.
>
> OTOH I also noticed PTHREAD_MUTEX_ERRORCHECK which does the same unlock
> check that we can leverage (and it'll also check re-lock from the same
> thread which causes deadlock).  I'll give that a shot instead.

We used to use PTHREAD_MUTEX_ERRORCHECK, but stopped because it
does not work with the idiom we use for handling mutexes across
fork() where you take the lock in the parent, and then unlock it
in the child after the fork. With glibc's implementation of
PTHREAD_MUTEX_ERRORCHECK the unlock in the child fails. See
commit 24fa90499f8b24bcba29 from 2015.

thanks
-- PMM

Re: [PATCH 5/5] hw/mem/cxl_type3: Refactor CDAT sub-table entry initialization into a function

On Wed, 12 Oct 2022 14:21:20 -0400
Gregory Price  wrote:

> The CDAT can contain multiple entries for multiple memory regions, this
> will allow us to re-use the initialization code when volatile memory
> region support is added.
> 
> Signed-off-by: Gregory Price 

I'm in two minds about this... We could integrate it in the original series,
but at that time the change is justified.  Or we could leave it as a first
patch in your follow on series.

Anyhow, I went with a similar refactor inspired by this.


> ---
>  hw/mem/cxl_type3.c | 137 -
>  1 file changed, 72 insertions(+), 65 deletions(-)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 220b9f09a9..3c5485abd0 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -19,117 +19,93 @@
>  #define DWORD_BYTE 4
>  #define CT3_CDAT_SUBTABLE_SIZE 6
>  
> -static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
> -void *priv)
> +static int ct3_build_cdat_subtable(CDATSubHeader **cdat_table,
> +MemoryRegion *mr, int dsmad_handle)

subtable isn't particularly well defined.  Maybe
ct3_build_cdat_entries_for_mr()?

>  {
> -g_autofree CDATDsmas *dsmas_nonvolatile = NULL;
> -g_autofree CDATDslbis *dslbis_nonvolatile1 = NULL;
> -g_autofree CDATDslbis *dslbis_nonvolatile2 = NULL;
> -g_autofree CDATDslbis *dslbis_nonvolatile3 = NULL;
> -g_autofree CDATDslbis *dslbis_nonvolatile4 = NULL;
> -g_autofree CDATDsemts *dsemts_nonvolatile = NULL;
> -CXLType3Dev *ct3d = priv;
> -int next_dsmad_handle = 0;
> -int nonvolatile_dsmad = -1;
> -MemoryRegion *mr;
> -
> -if (!ct3d->hostmem) {
> -return 0;
> -}
> -
> -mr = host_memory_backend_get_memory(ct3d->hostmem);
> -if (!mr) {
> -return -EINVAL;
> -}
> -
> -*cdat_table = g_malloc0(CT3_CDAT_SUBTABLE_SIZE * sizeof(*cdat_table));
> -if (!*cdat_table) {
> -return -ENOMEM;
> -}
> -
> -/* Non volatile aspects */
> -dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
> -dslbis_nonvolatile1 = g_malloc(sizeof(*dslbis_nonvolatile1));
> -dslbis_nonvolatile2 = g_malloc(sizeof(*dslbis_nonvolatile2));
> -dslbis_nonvolatile3 = g_malloc(sizeof(*dslbis_nonvolatile3));
> -dslbis_nonvolatile4 = g_malloc(sizeof(*dslbis_nonvolatile4));
> -dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
> -
> -if (!dsmas_nonvolatile || !dsemts_nonvolatile ||
> -!dslbis_nonvolatile1 || !dslbis_nonvolatile2 ||
> -!dslbis_nonvolatile3 || !dslbis_nonvolatile4) {
> -g_free(*cdat_table);
> -*cdat_table = NULL;
> +g_autofree CDATDsmas *dsmas = NULL;
> +g_autofree CDATDslbis *dslbis1 = NULL;
> +g_autofree CDATDslbis *dslbis2 = NULL;
> +g_autofree CDATDslbis *dslbis3 = NULL;
> +g_autofree CDATDslbis *dslbis4 = NULL;
> +g_autofree CDATDsemts *dsemts = NULL;
> +
> +dsmas = g_malloc(sizeof(*dsmas));
> +dslbis1 = g_malloc(sizeof(*dslbis1));
> +dslbis2 = g_malloc(sizeof(*dslbis2));
> +dslbis3 = g_malloc(sizeof(*dslbis3));
> +dslbis4 = g_malloc(sizeof(*dslbis4));
> +dsemts = g_malloc(sizeof(*dsemts));
> +
> +if (!dsmas || !dslbis1 || !dslbis2 || !dslbis3 || !dslbis4 || !dsemts) {
>  return -ENOMEM;
>  }
>  
> -nonvolatile_dsmad = next_dsmad_handle++;
> -*dsmas_nonvolatile = (CDATDsmas) {
> +*dsmas = (CDATDsmas) {
>  .header = {
>  .type = CDAT_TYPE_DSMAS,
> -.length = sizeof(*dsmas_nonvolatile),
> +.length = sizeof(*dsmas),
>  },
> -.DSMADhandle = nonvolatile_dsmad,
> +.DSMADhandle = dsmad_handle,
>  .flags = CDAT_DSMAS_FLAG_NV,
>  .DPA_base = 0,
>  .DPA_length = int128_get64(mr->size),
>  };
>  
>  /* For now, no memory side cache, plausiblish numbers */
> -*dslbis_nonvolatile1 = (CDATDslbis) {
> +*dslbis1 = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile1),
> +.length = sizeof(*dslbis1),
>  },
> -.handle = nonvolatile_dsmad,
> +.handle = dsmad_handle,
>  .flags = HMAT_LB_MEM_MEMORY,
>  .data_type = HMAT_LB_DATA_READ_LATENCY,
>  .entry_base_unit = 1, /* 10ns base */
>  .entry[0] = 15, /* 150ns */

If we are going to wrap this up for volatile / non-volatile 
we probably need to pass in a reasonable value for these.
Whilst not technically always true, to test the Linux handling
I'd want non-volatile to report as longer latency.

>  };
>  
> -*dslbis_nonvolatile2 = (CDATDslbis) {
> +*dslbis2 = (CDATDslbis) {
>  .header = {
>  .type = CDAT_TYPE_DSLBIS,
> -.length = sizeof(*dslbis_nonvolatile2),
> +.length = sizeof(*dslbis2),
>  },
> -.handle = nonvolatile_dsmad,
> +.handle = dsmad_handle,
>

Re: [PATCH] tests/docker: Add flex/bison to `debian-all-test`

2022-10-13 Thread Alex Bennée



Anton Johansson  writes:

> Adds flex/bison to the debian-all-test-cross container which was missed
> in the previous CI patch. These dependencies are required by the
> idef-parser patchset for target/hexagon.
>
> Signed-off-by: Anton Johansson 

Queued to testing/next, thanks.

-- 
Alex Bennée

Re: [PATCH v7 3/5] hw/cxl/cdat: CXL CDAT Data Object Exchange implementation

On Fri, 7 Oct 2022 16:21:54 +0100
Jonathan Cameron  wrote:

> From: Huai-Cheng Kuo 
> 
> The Data Object Exchange implementation of CXL Coherent Device Attribute
> Table (CDAT). This implementation is referring to "Coherent Device
> Attribute Table Specification, Rev. 1.02, Oct. 2020" and "Compute
> Express Link Specification, Rev. 2.0, Oct. 2020"
> 
> This patch adds core support that will be shared by both
> end-points and switch port emulation.
> 
> Signed-off-by: Huai-Cheng Kuo 
> Signed-off-by: Chris Browy 
> Signed-off-by: Jonathan Cameron 

Whilst doing v8 I'll uprev this to the 1.03 CDAT spec and CXL 3.0.
Changes are minor and it's backwards compatible but it's hard
to get the older CXL spec now 3.0 is out.

> 
> ---
> Changes since RFC:
> - Split out libary code from specific device.
> ---
>  hw/cxl/cxl-cdat.c  | 222 +
>  hw/cxl/meson.build |   1 +
>  include/hw/cxl/cxl_cdat.h  | 165 
>  include/hw/cxl/cxl_component.h |   7 ++
>  include/hw/cxl/cxl_device.h|   3 +
>  include/hw/cxl/cxl_pci.h   |   1 +
>  6 files changed, 399 insertions(+)
> 
> diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c
> new file mode 100644
> index 00..137178632b
> --- /dev/null
> +++ b/hw/cxl/cxl-cdat.c
> @@ -0,0 +1,222 @@
> +/*
> + * CXL CDAT Structure
> + *
> + * Copyright (C) 2021 Avery Design Systems, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/pci/pci.h"
> +#include "hw/cxl/cxl.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +
> +static void cdat_len_check(CDATSubHeader *hdr, Error **errp)
> +{
> +assert(hdr->length);
> +assert(hdr->reserved == 0);
> +
> +switch (hdr->type) {
> +case CDAT_TYPE_DSMAS:
> +assert(hdr->length == sizeof(CDATDsmas));
> +break;
> +case CDAT_TYPE_DSLBIS:
> +assert(hdr->length == sizeof(CDATDslbis));
> +break;
> +case CDAT_TYPE_DSMSCIS:
> +assert(hdr->length == sizeof(CDATDsmscis));
> +break;
> +case CDAT_TYPE_DSIS:
> +assert(hdr->length == sizeof(CDATDsis));
> +break;
> +case CDAT_TYPE_DSEMTS:
> +assert(hdr->length == sizeof(CDATDsemts));
> +break;
> +case CDAT_TYPE_SSLBIS:
> +assert(hdr->length >= sizeof(CDATSslbisHeader));
> +assert((hdr->length - sizeof(CDATSslbisHeader)) %
> +   sizeof(CDATSslbe) == 0);
> +break;
> +default:
> +error_setg(errp, "Type %d is reserved", hdr->type);
> +}
> +}
> +
> +static void ct3_build_cdat(CDATObject *cdat, Error **errp)
> +{
> +g_autofree CDATTableHeader *cdat_header = NULL;
> +g_autofree CDATEntry *cdat_st = NULL;
> +uint8_t sum = 0;
> +int ent, i;
> +
> +/* Use default table if fopen == NULL */
> +assert(cdat->build_cdat_table);
> +
> +cdat_header = g_malloc0(sizeof(*cdat_header));
> +if (!cdat_header) {
> +error_setg(errp, "Failed to allocate CDAT header");
> +return;
> +}
> +
> +cdat->built_buf_len = cdat->build_cdat_table(&cdat->built_buf, 
> cdat->private);
> +
> +if (!cdat->built_buf_len) {
> +/* Build later as not all data available yet */
> +cdat->to_update = true;
> +return;
> +}
> +cdat->to_update = false;
> +
> +cdat_st = g_malloc0(sizeof(*cdat_st) * (cdat->built_buf_len + 1));
> +if (!cdat_st) {
> +error_setg(errp, "Failed to allocate CDAT entry array");
> +return;
> +}
> +
> +/* Entry 0 for CDAT header, starts with Entry 1 */
> +for (ent = 1; ent < cdat->built_buf_len + 1; ent++) {
> +CDATSubHeader *hdr = cdat->built_buf[ent - 1];
> +uint8_t *buf = (uint8_t *)cdat->built_buf[ent - 1];
> +
> +cdat_st[ent].base = hdr;
> +cdat_st[ent].length = hdr->length;
> +
> +cdat_header->length += hdr->length;
> +for (i = 0; i < hdr->length; i++) {
> +sum += buf[i];
> +}
> +}
> +
> +/* CDAT header */
> +cdat_header->revision = CXL_CDAT_REV;
> +/* For now, no runtime updates */
> +cdat_header->sequence = 0;
> +cdat_header->length += sizeof(CDATTableHeader);
> +sum += cdat_header->revision + cdat_header->sequence +
> +cdat_header->length;
> +/* Sum of all bytes including checksum must be 0 */
> +cdat_header->checksum = ~sum + 1;
> +
> +cdat_st[0].base = g_steal_pointer(&cdat_header);
> +cdat_st[0].length = sizeof(*cdat_header);
> +cdat->entry_len = 1 + cdat->built_buf_len;
> +cdat->entry = g_steal_pointer(&cdat_st);
> +}
> +
> +static void ct3_load_cdat(CDATObject *cdat, Error **errp)
> +{
> +g_autofree CDATEntry *cdat_st = NULL;
> +uint8_t sum = 0;
> +int num_ent;
> +int i = 0, ent = 1, file_size = 0;
> +CDATSubHeader *hdr;
> +FILE *fp = NULL;
> +

Re: [PATCH] blkdebug: ignore invalid rules in non-coroutine context

2022-10-13 Thread Markus Armbruster

Paolo Bonzini  writes:

> blkdebug events can be called from either non-coroutine or coroutine
> contexts.  However, some actions (specifically suspend actions and
> errors reported with immediately=off) only make sense from within
> a coroutine.
>
> Currently, using those action would lead to an abort() in
> qemu_coroutine_yield() ("Co-routine is yielding to no one").
> Catch them and print an error instead.
>
> Signed-off-by: Paolo Bonzini 
> ---
>  block/blkdebug.c | 17 ++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/block/blkdebug.c b/block/blkdebug.c
> index bbf2948703..bf0aedb17d 100644
> --- a/block/blkdebug.c
> +++ b/block/blkdebug.c
> @@ -31,6 +31,7 @@
>  #include "block/qdict.h"
>  #include "qemu/module.h"
>  #include "qemu/option.h"
> +#include "qemu/error-report.h"
>  #include "qapi/qapi-visit-block-core.h"
>  #include "qapi/qmp/qdict.h"
>  #include "qapi/qmp/qlist.h"
> @@ -623,8 +624,13 @@ static int rule_check(BlockDriverState *bs, uint64_t 
> offset, uint64_t bytes,
>  
>  qemu_mutex_unlock(&s->lock);
>  if (!immediately) {
> -aio_co_schedule(qemu_get_current_aio_context(), 
> qemu_coroutine_self());
> -qemu_coroutine_yield();
> +if (qemu_in_coroutine()) {
> +aio_co_schedule(qemu_get_current_aio_context(), 
> qemu_coroutine_self());
> +qemu_coroutine_yield();
> +} else {
> +error_report("Non-coroutine event %s needs immediately = off\n",
> + BlkdebugEvent_lookup.array[rule->event]);

rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(),
blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(),
blkdebug_co_block_status() (all marked coroutine_fn), and
blkdebug_co_flush() (which looks like it should be marked coroutine_fn).

Ignorant question: how could it be called outside coroutine context?

Also, code smell: reporting an error without taking an error path.  But
let's worry about that only after I understand the problem you're trying
to fix.

> +}
>  }
>  
>  return -error;
> @@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, 
> BlkdebugEvent event)
>  }
>  
>  while (actions_count[ACTION_SUSPEND] > 0) {
> -qemu_coroutine_yield();
> +if (qemu_in_coroutine()) {
> +qemu_coroutine_yield();
> +} else {
> +error_report("Non-coroutine event %s cannot suspend\n",
> + BlkdebugEvent_lookup.array[event]);
> +}
>  actions_count[ACTION_SUSPEND]--;
>  }
>  }

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

2022-10-13 Thread Gregory Price

Reading through your notes, everything seems reasonable, though I'm not
sure I agree with the two pass notion, though I'll wait to see the patch
set.

The enum is a good idea, *forehead slap*, I should have done it.  If we
have a local enum, why not just make it global (within the file) and
allocate the table as I have once we know how many MRs are present?

6 eggs/half dozen though, I'm ultimately fine with either.

On Thu, Oct 13, 2022, 4:58 AM Jonathan Cameron 
wrote:

> On Wed, 12 Oct 2022 14:21:15 -0400
> Gregory Price  wrote:
>
> > Included in this response is a recommended patch set on top of this
> > patch that resolves a number of issues, including style and a heap
> > corruption bug.
> >
> > The purpose of this patch set is to refactor the CDAT initialization
> > code to support future patch sets that will introduce multi-region
> > support in CXL Type3 devices.
> >
> > 1) Checkpatch errors in the immediately prior patch
> > 2) Flatting of code in cdat initialization
> > 3) Changes in allocation and error checking for cleanliness
> > 4) Change in the allocation/free strategy of CDAT sub-tables to simplify
> >multi-region allocation in the future.  Also resolves a heap
> >corruption bug
> > 5) Refactor of CDAT initialization code into a function that initializes
> >sub-tables per memory-region.
> >
> > Gregory Price (5):
> >   hw/mem/cxl_type3: fix checkpatch errors
> >   hw/mem/cxl_type3: Pull validation checks ahead of functional code
> >   hw/mem/cxl_type3: CDAT pre-allocate and check resources prior to work
> >   hw/mem/cxl_type3: Change the CDAT allocation/free strategy
> >   hw/mem/cxl_type3: Refactor CDAT sub-table entry initialization into a
> > function
> >
> >  hw/mem/cxl_type3.c | 240 +++--
> >  1 file changed, 122 insertions(+), 118 deletions(-)
> >
>
> Thanks, I'm going to roll this stuff into the original patch set for v8.
> Some of this I already have (like the check patch stuff).
> Some I may disagree with in which case  I'll reply to the patches - note
> I haven't looked at them in detail yet!
>
> Jonathan
>

Re: [PATCH] configure: Avoid using strings binary

2022-10-13 Thread Peter Maydell

On Thu, 13 Oct 2022 at 12:08, Daniel P. Berrangé  wrote:
>
> On Thu, Oct 13, 2022 at 11:39:34AM +0100, Peter Maydell wrote:
> > On Thu, 13 Oct 2022 at 09:47, Michal Privoznik  wrote:
> > >
> > > When determining the endiandness of the target architecture we're
> > > building for a small program is compiled, which in an obfuscated
> > > way declares two strings. Then, we look which string is in
> > > correct order (using strings binary) and deduct the endiandness.
> > > But using the strings binary is problematic, because it's part of
> > > toolchain (strings is just a symlink to
> > > x86_64-pc-linux-gnu-strings or llvm-strings). And when
> > > (cross-)compiling, it requires users to set the symlink to the
> > > correct toolchain.
> > >
> > > Fortunately, we have a better alternative anyways. Since we
> > > require either clang or gcc we can rely on macros they declare.
> > >
> > > Bug: https://bugs.gentoo.org/876933
> > > Signed-off-by: Michal Privoznik 
> >
> > If we can determine this just by looking at C macros, does
> > this really need to be a configure test at all ? Paolo?
>
> We don't need to rely on CLang / GCC macros either, as this
> is exposed by GLib
>
> $ grep BYTE_ORDER /usr/lib64/glib-2.0/include/glibconfig.h
> #define G_BYTE_ORDER G_LITTLE_ENDIAN
>
> IOW, any code that needs to know can do one of:
>
>   #if G_BYTE_ORDER == G_LITTLE_ENDIAN
>
>   #if G_BYTE_ORDER == G_BIG_ENDIAN

It would be more consistent for configure to do this the same
way that compiler.h does, though:

#define HOST_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)

thanks
-- PMM

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

On Thu, 13 Oct 2022 07:36:28 -0400
Gregory Price  wrote:

> Reading through your notes, everything seems reasonable, though I'm not
> sure I agree with the two pass notion, though I'll wait to see the patch
> set.
> 
> The enum is a good idea, *forehead slap*, I should have done it.  If we
> have a local enum, why not just make it global (within the file) and
> allocate the table as I have once we know how many MRs are present?

It's not global as we need the entries to be packed.  So if there is just
one MR (whichever one), the entries for it need to be at the beginning of
cdat_table.  I also don't want to bake into the outer caller that the
entries will always be the same size for different MRs.

For the two pass case...

I'll send code in a few mins, but in the meantime my thought is that
the extended code for volatile + non-volatile will look something like:
(variable names made up)

if (ct3d->volatile_mem) {
volatile_mr = 
host_memory_backend_get_memory(ct3d->volatile_mem);
if (!volatile_mr) {
return -EINVAL;
}
rc = ct3_build_cdat_entries_for_mr(NULL, dsmad++, volatile_mr);
if (rc < 0) {
return rc;
}
volatile_len = rc;
}

if (ct3d->nonvolatile_mem) {
nonvolatile_mr = 
host_memory_backend_get_memory(ct3d->nonvolatile_mem);
if (!nonvolatile_mr) {
return -EINVAL;
}
rc = ct3_build_cdat_entries_for_mr(NULL, dsmad++, 
nonvolatile_mr);
if (rc < 0) {
return rc;
}
nonvolatile_len = rc;
}

dsmad = 0;

table = g_malloc0((volatile_len + nonvolatile_len) * sizeof(*table));
if (!table) {
return -ENOMEM;
}

if (volatile_len) {
rc = ct3_build_cdat_entries_for_mr(&table[0], dsmad++, 
volatile_mr);
if (rc < 0) {
return rc;
}
}   
if (nonvolatile_len) {
rc = ct3_build_cdat_entries_for_mr(&table[volatile_len], 
dsmad++, nonvolatile_mr...);
if (rc < 0) {
/* Only place we need error handling.  Could make it 
more generic of course */
for (i = 0; i < volatile_len; i++) {
g_free(table[i]);
}
return rc;
}
}

*cdat_table = g_steal_pointer(&table);


Jonathan

> 
> 6 eggs/half dozen though, I'm ultimately fine with either.
> 
> On Thu, Oct 13, 2022, 4:58 AM Jonathan Cameron 
> wrote:
> 
> > On Wed, 12 Oct 2022 14:21:15 -0400
> > Gregory Price  wrote:
> >  
> > > Included in this response is a recommended patch set on top of this
> > > patch that resolves a number of issues, including style and a heap
> > > corruption bug.
> > >
> > > The purpose of this patch set is to refactor the CDAT initialization
> > > code to support future patch sets that will introduce multi-region
> > > support in CXL Type3 devices.
> > >
> > > 1) Checkpatch errors in the immediately prior patch
> > > 2) Flatting of code in cdat initialization
> > > 3) Changes in allocation and error checking for cleanliness
> > > 4) Change in the allocation/free strategy of CDAT sub-tables to simplify
> > >multi-region allocation in the future.  Also resolves a heap
> > >corruption bug
> > > 5) Refactor of CDAT initialization code into a function that initializes
> > >sub-tables per memory-region.
> > >
> > > Gregory Price (5):
> > >   hw/mem/cxl_type3: fix checkpatch errors
> > >   hw/mem/cxl_type3: Pull validation checks ahead of functional code
> > >   hw/mem/cxl_type3: CDAT pre-allocate and check resources prior to work
> > >   hw/mem/cxl_type3: Change the CDAT allocation/free strategy
> > >   hw/mem/cxl_type3: Refactor CDAT sub-table entry initialization into a
> > > function
> > >
> > >  hw/mem/cxl_type3.c | 240 +++--
> > >  1 file changed, 122 insertions(+), 118 deletions(-)
> > >  
> >
> > Thanks, I'm going to roll this stuff into the original patch set for v8.
> > Some of this I already have (like the check patch stuff).
> > Some I may disagree with in which case  I'll reply to the patches - note
> > I haven't looked at them in detail yet!
> >
> > Jonathan
> >  
>

[PATCH v8 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

From: Huai-Cheng Kuo 

The CDAT can be specified in two ways. One is to add ",cdat=<filename>"
in "-device cxl-type3"'s command option. The file is required to provide
the whole CDAT table in binary mode. The other is to use the default
that provides some 'reasonable' numbers based on type of memory and
size.

The DOE capability supporting CDAT is added to hw/mem/cxl_type3.c with
capability offset 0x190. The config read/write to this capability range
can be generated in the OS to request the CDAT data.

Signed-off-by: Huai-Cheng Kuo 
Signed-off-by: Chris Browy 
Signed-off-by: Jonathan Cameron 

---
Changes since v7: Thanks to Gregory Price for review + patches.
- Fix a heap corruption
- Factor out the entry building to a separate function that will
  soon be useful for volatile case.
- Switch to enum of entries so NUM_ENTRIES is automatically kept
  in sync with any additional elements.

Changes since RFC:
- Break out type 3 user of library as separate patch.
- Change reported data for default to be based on the options provided
  for the type 3 device.
---
 hw/mem/cxl_type3.c | 267 +
 1 file changed, 267 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 568c9d62f5..8490154824 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -12,9 +12,258 @@
 #include "qemu/range.h"
 #include "qemu/rcu.h"
 #include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
 #include "hw/cxl/cxl.h"
 #include "hw/pci/msix.h"
 
+#define DWORD_BYTE 4
+
+/* If no cdat_table == NULL returns number of entries */
+static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
+ int dsmad_handle, MemoryRegion *mr)
+{
+enum {
+DSMAS,
+DSLBIS0,
+DSLBIS1,
+DSLBIS2,
+DSLBIS3,
+DSEMTS,
+NUM_ENTRIES
+};
+g_autofree CDATDsmas *dsmas = NULL;
+g_autofree CDATDslbis *dslbis0 = NULL;
+g_autofree CDATDslbis *dslbis1 = NULL;
+g_autofree CDATDslbis *dslbis2 = NULL;
+g_autofree CDATDslbis *dslbis3 = NULL;
+g_autofree CDATDsemts *dsemts = NULL;
+
+if (!cdat_table) {
+return NUM_ENTRIES;
+}
+
+dsmas = g_malloc(sizeof(*dsmas));
+if (!dsmas) {
+return -ENOMEM;
+}
+*dsmas = (CDATDsmas) {
+.header = {
+.type = CDAT_TYPE_DSMAS,
+.length = sizeof(*dsmas),
+},
+.DSMADhandle = dsmad_handle,
+.flags = CDAT_DSMAS_FLAG_NV,
+.DPA_base = 0,
+.DPA_length = int128_get64(mr->size),
+};
+
+/* For now, no memory side cache, plausiblish numbers */
+dslbis0 = g_malloc(sizeof(*dslbis0));
+if (!dslbis0) {
+return -ENOMEM;
+}
+*dslbis0 = (CDATDslbis) {
+.header = {
+.type = CDAT_TYPE_DSLBIS,
+.length = sizeof(*dslbis0),
+},
+.handle = dsmad_handle,
+.flags = HMAT_LB_MEM_MEMORY,
+.data_type = HMAT_LB_DATA_READ_LATENCY,
+.entry_base_unit = 1, /* 10ns base */
+.entry[0] = 15, /* 150ns */
+};
+
+dslbis1 = g_malloc(sizeof(*dslbis1));
+if (!dslbis1) {
+return -ENOMEM;
+}
+*dslbis1 = (CDATDslbis) {
+.header = {
+.type = CDAT_TYPE_DSLBIS,
+.length = sizeof(*dslbis1),
+},
+.handle = dsmad_handle,
+.flags = HMAT_LB_MEM_MEMORY,
+.data_type = HMAT_LB_DATA_WRITE_LATENCY,
+.entry_base_unit = 1,
+.entry[0] = 25, /* 250ns */
+};
+
+dslbis2 = g_malloc(sizeof(*dslbis2));
+if (!dslbis2) {
+return -ENOMEM;
+}
+*dslbis2 = (CDATDslbis) {
+.header = {
+.type = CDAT_TYPE_DSLBIS,
+.length = sizeof(*dslbis2),
+},
+.handle = dsmad_handle,
+.flags = HMAT_LB_MEM_MEMORY,
+.data_type = HMAT_LB_DATA_READ_BANDWIDTH,
+.entry_base_unit = 1000, /* GB/s */
+.entry[0] = 16,
+};
+
+dslbis3 = g_malloc(sizeof(*dslbis3));
+if (!dslbis3) {
+return -ENOMEM;
+}
+*dslbis3 = (CDATDslbis) {
+.header = {
+.type = CDAT_TYPE_DSLBIS,
+.length = sizeof(*dslbis3),
+},
+.handle = dsmad_handle,
+.flags = HMAT_LB_MEM_MEMORY,
+.data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
+.entry_base_unit = 1000, /* GB/s */
+.entry[0] = 16,
+};
+
+dsemts = g_malloc(sizeof(*dsemts));
+if (!dsemts) {
+return -ENOMEM;
+}
+*dsemts = (CDATDsemts) {
+.header = {
+.type = CDAT_TYPE_DSEMTS,
+.length = sizeof(*dsemts),
+},
+.DSMAS_handle = dsmad_handle,
+/* Reserved - the non volatile from DSMAS matters */
+.EFI_memory_type_attr = 2,
+.DPA_offset = 0,
+.DPA_length = int128_get64(mr->size),
+};
+
+/* Header always at start of structure */
+cdat_table[DSMAS] = g_steal_pointer(&dsmas);
+cdat_table[DSLBIS0

[PATCH v8 0/5] QEMU PCIe DOE for PCIe 4.0/5.0 and CXL 2.0

Changes since v7: Details in individual patches:
Thanks to Gregory Price for reviewing!
- Fix heap corruption.
- Check allocations succeed.
- Substantial refactor of type 3 cdat table build to make it
simpler and easier to add volatile entry support.

V7 Cover letter - lightly edited.

Whilst I have carried on Huai-Cheng Kuo's series version numbering and
naming, there have been very substantial changes since v6 so I would
suggest fresh review makes sense for anyone who has looked at this before.
In particular, if the Avery design folks could check I haven't broken
anything that would be great.

For reference v6: QEMU PCIe DOE for PCIe 4.0/5.0 and CXL 2.0
https://lore.kernel.org/qemu-devel/1623330943-18290-1-git-send-email-cbr...@avery-design.com/

Summary of changes:
1) Linux headers definitions for DOE are now upstream so drop that patch.
2) Add CDAT for switch upstream port.
3) Generate 'plausible' default CDAT tables when a file is not provided.
4) General refactoring to calculate the correct table sizes and allocate
based on that rather than copying from a local static array.
5) Changes from earlier reviews such as matching QEMU type naming style.
6) Moved compliance and SPDM usecases to future patch sets.

Sign-offs on these are complex because the patches were originally developed
by Huai-Cheng Kuo, but posted by Chris Browy and then picked up by Jonathan
Cameron who made substantial changes.

Huai-Cheng Kuo confirmed they are happy to maintain this updated code.

What's here?

This series brings generic PCI Express Data Object Exchange support (DOE)
DOE is defined in the PCIe Base Spec r6.0. It consists of a mailbox in PCI
config space via a PCIe Extended Capability Structure.
The PCIe spec defines several protocols (including one to discover what
protocols a given DOE instance supports) and other specifications such as
CXL define additional protocols using their own vendor IDs.

In this series we make use of the DOE to support the CXL spec defined
Table Access Protocol, specifically to provide access to CDAT - a
table specified in a specification that is hosted by the UEFI forum
and is used to provide runtime discoverability of the sort of information
that would otherwise be available in firmware tables (memory types,
latency and bandwidth information etc).

The Linux kernel gained support for DOE / CDAT on CXL type 3 EPs in 6.0.
The version merged did not support interrupts (earlier versions did
so that support in the emulation was tested a while back).

This series provides CDAT emulation for CXL switch upstream ports
and CXL type 3 memory devices. Note that to exercise the switch support
additional Linux kernel patches are needed.
https://lore.kernel.org/linux-cxl/20220503153449.4088-1-jonathan.came...@huawei.com/
(I'll post a new version of that support shortly)

Additional protocols will be supported by follow on patch sets:
* CXL compliance protocol.
* CMA / SPDM device attestation.
(Old version at https://gitlab.com/jic23/qemu/-/commits/cxl-next - will refresh
that tree next week)

Huai-Cheng Kuo (3):
hw/pci: PCIe Data Object Exchange emulation
hw/cxl/cdat: CXL CDAT Data Object Exchange implementation
hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

Jonathan Cameron (2):
hw/mem/cxl-type3: Add MSIX support
hw/pci-bridge/cxl-upstream: Add a CDAT table access DOE

--
2.37.2

[PATCH v8 1/5] hw/pci: PCIe Data Object Exchange emulation

From: Huai-Cheng Kuo 

Emulation of PCIe Data Object Exchange (DOE)
PCIE Base Specification r6.0 6.3 Data Object Exchange

Supports multiple DOE PCIe Extended Capabilities for a single PCIe
device. For each capability, a static array of DOEProtocol should be passed
to pcie_doe_init(). The protocols in that array will be registered under
the DOE capability structure. For each protocol, vendor ID, type, and
corresponding callback function (handle_request()) should be implemented.
This callback function represents how the DOE request for corresponding
protocol will be handled.

pcie_doe_{read/write}_config() must be appended to corresponding PCI
device's config_read/write() handler to enable DOE access. In
pcie_doe_read_config(), false will be returned if pci_config_read()
offset is not within DOE capability range. In pcie_doe_write_config(),
the function will have no effect if the address is not within the related
DOE PCIE extended capability.

Signed-off-by: Huai-Cheng Kuo 
Signed-off-by: Chris Browy 
Signed-off-by: Jonathan Cameron 
---

Changes since v7: None.
 MAINTAINERS|   7 +
 hw/pci/meson.build |   1 +
 hw/pci/pcie_doe.c  | 367 +
 include/hw/pci/pci_ids.h   |   3 +
 include/hw/pci/pcie.h  |   1 +
 include/hw/pci/pcie_doe.h  | 123 +
 include/hw/pci/pcie_regs.h |   4 +
 7 files changed, 506 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8ae2e43c83..562e1d02a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1832,6 +1832,13 @@ F: qapi/pci.json
 F: docs/pci*
 F: docs/specs/*pci*
 
+PCIE DOE
+M: Huai-Cheng Kuo 
+M: Chris Browy 
+S: Supported
+F: include/hw/pci/pcie_doe.h
+F: hw/pci/pcie_doe.c
+
 ACPI/SMBIOS
 M: Michael S. Tsirkin 
 M: Igor Mammedov 
diff --git a/hw/pci/meson.build b/hw/pci/meson.build
index bcc9c75919..5aff7ed1c6 100644
--- a/hw/pci/meson.build
+++ b/hw/pci/meson.build
@@ -13,6 +13,7 @@ pci_ss.add(files(
 # allow plugging PCIe devices into PCI buses, include them even if
 # CONFIG_PCI_EXPRESS=n.
 pci_ss.add(files('pcie.c', 'pcie_aer.c'))
+pci_ss.add(files('pcie_doe.c'))
 softmmu_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 
'pcie_host.c'))
 softmmu_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss)
 
diff --git a/hw/pci/pcie_doe.c b/hw/pci/pcie_doe.c
new file mode 100644
index 00..2210f86968
--- /dev/null
+++ b/hw/pci/pcie_doe.c
@@ -0,0 +1,367 @@
+/*
+ * PCIe Data Object Exchange
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/range.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pcie.h"
+#include "hw/pci/pcie_doe.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+
+#define DWORD_BYTE 4
+
+typedef struct DoeDiscoveryReq {
+DOEHeader header;
+uint8_t index;
+uint8_t reserved[3];
+} QEMU_PACKED DoeDiscoveryReq;
+
+typedef struct DoeDiscoveryRsp {
+DOEHeader header;
+uint16_t vendor_id;
+uint8_t data_obj_type;
+uint8_t next_index;
+} QEMU_PACKED DoeDiscoveryRsp;
+
+static bool pcie_doe_discovery(DOECap *doe_cap)
+{
+DoeDiscoveryReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+DoeDiscoveryRsp rsp;
+uint8_t index = req->index;
+DOEProtocol *prot;
+
+/* Discard request if length does not match DoeDiscoveryReq */
+if (pcie_doe_get_obj_len(req) <
+DIV_ROUND_UP(sizeof(DoeDiscoveryReq), DWORD_BYTE)) {
+return false;
+}
+
+rsp.header = (DOEHeader) {
+.vendor_id = PCI_VENDOR_ID_PCI_SIG,
+.data_obj_type = PCI_SIG_DOE_DISCOVERY,
+.length = DIV_ROUND_UP(sizeof(DoeDiscoveryRsp), DWORD_BYTE),
+};
+
+/* Point to the requested protocol, index 0 must be Discovery */
+if (index == 0) {
+rsp.vendor_id = PCI_VENDOR_ID_PCI_SIG;
+rsp.data_obj_type = PCI_SIG_DOE_DISCOVERY;
+} else {
+if (index < doe_cap->protocol_num) {
+prot = &doe_cap->protocols[index - 1];
+rsp.vendor_id = prot->vendor_id;
+rsp.data_obj_type = prot->data_obj_type;
+} else {
+rsp.vendor_id = 0x;
+rsp.data_obj_type = 0xFF;
+}
+}
+
+if (index + 1 == doe_cap->protocol_num) {
+rsp.next_index = 0;
+} else {
+rsp.next_index = index + 1;
+}
+
+pcie_doe_set_rsp(doe_cap, &rsp);
+
+return true;
+}
+
+static void pcie_doe_reset_mbox(DOECap *st)
+{
+st->read_mbox_idx = 0;
+st->read_mbox_len = 0;
+st->write_mbox_len = 0;
+
+memset(st->read_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+memset(st->write_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+}
+
+void pcie_doe_init(PCIDevice *dev, DOECap *doe_cap, uint16_t offset,
+   DOEProtocol *protocols, bool intr, uint16_t vec)
+{
+pcie_a

[PATCH v8 5/5] hw/pci-bridge/cxl-upstream: Add a CDAT table access DOE

This Data Object Exchange Mailbox allows software to query the
latency and bandwidth between ports on the switch. For now
only provide information on routes between the upstream port and
each downstream port (not p2p).

Signed-off-by: Jonathan Cameron 

--
Changes since v7:
- Moved to enum for cdat_table elements to make sizing explicit.
- Handle memory allocation failures that would have been ignored
  previously.
---
 hw/pci-bridge/cxl_upstream.c | 194 ++-
 include/hw/cxl/cxl_cdat.h|   1 +
 2 files changed, 194 insertions(+), 1 deletion(-)

diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index a83a3e81e4..f2fc22388f 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -10,11 +10,12 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "hw/qdev-properties.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/pcie.h"
 #include "hw/pci/pcie_port.h"
 
-#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 1
+#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2
 
 #define CXL_UPSTREAM_PORT_MSI_OFFSET 0x70
 #define CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET 0x90
@@ -28,6 +29,7 @@ typedef struct CXLUpstreamPort {
 
 /*< public >*/
 CXLComponentState cxl_cstate;
+DOECap doe_cdat;
 } CXLUpstreamPort;
 
 CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp)
@@ -60,6 +62,9 @@ static void cxl_usp_dvsec_write_config(PCIDevice *dev, 
uint32_t addr,
 static void cxl_usp_write_config(PCIDevice *d, uint32_t address,
  uint32_t val, int len)
 {
+CXLUpstreamPort *usp = CXL_USP(d);
+
+pcie_doe_write_config(&usp->doe_cdat, address, val, len);
 pci_bridge_write_config(d, address, val, len);
 pcie_cap_flr_write_config(d, address, val, len);
 pcie_aer_write_config(d, address, val, len);
@@ -67,6 +72,18 @@ static void cxl_usp_write_config(PCIDevice *d, uint32_t 
address,
 cxl_usp_dvsec_write_config(d, address, val, len);
 }
 
+static uint32_t cxl_usp_read_config(PCIDevice *d, uint32_t address, int len)
+{
+CXLUpstreamPort *usp = CXL_USP(d);
+uint32_t val;
+
+if (pcie_doe_read_config(&usp->doe_cdat, address, len, &val)) {
+return val;
+}
+
+return pci_default_read_config(d, address, len);
+}
+
 static void latch_registers(CXLUpstreamPort *usp)
 {
 uint32_t *reg_state = usp->cxl_cstate.crb.cache_mem_registers;
@@ -119,6 +136,166 @@ static void build_dvsecs(CXLComponentState *cxl)
REG_LOC_DVSEC_REVID, dvsec);
 }
 
+static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
+{
+CDATObject *cdat = &CXL_USP(doe_cap->pdev)->cxl_cstate.cdat;
+uint16_t ent;
+void *base;
+uint32_t len;
+CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+CDATRsp rsp;
+
+cxl_doe_cdat_update(&CXL_USP(doe_cap->pdev)->cxl_cstate, &error_fatal);
+assert(cdat->entry_len);
+
+/* Discard if request length mismatched */
+if (pcie_doe_get_obj_len(req) <
+DIV_ROUND_UP(sizeof(CDATReq), sizeof(uint32_t))) {
+return false;
+}
+
+ent = req->entry_handle;
+base = cdat->entry[ent].base;
+len = cdat->entry[ent].length;
+
+rsp = (CDATRsp) {
+.header = {
+.vendor_id = CXL_VENDOR_ID,
+.data_obj_type = CXL_DOE_TABLE_ACCESS,
+.reserved = 0x0,
+.length = DIV_ROUND_UP((sizeof(rsp) + len), sizeof(uint32_t)),
+},
+.rsp_code = CXL_DOE_TAB_RSP,
+.table_type = CXL_DOE_TAB_TYPE_CDAT,
+.entry_handle = (ent < cdat->entry_len - 1) ?
+ent + 1 : CXL_DOE_TAB_ENT_MAX,
+};
+
+memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
+memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), 
sizeof(uint32_t)),
+   base, len);
+
+doe_cap->read_mbox_len += rsp.header.length;
+
+return true;
+}
+
+static DOEProtocol doe_cdat_prot[] = {
+{ CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
+{ }
+};
+
+static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
+{
+enum {
+SSLBIS_LATENCY,
+SSLBIS_BANDWIDTH,
+NUM_ENTRIES
+};
+g_autofree CDATSslbis *sslbis_latency = NULL;
+g_autofree CDATSslbis *sslbis_bandwidth = NULL;
+CXLUpstreamPort *us = CXL_USP(priv);
+PCIBus *bus = &PCI_BRIDGE(us)->sec_bus;
+int devfn, sslbis_size, i;
+int count = 0;
+uint16_t port_ids[256];
+
+for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+PCIDevice *d = bus->devices[devfn];
+PCIEPort *port;
+
+if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+continue;
+}
+
+/*
+ * Whilst the PCI express spec doesn't allow anything other than
+ * downstream ports on this bus, let us be a little paranoid
+ */
+if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+continue;
+}
+
+port = PCIE_PORT(d);
+port_ids[count] = port->port;
+cou

[PATCH v8 3/5] hw/cxl/cdat: CXL CDAT Data Object Exchange implementation

From: Huai-Cheng Kuo 

The Data Object Exchange implementation of CXL Coherent Device Attribute
Table (CDAT). This implementation is referring to "Coherent Device
Attribute Table Specification, Rev. 1.03, July. 2022" and "Compute
Express Link Specification, Rev. 3.0, July. 2022"

This patch adds core support that will be shared by both
end-points and switch port emulation.

Signed-off-by: Huai-Cheng Kuo 
Signed-off-by: Chris Browy 
Signed-off-by: Jonathan Cameron 

---
Changes since v7:
- Up rev spec references and version number.
Changes since RFC:
- Split out library code from specific device.
---
 hw/cxl/cxl-cdat.c  | 224 +
 hw/cxl/meson.build |   1 +
 include/hw/cxl/cxl_cdat.h  | 165 
 include/hw/cxl/cxl_component.h |   7 ++
 include/hw/cxl/cxl_device.h|   3 +
 include/hw/cxl/cxl_pci.h   |   1 +
 6 files changed, 401 insertions(+)

diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c
new file mode 100644
index 00..3653aa56f0
--- /dev/null
+++ b/hw/cxl/cxl-cdat.c
@@ -0,0 +1,224 @@
+/*
+ * CXL CDAT Structure
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+#include "hw/cxl/cxl.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+static void cdat_len_check(CDATSubHeader *hdr, Error **errp)
+{
+assert(hdr->length);
+assert(hdr->reserved == 0);
+
+switch (hdr->type) {
+case CDAT_TYPE_DSMAS:
+assert(hdr->length == sizeof(CDATDsmas));
+break;
+case CDAT_TYPE_DSLBIS:
+assert(hdr->length == sizeof(CDATDslbis));
+break;
+case CDAT_TYPE_DSMSCIS:
+assert(hdr->length == sizeof(CDATDsmscis));
+break;
+case CDAT_TYPE_DSIS:
+assert(hdr->length == sizeof(CDATDsis));
+break;
+case CDAT_TYPE_DSEMTS:
+assert(hdr->length == sizeof(CDATDsemts));
+break;
+case CDAT_TYPE_SSLBIS:
+assert(hdr->length >= sizeof(CDATSslbisHeader));
+assert((hdr->length - sizeof(CDATSslbisHeader)) %
+   sizeof(CDATSslbe) == 0);
+break;
+default:
+error_setg(errp, "Type %d is reserved", hdr->type);
+}
+}
+
+static void ct3_build_cdat(CDATObject *cdat, Error **errp)
+{
+g_autofree CDATTableHeader *cdat_header = NULL;
+g_autofree CDATEntry *cdat_st = NULL;
+uint8_t sum = 0;
+int ent, i;
+
+/* Use default table if fopen == NULL */
+assert(cdat->build_cdat_table);
+
+cdat_header = g_malloc0(sizeof(*cdat_header));
+if (!cdat_header) {
+error_setg(errp, "Failed to allocate CDAT header");
+return;
+}
+
+cdat->built_buf_len = cdat->build_cdat_table(&cdat->built_buf, 
cdat->private);
+
+if (!cdat->built_buf_len) {
+/* Build later as not all data available yet */
+cdat->to_update = true;
+return;
+}
+cdat->to_update = false;
+
+cdat_st = g_malloc0(sizeof(*cdat_st) * (cdat->built_buf_len + 1));
+if (!cdat_st) {
+error_setg(errp, "Failed to allocate CDAT entry array");
+return;
+}
+
+/* Entry 0 for CDAT header, starts with Entry 1 */
+for (ent = 1; ent < cdat->built_buf_len + 1; ent++) {
+CDATSubHeader *hdr = cdat->built_buf[ent - 1];
+uint8_t *buf = (uint8_t *)cdat->built_buf[ent - 1];
+
+cdat_st[ent].base = hdr;
+cdat_st[ent].length = hdr->length;
+
+cdat_header->length += hdr->length;
+for (i = 0; i < hdr->length; i++) {
+sum += buf[i];
+}
+}
+
+/* CDAT header */
+cdat_header->revision = CXL_CDAT_REV;
+/* For now, no runtime updates */
+cdat_header->sequence = 0;
+cdat_header->length += sizeof(CDATTableHeader);
+sum += cdat_header->revision + cdat_header->sequence +
+cdat_header->length;
+/* Sum of all bytes including checksum must be 0 */
+cdat_header->checksum = ~sum + 1;
+
+cdat_st[0].base = g_steal_pointer(&cdat_header);
+cdat_st[0].length = sizeof(*cdat_header);
+cdat->entry_len = 1 + cdat->built_buf_len;
+cdat->entry = g_steal_pointer(&cdat_st);
+}
+
+static void ct3_load_cdat(CDATObject *cdat, Error **errp)
+{
+g_autofree CDATEntry *cdat_st = NULL;
+uint8_t sum = 0;
+int num_ent;
+int i = 0, ent = 1, file_size = 0;
+CDATSubHeader *hdr;
+FILE *fp = NULL;
+
+/* Read CDAT file and create its cache */
+fp = fopen(cdat->filename, "r");
+if (!fp) {
+error_setg(errp, "CDAT: Unable to open file");
+return;
+}
+
+fseek(fp, 0, SEEK_END);
+file_size = ftell(fp);
+fseek(fp, 0, SEEK_SET);
+cdat->buf = g_malloc0(file_size);
+
+if (fread(cdat->buf, file_size, 1, fp) == 0) {
+error_setg(errp, "CDAT: File read failed");
+return;
+}
+
+fclose(fp);
+
+

[PATCH v8 2/5] hw/mem/cxl-type3: Add MSIX support

This will be used by several upcoming patch sets so break it out
such that it doesn't matter which one lands first.

Signed-off-by: Jonathan Cameron 
---
 hw/mem/cxl_type3.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a71bf1afeb..568c9d62f5 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -13,6 +13,7 @@
 #include "qemu/rcu.h"
 #include "sysemu/hostmem.h"
 #include "hw/cxl/cxl.h"
+#include "hw/pci/msix.h"
 
 /*
  * Null value of all Fs suggested by IEEE RA guidelines for use of
@@ -146,6 +147,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
 ComponentRegisters *regs = &cxl_cstate->crb;
 MemoryRegion *mr = ®s->component_registers;
 uint8_t *pci_conf = pci_dev->config;
+unsigned short msix_num = 1;
+int i;
 
 if (!cxl_setup_memory(ct3d, errp)) {
 return;
@@ -180,6 +183,12 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
  PCI_BASE_ADDRESS_SPACE_MEMORY |
  PCI_BASE_ADDRESS_MEM_TYPE_64,
  &ct3d->cxl_dstate.device_registers);
+
+/* MSI(-X) Initailization */
+msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
+for (i = 0; i < msix_num; i++) {
+msix_vector_use(pci_dev, i);
+}
 }
 
 static void ct3_exit(PCIDevice *pci_dev)
-- 
2.37.2

Re: [PATCH] configure: Avoid using strings binary

2022-10-13 Thread Marc-André Lureau

Hi


On Thu, Oct 13, 2022 at 3:50 PM Peter Maydell 
wrote:

> On Thu, 13 Oct 2022 at 12:08, Daniel P. Berrangé 
> wrote:
> >
> > On Thu, Oct 13, 2022 at 11:39:34AM +0100, Peter Maydell wrote:
> > > On Thu, 13 Oct 2022 at 09:47, Michal Privoznik 
> wrote:
> > > >
> > > > When determining the endianness of the target architecture we're
> > > > building for a small program is compiled, which in an obfuscated
> > > > way declares two strings. Then, we look which string is in
> > > > correct order (using strings binary) and deduce the endianness.
> > > > But using the strings binary is problematic, because it's part of
> > > > toolchain (strings is just a symlink to
> > > > x86_64-pc-linux-gnu-strings or llvm-strings). And when
> > > > (cross-)compiling, it requires users to set the symlink to the
> > > > correct toolchain.
> > > >
> > > > Fortunately, we have a better alternative anyways. Since we
> > > > require either clang or gcc we can rely on macros they declare.
> > > >
> > > > Bug: https://bugs.gentoo.org/876933
> > > > Signed-off-by: Michal Privoznik 
> > >
> > > If we can determine this just by looking at C macros, does
> > > this really need to be a configure test at all ? Paolo?
> >
> > We don't need to rely on CLang / GCC macros either, as this
> > is exposed by GLib
> >
> > $ grep BYTE_ORDER /usr/lib64/glib-2.0/include/glibconfig.h
> > #define G_BYTE_ORDER G_LITTLE_ENDIAN
> >
> > IOW, any code that needs to know can do one of:
> >
> >   #if G_BYTE_ORDER == G_LITTLE_ENDIAN
> >
> >   #if G_BYTE_ORDER == G_BIG_ENDIAN
>
> It would be more consistent for configure to do this the same
> way that compiler.h does, though:
>
> #define HOST_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
>
>
Weird, it should have been introduced with commit e03b56863d ("Replace
config-time define HOST_WORDS_BIGENDIAN"), and it's part of commit
519655970 ("Move HOST_LONG_BITS to compiler.h")...probably my bad with a
rebase.

-- 
Marc-André Lureau

Re: [PATCH v4 2/7] qga: Move Linux-specific FS freeze/thaw code to a separate file

2022-10-13 Thread Marc-André Lureau

Hi

On Thu, Oct 13, 2022 at 1:23 PM Alexander Ivanov <
alexander.iva...@virtuozzo.com> wrote:

> In the next patches we are going to add FreeBSD support for QEMU Guest
> Agent. As a result, code in commands-posix.c will be too cumbersome.
>
> Move Linux-specific FS freeze/thaw code to a separate file commands-linux.c
> keeping common POSIX code in commands-posix.c.
>
> Reviewed-by: Konstantin Kostiuk 
> Reviewed-by: Marc-André Lureau 
> Signed-off-by: Alexander Ivanov 
> ---
>  qga/commands-common.h |  35 +
>  qga/commands-linux.c  | 286 +
>  qga/commands-posix.c  | 289 +++---
>  qga/meson.build   |   3 +
>  4 files changed, 340 insertions(+), 273 deletions(-)
>  create mode 100644 qga/commands-linux.c
>
> diff --git a/qga/commands-common.h b/qga/commands-common.h
> index d0e4a9696f..181fc330aa 100644
> --- a/qga/commands-common.h
> +++ b/qga/commands-common.h
> @@ -10,6 +10,40 @@
>  #define QGA_COMMANDS_COMMON_H
>
>  #include "qga-qapi-types.h"
> +#include "guest-agent-core.h"
> +#include "qemu/queue.h"
> +
> +#if defined(__linux__)
> +#include 
> +#ifdef FIFREEZE
> +#define CONFIG_FSFREEZE
> +#endif
> +#ifdef FITRIM
> +#define CONFIG_FSTRIM
> +#endif
> +#endif /* __linux__ */
> +
> +#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
> +typedef struct FsMount {
> +char *dirname;
> +char *devtype;
> +unsigned int devmajor, devminor;
> +QTAILQ_ENTRY(FsMount) next;
> +} FsMount;
> +
> +typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
> +
> +bool build_fs_mount_list(FsMountList *mounts, Error **errp);
> +void free_fs_mount_list(FsMountList *mounts);
> +#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
> +
> +#if defined(CONFIG_FSFREEZE)
> +int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
> +  strList *mountpoints,
> +  FsMountList mounts,
> +  Error **errp);
> +int qmp_guest_fsfreeze_do_thaw(Error **errp);
> +#endif /* CONFIG_FSFREEZE */
>
>  typedef struct GuestFileHandle GuestFileHandle;
>
> @@ -29,4 +63,5 @@ GuestFileRead *guest_file_read_unsafe(GuestFileHandle
> *gfh,
>   */
>  char *qga_get_host_name(Error **errp);
>
> +void ga_wait_child(pid_t pid, int *status, Error **errp);
>

This doesn't belong here, afaict.



>  #endif
> diff --git a/qga/commands-linux.c b/qga/commands-linux.c
> new file mode 100644
> index 00..214e408fcd
> --- /dev/null
> +++ b/qga/commands-linux.c
> @@ -0,0 +1,286 @@
> +/*
> + * QEMU Guest Agent Linux-specific command implementations
> + *
> + * Copyright IBM Corp. 2011
> + *
> + * Authors:
> + *  Michael Roth  
> + *  Michal Privoznik  
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "commands-common.h"
> +#include "cutils.h"
> +#include 
> +#include 
> +
> +#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
> +static int dev_major_minor(const char *devpath,
> +   unsigned int *devmajor, unsigned int *devminor)
> +{
> +struct stat st;
> +
> +*devmajor = 0;
> +*devminor = 0;
> +
> +if (stat(devpath, &st) < 0) {
> +slog("failed to stat device file '%s': %s", devpath,
> strerror(errno));
> +return -1;
> +}
> +if (S_ISDIR(st.st_mode)) {
> +/* It is bind mount */
> +return -2;
> +}
> +if (S_ISBLK(st.st_mode)) {
> +*devmajor = major(st.st_rdev);
> +*devminor = minor(st.st_rdev);
> +return 0;
> +}
> +return -1;
> +}
> +
> +static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error
> **errp)
> +{
> +struct mntent *ment;
> +FsMount *mount;
> +char const *mtab = "/proc/self/mounts";
> +FILE *fp;
> +unsigned int devmajor, devminor;
> +
> +fp = setmntent(mtab, "r");
> +if (!fp) {
> +error_setg(errp, "failed to open mtab file: '%s'", mtab);
> +return false;
> +}
> +
> +while ((ment = getmntent(fp))) {
> +/*
> + * An entry which device name doesn't start with a '/' is
> + * either a dummy file system or a network file system.
> + * Add special handling for smbfs and cifs as is done by
> + * coreutils as well.
> + */
> +if ((ment->mnt_fsname[0] != '/') ||
> +(strcmp(ment->mnt_type, "smbfs") == 0) ||
> +(strcmp(ment->mnt_type, "cifs") == 0)) {
> +continue;
> +}
> +if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) ==
> -2) {
> +/* Skip bind mounts */
> +continue;
> +}
> +
> +mount = g_new0(FsMount, 1);
> +mount->dirname = g_strdup(ment->mnt_dir);
> +mount->devtype = g_strdup(ment->mnt_type);
> +mount->devmajor = de

Re: [RFC 7/7] migration: call qemu_savevm_state_pending_exact() with the guest stopped

2022-10-13 Thread Joao Martins

+Avihai, +Jason

On 03/10/2022 04:16, Juan Quintela wrote:
> HACK ahead.
> 
> There are devices that require the guest to be stopped to tell us what
> is the size of its state. 

"... and have transferred said device state" if we are talking current vfio.

We can't query size of the data_fd right now

It would need a @data_size in addition to @data_fd in
vfio_device_feature_mig_state, or getting fseek supported over the fd

> So we need to stop the vm "before" we
> call the functions.
> 
> It is a hack because:
> - we are "starting" the guest again to stop it in migration_complete()
>   I know, I know, but it is not trivial to get all the information
>   easily to migration_complete(), so this hack.
> 

Could you expand on that? Naively, I was assuming that by 'all information' you
mean the locally stored counters in migration_iteration_run() that aren't
present in MigrateState?

> - auto_converge test fails with this hack.  I think that it is related
>   to previous problem.  We start the guest when it is supposed to be
>   stopped for convergence reasons.
> 
> - All experiments that I did to do the proper thing failed with having
>   the iothread_locked() or try to unlock() it when not locked.
> 
> - several of the pending functions are using the iothread lock
>   themselves, so I need to split it to have two versions (one for the
>   _estimate() case with the iothread lock), and another for the
>   _exact() case without the iothread_lock().  I want comments about
>   this approach before I try to continue on this direction.
> 
> Signed-off-by: Juan Quintela 
> ---
>  migration/migration.c| 13 +
>  tests/qtest/migration-test.c |  3 ++-
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 35e512887a..7374884818 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3742,7 +3742,20 @@ static MigIterateState 
> migration_iteration_run(MigrationState *s)
>  trace_migrate_pending_estimate(pending_size, s->threshold_size, 
> pend_pre, pend_post);
>  
>  if (pend_pre <= s->threshold_size) {
> +int old_state = s->state;
> +qemu_mutex_lock_iothread();
> +// is this really necessary?  it works for me both ways.
> +qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
> +s->vm_was_running = runstate_is_running();
> +vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> +qemu_mutex_unlock_iothread();
>  qemu_savevm_state_pending_exact(&pend_pre, &pend_post);
> +qemu_mutex_lock_iothread();
> +runstate_set(old_state);
> +if (s->vm_was_running) {
> +vm_start();
> +}
> +qemu_mutex_unlock_iothread();

Couldn't we just have an extra patch that just stores pend_pre and pending_size
in MigrateState, which would allow all this check to be moved into
migration_completion(). Or maybe that wasn't an option for some other reason?

Additionally what about having a migration helper function that
vfio_save_complete_precopy() callback needs to use to check if the
expected-device state size meets the threshold/downtime as it is saving the
device state and otherwise fail earlier accordingly when saving beyond the
threshold?

It would allow supporting both the (current UAPI) case where you need to
transfer the state to get device state size (so checking against threshold_size
pending_pre constantly would allow to not violate the SLA) as well as any other
UAPI improvement to fseek()/data_size that lets you fail even earlier.

Seems like at least it keeps some of the rules (both under iothread lock) as
this patch

[PATCH 00/24] More coroutine_fn fixes

Most of these were extracted from Alberto's static analysis series.
After this series, the only errors reported by the analyzer are:

- a call to bs->drv->bdrv_co_drain_begin from bdrv_open_driver.  This
  relies on bdrv_co_drain_begin not having to yield, which is indeed the
  case right after opening but is iffy

- assigning coroutine_fn to non-coroutine_fn in the human monitor
  for the two coroutine commands screendump and block_resize.

Paolo

Alberto Faria (20):
  backup: remove incorrect coroutine_fn annotation
  block: remove incorrect coroutine_fn annotation
  monitor: add missing coroutine_fn annotation
  ssh: add missing coroutine_fn annotation
  block: add missing coroutine_fn annotation to prototypes
  coroutine-lock: add missing coroutine_fn annotation to prototypes
  coroutine-io: add missing coroutine_fn annotation to prototypes
  block: add missing coroutine_fn annotation to BlockDriverState
callbacks
  qcow2: add coroutine_fn annotation for indirect-called functions
  commit: switch to *_co_* functions
  block: switch to *_co_* functions
  mirror: switch to *_co_* functions
  parallels: switch to *_co_* functions
  qcow: switch to *_co_* functions
  qcow2: switch to *_co_* functions
  qed: switch to *_co_* functions
  vdi: switch to *_co_* functions
  vhdx: switch to *_co_* functions
  vmdk: switch to *_co_* functions
  monitor: switch to *_co_* functions

Paolo Bonzini (4):
  blkdebug: add missing coroutine_fn annotation for indirect-called
functions
  qcow: manually add more coroutine_fn annotations
  qcow2: manually add more coroutine_fn annotations
  vmdk: manually add more coroutine_fn annotations

 block.c  |  2 +-
 block/backup.c   |  2 +-
 block/blkdebug.c |  2 +-
 block/commit.c   |  2 +-
 block/io.c   |  8 +--
 block/mirror.c   |  4 +-
 block/monitor/block-hmp-cmds.c   |  2 +-
 block/parallels.c| 28 +-
 block/qcow.c | 56 ++--
 block/qcow2-bitmap.c |  4 +-
 block/qcow2-cluster.c| 26 -
 block/qcow2-refcount.c   | 18 +++
 block/qcow2-snapshot.c   |  6 +--
 block/qcow2.c| 32 ++--
 block/qcow2.h| 32 ++--
 block/qed-table.c|  2 +-
 block/qed.c  | 12 ++---
 block/ssh.c  |  6 +--
 block/vdi.c  | 17 +++---
 block/vhdx.c |  8 +--
 block/vmdk.c | 90 
 blockdev.c   |  2 +-
 include/block/block-hmp-cmds.h   |  2 +-
 include/block/block-io.h |  5 +-
 include/block/block_int-common.h | 12 ++---
 include/monitor/hmp.h|  3 +-
 include/qemu/coroutine.h | 18 ---
 27 files changed, 202 insertions(+), 199 deletions(-)

-- 
2.37.3

[PATCH 02/24] block: remove incorrect coroutine_fn annotation

From: Alberto Faria 

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 block/io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index d30073036e..236b12da2a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2739,8 +2739,8 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState 
*bs, int64_t offset,
 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
 }
 
-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
-   int64_t bytes, int64_t *pnum)
+int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+  int64_t *pnum)
 {
 int ret;
 int64_t dummy;
-- 
2.37.3

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

2022-10-13 Thread Gregory Price



fwiw this is what my function looked like after the prior changes, very
similar to yours proposed below

/*
 * Build the CDAT table for a CXL type 3 device.
 *
 * @cdat_table: out parameter; on success it receives a newly allocated
 *              array of CDATSubHeader pointers.  It is left untouched when
 *              no memory backend is configured and reset to NULL when
 *              building fails.
 * @priv:       the CXLType3Dev the table is built for.
 *
 * One group of CT3_CDAT_SUBTABLE_SIZE entries is reserved per configured
 * memory backend; the volatile group, if any, is placed first and the
 * non-volatile group follows it.
 *
 * Returns the total number of entries stored in *cdat_table, 0 if the
 * device has neither a volatile nor a persistent backend, -EINVAL if a
 * backend has no memory region, or the negative value reported by
 * ct3_build_cdat_subtable().
 */
static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
                                void *priv)
{
    CXLType3Dev *ct3d = priv;
    MemoryRegion *vmr = NULL, *pmr = NULL;
    uint64_t dpa_base = 0;
    int dsmad_handle = 0;
    int num_ents = 0;
    int cur_ent = 0;
    int ret = 0;
    int i;

    if (ct3d->hostvmem) {
        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
        if (!vmr) {
            return -EINVAL;
        }
        num_ents += CT3_CDAT_SUBTABLE_SIZE;
    }
    if (ct3d->hostpmem) {
        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
        if (!pmr) {
            return -EINVAL;
        }
        num_ents += CT3_CDAT_SUBTABLE_SIZE;
    }
    if (!num_ents) {
        return 0;
    }

    /*
     * Size the array by its element type (CDATSubHeader *).  g_malloc0()
     * aborts on allocation failure, so no NULL check is needed; the zero
     * fill keeps unbuilt slots NULL so the error path can free every slot.
     */
    *cdat_table = g_malloc0(num_ents * sizeof(**cdat_table));

    /* Volatile aspects are mapped first */
    if (vmr) {
        ret = ct3_build_cdat_subtable(*cdat_table, vmr, dsmad_handle++,
                                      false, dpa_base);
        if (ret < 0) {
            goto error_cleanup;
        }
        /* NOTE(review): assumes vmr->size converts to uint64_t - confirm */
        dpa_base = vmr->size;
        cur_ent += ret;
    }
    /* Non volatile aspects */
    if (pmr) {
        /* non-volatile entries follow the volatile entries */
        ret = ct3_build_cdat_subtable(&(*cdat_table)[cur_ent], pmr,
                                      dsmad_handle, true, dpa_base);
        if (ret < 0) {
            goto error_cleanup;
        }
        cur_ent += ret;
    }
    assert(cur_ent == num_ents);

    /* Report the whole table length, not just the last subtable's count. */
    return cur_ent;

error_cleanup:
    for (i = 0; i < num_ents; i++) {
        /* Index the allocated table, not the caller's out-pointer. */
        g_free((*cdat_table)[i]);
    }
    g_free(*cdat_table);
    *cdat_table = NULL;
    return ret;
}


On Thu, Oct 13, 2022 at 12:53:13PM +0100, Jonathan Cameron wrote:
> On Thu, 13 Oct 2022 07:36:28 -0400
> Gregory Price  wrote:
> 
> > Reading through your notes, everything seems reasonable, though I'm not
> > sure I agree with the two pass notion, though I'll wait to see the patch
> > set.
> > 
> > The enum is a good idea, *forehead slap*, I should have done it.  If we
> > have a local enum, why not just make it global (within the file) and
> > allocate the table as I have once we know how many MRs are present?
> 
> It's not global as we need the entries to be packed.  So if just one mr
> (whichever one) the entries for that need to be at the beginning of
> cdat_table.  I also don't want to bake into the outer caller that the
> entries will always be the same size for different MRs.
> 
> For the two pass case...
> 
> I'll send code in a few mins, but in the meantime my thought is that
> the extended code for volatile + non volatile will look something like:
> (variable names made up)
> 
>   if (ct3d->volatile_mem) {
>   volatile_mr = 
> host_memory_backend_get_memory(ct3d->volatile_mem);
>   if (!volatile_mr) {
>   return -ENINVAL;
>   }
>   rc = ct3_build_cdat_entries_for_mr(NULL, dsmad++, volatile_mr);
>   if (rc < 0) {
>   return rc;
>   }
>   volatile_len = rc;
>   }
> 
>   if (ct3d->nonvolatile_mem) {
>   nonvolatile_mr = 
> host_memory_backend_get_memory(ct3d->nonvolatile_mem);
>   if (!nonvolatile_mr) {
>   return -ENINVAL;
>   }
>   rc = ct3_build_cdat_entries_for_mr(NULL, dmsmad++, 
> nonvolatile_mr);
>   if (rc < 0) {
>   return rc;
>   }
>   nonvolatile_len = rc;
>   }
> 
>   dsmad = 0;
> 
>   table = g_malloc(0, (volatile_len + nonvolatile_len) * sizeof(*table));
>   if (!table) {
>   return -ENOMEM;
>   }
>   
>   if (volatile_len) {
>   rc = ct3_build_cdat_entries_for_mr(&table[0], dmsad++, 
> volatile_mr);
>   if (rc < 0) {
>   return rc;
>   }
>   }   
>   if (nonvolatile_len) {
>   rc = ct3_build_cdat_entries_for_mr(&table[volatile_len], 
> dsmad++, nonvolatile_mr...);
>   if (rc < 0) {
>   /* Only place we need error handling.  Could make it 
> more generic of course */
>   for (i = 0; i < volatile_len; i++) {
>   g_free(cdat_table[i]);
>   }
>   return rc;
>   }
>   }
> 
>   *cdat_table = g_steal_pointer(&table);
> 
> 
> Jonathan
> 
> > 
> > 6 eggs/half dozen though, I'm ultimately fine with either.
> > 
> > On Thu, Oct 13, 2022, 4:58 AM Jonathan Cameron 
> > wrote:
> > 
> > > On Wed, 12 Oct 2022 14:21:15 -0400
> > > Gregory Price  wrote:
> > >  
> > > > Included in this response is a recommended patch set on top of this
> > > > patch that resolves a number of issues, including style and a heap
> > > > corruption bug.
> >

[PATCH 03/24] monitor: add missing coroutine_fn annotation

From: Alberto Faria 

hmp_block_resize and hmp_screendump are defined as ".coroutine = true"
commands, so they must be coroutine_fn.

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 block/monitor/block-hmp-cmds.c | 2 +-
 include/block/block-hmp-cmds.h | 2 +-
 include/monitor/hmp.h  | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 939a520d17..b6135e9bfe 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -489,7 +489,7 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, err);
 }
 
-void hmp_block_resize(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_block_resize(Monitor *mon, const QDict *qdict)
 {
 const char *device = qdict_get_str(qdict, "device");
 int64_t size = qdict_get_int(qdict, "size");
diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
index 50ce0247c3..ba0593c440 100644
--- a/include/block/block-hmp-cmds.h
+++ b/include/block/block-hmp-cmds.h
@@ -38,7 +38,7 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
 
-void hmp_block_resize(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_block_resize(Monitor *mon, const QDict *qdict);
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_passwd(Monitor *mon, const QDict *qdict);
 void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index a9cf064ee8..dfbc0c9a2f 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -15,6 +15,7 @@
 #define HMP_H
 
 #include "qemu/readline.h"
+#include "qemu/coroutine.h"
 #include "qapi/qapi-types-common.h"
 
 bool hmp_handle_error(Monitor *mon, Error *err);
@@ -81,7 +82,7 @@ void hmp_netdev_del(Monitor *mon, const QDict *qdict);
 void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
-void hmp_screendump(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
 void hmp_chardev_change(Monitor *mon, const QDict *qdict);
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict);
-- 
2.37.3

[PATCH 07/24] coroutine-io: add missing coroutine_fn annotation to prototypes

From: Alberto Faria 

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 include/qemu/coroutine.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index d848489b65..06d323143c 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -378,8 +378,9 @@ void qemu_coroutine_dec_pool_size(unsigned int 
additional_pool_size);
  * The same interface as qemu_sendv_recvv(), with added yielding.
  * XXX should mark these as coroutine_fn
  */
-ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
-size_t offset, size_t bytes, bool do_send);
+ssize_t coroutine_fn qemu_co_sendv_recvv(int sockfd, struct iovec *iov,
+ unsigned iov_cnt, size_t offset,
+ size_t bytes, bool do_send);
 #define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
   qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
 #define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
@@ -388,7 +389,8 @@ ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, 
unsigned iov_cnt,
 /**
  * The same as above, but with just a single buffer
  */
-ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send);
+ssize_t coroutine_fn qemu_co_send_recv(int sockfd, void *buf, size_t bytes,
+   bool do_send);
 #define qemu_co_recv(sockfd, buf, bytes) \
   qemu_co_send_recv(sockfd, buf, bytes, false)
 #define qemu_co_send(sockfd, buf, bytes) \
-- 
2.37.3

[PATCH 04/24] ssh: add missing coroutine_fn annotation

From: Alberto Faria 

ssh_write is only called from ssh_co_writev.

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 block/ssh.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/ssh.c b/block/ssh.c
index a2dc646536..ceb4f4c5bc 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -1129,9 +1129,9 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
 return ret;
 }
 
-static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
- int64_t offset, size_t size,
- QEMUIOVector *qiov)
+static coroutine_fn int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
+  int64_t offset, size_t size,
+  QEMUIOVector *qiov)
 {
 ssize_t r;
 size_t written;
-- 
2.37.3

[PATCH 06/24] coroutine-lock: add missing coroutine_fn annotation to prototypes

From: Alberto Faria 

The functions are marked coroutine_fn in the definition.

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 include/qemu/coroutine.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index aae33cce17..d848489b65 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -276,7 +276,7 @@ void qemu_co_rwlock_init(CoRwlock *lock);
  * of a parallel writer, control is transferred to the caller of the current
  * coroutine.
  */
-void qemu_co_rwlock_rdlock(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock);
 
 /**
  * Write Locks the CoRwlock from a reader.  This is a bit more efficient than
@@ -285,7 +285,7 @@ void qemu_co_rwlock_rdlock(CoRwlock *lock);
  * to the caller of the current coroutine; another writer might run while
  * @qemu_co_rwlock_upgrade blocks.
  */
-void qemu_co_rwlock_upgrade(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock);
 
 /**
  * Downgrades a write-side critical section to a reader.  Downgrading with
@@ -293,20 +293,20 @@ void qemu_co_rwlock_upgrade(CoRwlock *lock);
  * followed by @qemu_co_rwlock_rdlock.  This makes it more efficient, but
  * may also sometimes be necessary for correctness.
  */
-void qemu_co_rwlock_downgrade(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock);
 
 /**
  * Write Locks the mutex. If the lock cannot be taken immediately because
  * of a parallel reader, control is transferred to the caller of the current
  * coroutine.
  */
-void qemu_co_rwlock_wrlock(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock);
 
 /**
  * Unlocks the read/write lock and schedules the next coroutine that was
  * waiting for this lock to be run.
  */
-void qemu_co_rwlock_unlock(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock);
 
 typedef struct QemuCoSleep {
 Coroutine *to_wake;
-- 
2.37.3

[PATCH 13/24] vmdk: manually add more coroutine_fn annotations

The validity of these was double-checked with Alberto Faria's static analyzer.

Signed-off-by: Paolo Bonzini 
---
 block/vmdk.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index f7d8856dfb..c720376aa5 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1404,13 +1404,13 @@ static void vmdk_refresh_limits(BlockDriverState *bs, 
Error **errp)
  * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
  * it for call to write user data in the request.
  */
-static int get_whole_cluster(BlockDriverState *bs,
- VmdkExtent *extent,
- uint64_t cluster_offset,
- uint64_t offset,
- uint64_t skip_start_bytes,
- uint64_t skip_end_bytes,
- bool zeroed)
+static int coroutine_fn get_whole_cluster(BlockDriverState *bs,
+  VmdkExtent *extent,
+  uint64_t cluster_offset,
+  uint64_t offset,
+  uint64_t skip_start_bytes,
+  uint64_t skip_end_bytes,
+  bool zeroed)
 {
 int ret = VMDK_OK;
 int64_t cluster_bytes;
@@ -1485,8 +1485,8 @@ exit:
 return ret;
 }
 
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
- uint32_t offset)
+static int coroutine_fn vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
+  uint32_t offset)
 {
 offset = cpu_to_le32(offset);
 /* update L2 table */
@@ -1537,14 +1537,14 @@ static int vmdk_L2update(VmdkExtent *extent, 
VmdkMetaData *m_data,
  *  VMDK_UNALLOC if cluster is not mapped and @allocate is false.
  *  VMDK_ERROR if failed.
  */
-static int get_cluster_offset(BlockDriverState *bs,
-  VmdkExtent *extent,
-  VmdkMetaData *m_data,
-  uint64_t offset,
-  bool allocate,
-  uint64_t *cluster_offset,
-  uint64_t skip_start_bytes,
-  uint64_t skip_end_bytes)
+static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
+   VmdkExtent *extent,
+   VmdkMetaData *m_data,
+   uint64_t offset,
+   bool allocate,
+   uint64_t *cluster_offset,
+   uint64_t skip_start_bytes,
+   uint64_t skip_end_bytes)
 {
 unsigned int l1_index, l2_offset, l2_index;
 int min_index, i, j;
-- 
2.37.3

[PATCH 08/24] block: add missing coroutine_fn annotation to BlockDriverState callbacks

From: Alberto Faria 

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 block/qcow2.h| 14 +++---
 include/block/block_int-common.h | 12 +---
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index 3e7c5e80b6..ad6e7f65bd 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -991,13 +991,13 @@ int qcow2_truncate_bitmaps_check(BlockDriverState *bs, 
Error **errp);
 bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
   bool release_stored, Error **errp);
 int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
-bool qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
-int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
-const char *name,
-Error **errp);
+bool coroutine_fn qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  uint32_t granularity,
+  Error **errp);
+int coroutine_fn qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+ const char *name,
+ Error **errp);
 bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs);
 uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs,
 uint32_t cluster_size);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..16c45d1262 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -731,13 +731,11 @@ struct BlockDriver {
 void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
 
 bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
-bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
-   const char *name,
-   uint32_t granularity,
-   Error **errp);
-int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
-  const char *name,
-  Error **errp);
+bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
+BlockDriverState *bs, const char *name, uint32_t granularity,
+Error **errp);
+int coroutine_fn (*bdrv_co_remove_persistent_dirty_bitmap)(
+BlockDriverState *bs, const char *name, Error **errp);
 };
 
 static inline bool block_driver_can_compress(BlockDriver *drv)
-- 
2.37.3

[PATCH 01/24] backup: remove incorrect coroutine_fn annotation

From: Alberto Faria 

The .set_speed callback is not called from coroutine.

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 block/backup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/backup.c b/block/backup.c
index b2b649e305..6a9ad97a53 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -309,7 +309,7 @@ static void coroutine_fn backup_pause(Job *job)
 }
 }
 
-static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
+static void backup_set_speed(BlockJob *job, int64_t speed)
 {
 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
 
-- 
2.37.3

[PATCH 10/24] blkdebug: add missing coroutine_fn annotation for indirect-called functions

Signed-off-by: Paolo Bonzini 
---
 block/blkdebug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index bbf2948703..b159a9b825 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -672,7 +672,7 @@ blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, 
int64_t bytes,
 return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static int blkdebug_co_flush(BlockDriverState *bs)
+static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
 {
 int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
 
-- 
2.37.3

[PATCH 11/24] qcow: manually add more coroutine_fn annotations

get_cluster_offset() and decompress_cluster() are only called from
the read and write paths.

The validity of these was double-checked with Alberto Faria's static analyzer.

Signed-off-by: Paolo Bonzini 
---
 block/qcow.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 311aaa8705..7f07c00c0f 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -92,7 +92,7 @@ typedef struct BDRVQcowState {
 
 static QemuOptsList qcow_create_opts;
 
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+static int coroutine_fn decompress_cluster(BlockDriverState *bs, uint64_t 
cluster_offset);
 
 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -351,10 +351,10 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
  * return 0 if not allocated, 1 if *result is assigned, and negative
  * errno on failure.
  */
-static int get_cluster_offset(BlockDriverState *bs,
-  uint64_t offset, int allocate,
-  int compressed_size,
-  int n_start, int n_end, uint64_t *result)
+static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
+   uint64_t offset, int allocate,
+   int compressed_size,
+   int n_start, int n_end, uint64_t 
*result)
 {
 BDRVQcowState *s = bs->opaque;
 int min_index, i, j, l1_index, l2_index, ret;
@@ -585,7 +585,7 @@ static int decompress_buffer(uint8_t *out_buf, int 
out_buf_size,
 return 0;
 }
 
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+static int coroutine_fn decompress_cluster(BlockDriverState *bs, uint64_t 
cluster_offset)
 {
 BDRVQcowState *s = bs->opaque;
 int ret, csize;
-- 
2.37.3

[PATCH 21/24] vdi: switch to _co_ functions

From: Alberto Faria 

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 block/vdi.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index e942325455..2ecf47216a 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -664,7 +664,8 @@ vdi_co_pwritev(BlockDriverState *bs, int64_t offset, 
int64_t bytes,
  * so this full-cluster write does not overlap a partial write
  * of the same cluster, issued from the "else" branch.
  */
-ret = bdrv_pwrite(bs->file, data_offset, s->block_size, block, 0);
+ret = bdrv_co_pwrite(bs->file, data_offset, s->block_size, block,
+ 0);
 qemu_co_rwlock_unlock(&s->bmap_lock);
 } else {
 nonallocating_write:
@@ -709,7 +710,7 @@ nonallocating_write:
 assert(VDI_IS_ALLOCATED(bmap_first));
 *header = s->header;
 vdi_header_to_le(header);
-ret = bdrv_pwrite(bs->file, 0, sizeof(*header), header, 0);
+ret = bdrv_co_pwrite(bs->file, 0, sizeof(*header), header, 0);
 g_free(header);
 
 if (ret < 0) {
@@ -726,8 +727,8 @@ nonallocating_write:
 base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
 logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
-ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE,
-  n_sectors * SECTOR_SIZE, base, 0);
+ret = bdrv_co_pwrite(bs->file, offset * SECTOR_SIZE,
+ n_sectors * SECTOR_SIZE, base, 0);
 }
 
 return ret;
@@ -845,7 +846,7 @@ static int coroutine_fn 
vdi_co_do_create(BlockdevCreateOptions *create_options,
 vdi_header_print(&header);
 }
 vdi_header_to_le(&header);
-ret = blk_pwrite(blk, offset, sizeof(header), &header, 0);
+ret = blk_co_pwrite(blk, offset, sizeof(header), &header, 0);
 if (ret < 0) {
 error_setg(errp, "Error writing header");
 goto exit;
@@ -866,7 +867,7 @@ static int coroutine_fn 
vdi_co_do_create(BlockdevCreateOptions *create_options,
 bmap[i] = VDI_UNALLOCATED;
 }
 }
-ret = blk_pwrite(blk, offset, bmap_size, bmap, 0);
+ret = blk_co_pwrite(blk, offset, bmap_size, bmap, 0);
 if (ret < 0) {
 error_setg(errp, "Error writing bmap");
 goto exit;
@@ -875,8 +876,8 @@ static int coroutine_fn 
vdi_co_do_create(BlockdevCreateOptions *create_options,
 }
 
 if (image_type == VDI_TYPE_STATIC) {
-ret = blk_truncate(blk, offset + blocks * block_size, false,
-   PREALLOC_MODE_OFF, 0, errp);
+ret = blk_co_truncate(blk, offset + blocks * block_size, false,
+  PREALLOC_MODE_OFF, 0, errp);
 if (ret < 0) {
 error_prepend(errp, "Failed to statically allocate file");
 goto exit;
-- 
2.37.3

[PATCH 05/24] block: add missing coroutine_fn annotation to prototypes

From: Alberto Faria 

The functions are marked coroutine_fn in the definition.

Signed-off-by: Alberto Faria 
Signed-off-by: Paolo Bonzini 
---
 include/block/block-io.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 492f95fc05..770ddeb7c8 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -83,12 +83,13 @@ void bdrv_aio_cancel(BlockAIOCB *acb);
 void bdrv_aio_cancel_async(BlockAIOCB *acb);
 
 /* sg packet commands */
-int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
 
 /* Ensure contents are flushed to disk.  */
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
 
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
+  int64_t bytes);
 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
 int bdrv_block_status(BlockDriverState *bs, int64_t offset,
   int64_t bytes, int64_t *pnum, int64_t *map,
-- 
2.37.3

[PATCH 09/24] qcow2: add coroutine_fn annotation for indirect-called functions