Re: [PATCH v3 13/70] i386: Introduce tdx-guest object

2023-12-04 Thread Xiaoyao Li

On 12/1/2023 6:52 PM, Markus Armbruster wrote:

Xiaoyao Li  writes:


Introduce tdx-guest object which implements the interface of
CONFIDENTIAL_GUEST_SUPPORT, and will be used to create TDX VMs (TDs) by

   qemu -machine ...,confidential-guest-support=tdx0\
-object tdx-guest,id=tdx0

It has only one member 'attributes' with fixed value 0 and not
configurable so far.

Signed-off-by: Xiaoyao Li 
Acked-by: Gerd Hoffmann 
Acked-by: Markus Armbruster 


[...]


diff --git a/qapi/qom.json b/qapi/qom.json
index c53ef978ff7e..8e08257dac2f 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -878,6 +878,16 @@
  'reduced-phys-bits': 'uint32',
  '*kernel-hashes': 'bool' } }
  
+##

+# @TdxGuestProperties:
+#
+# Properties for tdx-guest objects.
+#
+# Since: 8.2


Going to be 9.0.


will update it and all others.

(I left it as 8.2 because I was not sure next version is 8.3 or 9.0)


+##
+{ 'struct': 'TdxGuestProperties',
+  'data': { }}
+
  ##
  # @ThreadContextProperties:
  #
@@ -956,6 +966,7 @@
  'sev-guest',
  'thread-context',
  's390-pv-guest',
+'tdx-guest',
  'throttle-group',
  'tls-creds-anon',
  'tls-creds-psk',
@@ -1022,6 +1033,7 @@
'secret_keyring': { 'type': 'SecretKeyringProperties',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest':  'SevGuestProperties',
+  'tdx-guest':  'TdxGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',


[...]







Re: [PATCH 2/3] hw/virtio: cleanup shared resources

2023-12-04 Thread Marc-André Lureau
Hi

On Tue, Nov 7, 2023 at 1:37 PM Albert Esteve  wrote:
>
> Ensure that we cleanup all virtio shared
> resources when the vhost devices is cleaned
> up (after a hot unplug, or a crash).
>
> To track all owned uuids of a device, add
> a GSList to the vhost_dev struct. This way
> we can avoid traversing the full table
> for every cleanup, whether they actually
> own any shared resource or not.
>
> Signed-off-by: Albert Esteve 
> ---
>  hw/virtio/vhost-user.c| 2 ++
>  hw/virtio/vhost.c | 4 
>  include/hw/virtio/vhost.h | 6 ++
>  3 files changed, 12 insertions(+)
>
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 5fdff0241f..04848d1fa0 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -1598,6 +1598,7 @@ vhost_user_backend_handle_shared_object_add(struct 
> vhost_dev *dev,
>  QemuUUID uuid;
>
>  memcpy(uuid.data, object->uuid, sizeof(object->uuid));
> +dev->shared_uuids = g_slist_append(dev->shared_uuids, &uuid);

This will point to the stack variable.

>  return virtio_add_vhost_device(&uuid, dev);
>  }
>
> @@ -1623,6 +1624,7 @@ vhost_user_backend_handle_shared_object_remove(struct 
> vhost_dev *dev,
>  }
>
>  memcpy(uuid.data, object->uuid, sizeof(object->uuid));
> +dev->shared_uuids = g_slist_remove_all(dev->shared_uuids, &uuid);
>  return virtio_remove_resource(&uuid);
>  }
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 9c9ae7109e..3aff94664b 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -16,6 +16,7 @@
>  #include "qemu/osdep.h"
>  #include "qapi/error.h"
>  #include "hw/virtio/vhost.h"
> +#include "hw/virtio/virtio-dmabuf.h"
>  #include "qemu/atomic.h"
>  #include "qemu/range.h"
>  #include "qemu/error-report.h"
> @@ -1599,6 +1600,9 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
>  migrate_del_blocker(&hdev->migration_blocker);
>  g_free(hdev->mem);
>  g_free(hdev->mem_sections);
> +/* free virtio shared objects */
> +g_slist_foreach(hdev->shared_uuids, (GFunc)virtio_remove_resource, NULL);
> +g_slist_free_full(g_steal_pointer(&hdev->shared_uuids), g_free);

(and will crash here)

Imho, you should just traverse the hashtable, instead of introducing
another list.

>  if (hdev->vhost_ops) {
>  hdev->vhost_ops->vhost_backend_cleanup(hdev);
>  }
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 5e8183f64a..376bc8446d 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -118,6 +118,12 @@ struct vhost_dev {
>   */
>  uint64_t protocol_features;
>
> +/**
> + * @shared_uuids: contains the UUIDs of all the exported
> + * virtio objects owned by the vhost device.
> + */
> +GSList *shared_uuids;
> +
>  uint64_t max_queues;
>  uint64_t backend_cap;
>  /* @started: is the vhost device started? */
> --
> 2.41.0
>


-- 
Marc-André Lureau



Re: [PATCH for 8.2] hw/audio/virtio-sound: mark the device as unmigratable

2023-12-04 Thread Marc-André Lureau
Hi

On Mon, Dec 4, 2023 at 11:30 AM Volker Rümelin  wrote:
>
> The virtio-sound device is currently not migratable. QEMU crashes
> on the source machine at some point during the migration with a
> segmentation fault.
>
> Even with this bug fixed, the virtio-sound device doesn't migrate
> the state of the audio streams. For example, running streams leave
> the device on the destination machine in a broken condition.
>
> Mark the device as unmigratable until these issues have been fixed.
>
> Signed-off-by: Volker Rümelin 

Reviewed-by: Marc-André Lureau 

I'll queue this.

> ---
>  hw/audio/virtio-snd.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c
> index 2fe966e311..b10fad1228 100644
> --- a/hw/audio/virtio-snd.c
> +++ b/hw/audio/virtio-snd.c
> @@ -68,6 +68,7 @@ static const VMStateDescription vmstate_virtio_snd_device = 
> {
>
>  static const VMStateDescription vmstate_virtio_snd = {
>  .name = TYPE_VIRTIO_SND,
> +.unmigratable = 1,
>  .minimum_version_id = VIRTIO_SOUND_VM_VERSION,
>  .version_id = VIRTIO_SOUND_VM_VERSION,
>  .fields = (VMStateField[]) {
> --
> 2.35.3
>
>


-- 
Marc-André Lureau



Re: [PATCH for 8.2] hw/audio/virtio-sound: mark the device as unmigratable

2023-12-04 Thread Manos Pitsidianakis
On Mon, 4 Dec 2023 at 09:29, Volker Rümelin  wrote:
>
> The virtio-sound device is currently not migratable. QEMU crashes
> on the source machine at some point during the migration with a
> segmentation fault.
>
> Even with this bug fixed, the virtio-sound device doesn't migrate
> the state of the audio streams. For example, running streams leave
> the device on the destination machine in a broken condition.
>
> Mark the device as unmigratable until these issues have been fixed.
>
> Signed-off-by: Volker Rümelin 
> ---
>  hw/audio/virtio-snd.c | 1 +
>  1 file changed, 1 insertion(+)

Hello Volker!

I don't have the slightest clue on how migration works, but if this is
enough to prevent those bugs from happening, it sounds good to me.

Reviewed-by: Manos Pitsidianakis 



[PULL 0/3] UI patches

2023-12-04 Thread marcandre . lureau
From: Marc-André Lureau 

The following changes since commit 29b5d70cb70574b499517ec9e9f80dea496a3cc0:

  Merge tag 'pull-ppc-for-8.2-20231130' of https://gitlab.com/npiggin/qemu into 
staging (2023-12-01 07:29:52 -0500)

are available in the Git repository at:

  https://gitlab.com/marcandre.lureau/qemu.git tags/ui-pull-request

for you to fetch changes up to 551ef0fa05c11abd62f4607ee3cddbcb7dea6b66:

  hw/audio/virtio-sound: mark the device as unmigratable (2023-12-04 12:04:36 
+0400)


ui/audio fixes for 8.2



Fiona Ebner (1):
  ui/vnc-clipboard: fix inflate_buffer

Volker Rümelin (2):
  ui/gtk-egl: move function calls back to regular code path
  hw/audio/virtio-sound: mark the device as unmigratable

 hw/audio/virtio-snd.c |  1 +
 ui/gtk-egl.c  | 12 ++--
 ui/vnc-clipboard.c|  5 +
 3 files changed, 12 insertions(+), 6 deletions(-)

-- 
2.43.0




[PULL 2/3] ui/vnc-clipboard: fix inflate_buffer

2023-12-04 Thread marcandre . lureau
From: Fiona Ebner 

Commit d921fea338 ("ui/vnc-clipboard: fix infinite loop in
inflate_buffer (CVE-2023-3255)") removed this hunk, but it is still
required, because it can happen that stream.avail_in becomes zero
before coming across a return value of Z_STREAM_END in the loop.

This fixes the host->guest direction of the clipboard with noVNC and
TigerVNC as clients.

Fixes: d921fea338 ("ui/vnc-clipboard: fix infinite loop in inflate_buffer 
(CVE-2023-3255)")
Reported-by: Friedrich Weber 
Signed-off-by: Fiona Ebner 
Acked-by: Marc-André Lureau 
Message-Id: <20231122125826.228189-1-f.eb...@proxmox.com>
---
 ui/vnc-clipboard.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/ui/vnc-clipboard.c b/ui/vnc-clipboard.c
index c759be3438..124b6fbd9c 100644
--- a/ui/vnc-clipboard.c
+++ b/ui/vnc-clipboard.c
@@ -69,6 +69,11 @@ static uint8_t *inflate_buffer(uint8_t *in, uint32_t in_len, 
uint32_t *size)
 }
 }
 
+*size = stream.total_out;
+inflateEnd(&stream);
+
+return out;
+
 err_end:
 inflateEnd(&stream);
 err:
-- 
2.43.0




[PULL 3/3] hw/audio/virtio-sound: mark the device as unmigratable

2023-12-04 Thread marcandre . lureau
From: Volker Rümelin 

The virtio-sound device is currently not migratable. QEMU crashes
on the source machine at some point during the migration with a
segmentation fault.

Even with this bug fixed, the virtio-sound device doesn't migrate
the state of the audio streams. For example, running streams leave
the device on the destination machine in a broken condition.

Mark the device as unmigratable until these issues have been fixed.

Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <20231204072837.6058-1-vr_q...@t-online.de>
---
 hw/audio/virtio-snd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c
index 2fe966e311..b10fad1228 100644
--- a/hw/audio/virtio-snd.c
+++ b/hw/audio/virtio-snd.c
@@ -68,6 +68,7 @@ static const VMStateDescription vmstate_virtio_snd_device = {
 
 static const VMStateDescription vmstate_virtio_snd = {
 .name = TYPE_VIRTIO_SND,
+.unmigratable = 1,
 .minimum_version_id = VIRTIO_SOUND_VM_VERSION,
 .version_id = VIRTIO_SOUND_VM_VERSION,
 .fields = (VMStateField[]) {
-- 
2.43.0




[PULL 1/3] ui/gtk-egl: move function calls back to regular code path

2023-12-04 Thread marcandre . lureau
From: Volker Rümelin 

Commit 6f189a08c1 ("ui/gtk-egl: Check EGLSurface before doing
scanout") introduced a regression when QEMU is running with a
virtio-gpu-gl-device on a host under X11. After the guest has
initialized the virtio-gpu-gl-device, the guest screen only
shows "Display output is not active.".

Commit 6f189a08c1 moved all function calls in
gd_egl_scanout_texture() to a code path which is only called
once after gd_egl_init() succeeds in gd_egl_scanout_texture().
Move all function calls in gd_egl_scanout_texture() back to
the regular code path so they get always called if one of the
gd_egl_init() calls was successful.

Fixes: 6f189a08c1 ("ui/gtk-egl: Check EGLSurface before doing scanout")
Signed-off-by: Volker Rümelin 
Reviewed-by: Marc-André Lureau 
Message-Id: <2023104020.26183-1-vr_q...@t-online.de>
---
 ui/gtk-egl.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index cd2f176502..3af5ac5bcf 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -249,14 +249,14 @@ void gd_egl_scanout_texture(DisplayChangeListener *dcl,
 if (!vc->gfx.esurface) {
 return;
 }
+}
 
-eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
-   vc->gfx.esurface, vc->gfx.ectx);
+eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
+   vc->gfx.esurface, vc->gfx.ectx);
 
-gtk_egl_set_scanout_mode(vc, true);
-egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height,
- backing_id, false);
-}
+gtk_egl_set_scanout_mode(vc, true);
+egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height,
+ backing_id, false);
 }
 
 void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
-- 
2.43.0




[RFC 0/3] scripts/checkpatch: Add code spelling check

2023-12-04 Thread Zhao Liu
From: Zhao Liu 

Inspired by Linux's spelling check, QEMU could also add the similar
support in checkpatch.pl to help ease the burden on trivial's
maintainers ;-).

QEMU's checkpatch.pl is mainly based on an older version of Linux's
checkpatch.pl, so this RFC ports Linux's codespell-related functionality
to QEMU.

This RFC contains mainly the following work:
* Added default typo dictionary "selling.text". When using checkpatch.pl,
  it will be based on selling.txt for typo checking.
* Added "--codespell" and "--codespellfile" options for enhanced spell
  checking. The former will use either the system dictionary or a python
  dictionary for spell checking, while the latter allows the user to use
  a customized dictionary.
* Based on the QEMU typo statistics from v7.0.0 to v8.2.0-rc2, updated
  the default dictionary "selling.txt" to be more in line with QEMU
  developers' "typo habits".

Thanks and Best Regards,
Zhao
---
Zhao Liu (3):
  scripts/checkpatch: Check common spelling be default
  scripts/checkpatch: Add --codespell and --codespellfile options
  scripts/spelling: Add common spelling mistakes in default spelling.txt

 scripts/checkpatch.pl |  110 ++-
 scripts/spelling.txt  | 1729 +
 2 files changed, 1838 insertions(+), 1 deletion(-)
 create mode 100644 scripts/spelling.txt

-- 
2.34.1




[RFC 2/3] scripts/checkpatch: Add --codespell and --codespellfile options

2023-12-04 Thread Zhao Liu
From: Zhao Liu 

Add two spelling check options (--codespell and --codespellfile) to
enhance spelling check through dictionary, which copied the Linux
kernel's implementation in checkpatch.pl.

Signed-off-by: Zhao Liu 
---
 scripts/checkpatch.pl | 66 ++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 755a1f683866..20ee07f016ca 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -38,7 +38,10 @@ my $summary_file = 0;
 my $root;
 my %debug;
 my $help = 0;
+my $codespell = 0;
 my $spelling_file = "$D/spelling.txt";
+my $codespellfile = "/usr/share/codespell/dictionary.txt";
+my $user_codespellfile = "";
 
 sub help {
my ($exitcode) = @_;
@@ -70,6 +73,9 @@ Options:
  is all off)
   --test-only=WORD   report only warnings/errors containing WORD
  literally
+  --codespellUse the codespell dictionary for spelling/typos
+ (default:$codespellfile)
+  --codespellfileUse this codespell dictionary
   --color[=WHEN] Use colors 'always', 'never', or only when output
  is a terminal ('auto'). Default is 'auto'.
   -h, --help, --version  display this help and exit
@@ -102,15 +108,37 @@ GetOptions(
'summary!'  => \$summary,
'mailback!' => \$mailback,
'summary-file!' => \$summary_file,
-
'debug=s'   => \%debug,
'test-only=s'   => \$tst_only,
+   'codespell!'=> \$codespell,
+   'codespellfile=s'   => \$user_codespellfile,
'color=s'   => \$color,
'no-color'  => sub { $color = 'never'; },
'h|help'=> \$help,
'version'   => \$help
 ) or help(1);
 
+if ($user_codespellfile) {
+   # Use the user provided codespell file unconditionally
+   $codespellfile = $user_codespellfile;
+} elsif (!(-f $codespellfile)) {
+   # If /usr/share/codespell/dictionary.txt is not present, try to find it
+   # under codespell's install directory: 
/data/dictionary.txt
+   if (($codespell || $help) && which("python3") ne "") {
+   my $python_codespell_dict = << "EOF";
+
+import os.path as op
+import codespell_lib
+codespell_dir = op.dirname(codespell_lib.__file__)
+codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt')
+print(codespell_file, end='')
+EOF
+
+   my $codespell_dict = `python3 -c "$python_codespell_dict" 2> 
/dev/null`;
+   $codespellfile = $codespell_dict if (-f $codespell_dict);
+   }
+}
+
 help(0) if ($help);
 
 my $exit = 0;
@@ -364,6 +392,30 @@ if (open(my $spelling, '<', $spelling_file)) {
warn "No typos will be found - file '$spelling_file': $!\n";
 }
 
+if ($codespell) {
+   if (open(my $spelling, '<', $codespellfile)) {
+   while (<$spelling>) {
+   my $line = $_;
+
+   $line =~ s/\s*\n?$//g;
+   $line =~ s/^\s*//g;
+
+   next if ($line =~ m/^\s*#/);
+   next if ($line =~ m/^\s*$/);
+   next if ($line =~ m/, disabled/i);
+
+   $line =~ s/,.*$//;
+
+   my ($suspect, $fix) = split(/->/, $line);
+
+   $spelling_fix{$suspect} = $fix;
+   }
+   close($spelling);
+   } else {
+   warn "No codespell typos will be found - file '$codespellfile': 
$!\n";
+   }
+}
+
 $misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
 
 # This can be modified by sub possible.  Since it can be empty, be careful
@@ -506,6 +558,18 @@ sub top_of_kernel_tree {
return 1;
 }
 
+sub which {
+   my ($bin) = @_;
+
+   foreach my $path (split(/:/, $ENV{PATH})) {
+   if (-e "$path/$bin") {
+   return "$path/$bin";
+   }
+   }
+
+   return "";
+}
+
 sub expand_tabs {
my ($str) = @_;
 
-- 
2.34.1




[RFC 3/3] scripts/spelling: Add common spelling mistakes in default spelling.txt

2023-12-04 Thread Zhao Liu
From: Zhao Liu 

Select the typos in commits from 7.0.0 to 8.2.0-rc2 that were typed more
than three times to add to the default dictionary selling.txt.

The typos were counted by (Referenced Kees' command in Linux kernel's
commit 66b47b4a9dad00):

$ git log --format='%H' v7.0.0..v8.2.0-rc2 | \
   while read commit ; do \
 echo "commit $commit" ; \
 git log --format=email --stat -p -1 $commit | \
 ./scripts/checkpatch.pl --codespell - ; \
   done | \
   tee spell_v7.0.0..v8.2.0-rc2.txt | \
   grep "may be misspelled" | awk '{print $2}' | \
   tr A-Z a-z | sort | uniq -c | sort -rn

The typos were listed as follows (In addition to variable naming and
misreporting, select typos greater than three times):

   1289 'fpr'
222 'ot'
 45 'hace'
 25 'te'
 24 'parm'
 21 'sie'
 20 'nd'
 20 'clen'
 15 'sring'
 14 'hax'
 13 'hda'
 12 'endianess'
 10 'tabl'
 10 'stip'
 10 'datas'
  9 'reenable'
  8 'unitialized'
  8 'invers'
  7 'seh'
  7 'priviledge'
  7 'indentification'
  7 'existance'
  6 'varience'
  6 'unuseful'
  6 'tranfer'
  6 'tne'
  6 'octects'
  6 'informations'
  6 'indicies'
  6 'extenstion'
  6 'betwen'
  6 'ammends'
  5 'writting'
  5 'wont'
  5 'som'
  5 'responsability'
  5 'padd'
  5 'offsetp'
  5 'negotation'
  5 'necesary'
  5 'nam'
  5 'maintainance'
  5 'extnesion'
  5 'convertion'
  5 'configuraiton'
  5 'comparision'
  5 'arbitrer'
  5 'an
  5 'algorithim'
  5 'addreses'
  5 'achived'
  5 'accomodate'
  4 'undocummented'
  4 'ue'
  4 'truely'
  4 'tre'
  4 'suppoted'
  4 'separatly'
  4 'reenabled'
  4 'occured'
  4 'myu'
  4 'mone'
  4 'ment'
  4 'limitaions'
  4 'instread'
  4 'infomation'
  4 'implment'
  4 'hammmer'
  4 'desciptor'
  4 'defintions'
  4 'crypted'
  4 'constext'
  4 'acces'
  3 'wronly'
  3 'wether'
  3 'vill'
  3 'unsupport'
  3 'unneded'
  3 'transfered'
  3 'sxl'
  3 'suppor'
  3 'superceded'
  3 'succesfully'
  3 'slighly'
  3 'setted'
  3 'respectivelly'
  3 'reigster'
  3 'recive'
  3 'priviledged'
  3 'potentialy'
  3 'phyiscal'
  3 'pathes'
  3 'othe'
  3 'ontaining'
  3 'nunber'
  3 'missmatch'
  3 'minumum'
  3 'mesage'
  3 'legnth'
  3 'kenel'
  3 'intialized'
  3 'inbetween'
  3 'implemetation'
  3 'garanteed'
  3 'explictly'
  3 'enhacements'
  3 'ealier'
  3 'doen't'
  3 'diferent'
  3 'debuging'
  3 'daa'
  3 'convergance'
  3 'compatiblity'
  3 'caf'
  3 'buid'
  3 'becase'
  3 'bandwith'
  3 'atleast'
  3 'asume'
  3 'analoguous'
  3 'alse'
  3 'addtionally'
  3 'addtional'
  3 'addresss'
  3 'acknowledgement'
  3 'accross'
  3 'acceses'

Signed-off-by: Zhao Liu 
---
 scripts/spelling.txt | 16 
 1 file changed, 16 insertions(+)

diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 549ffd0f2d78..177dee2bb28d 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -28,6 +28,7 @@ accelaration||acceleration
 accelearion||acceleration
 acceleratoin||acceleration
 accelleration||acceleration
+acces||access
 accesing||accessing
 accesnt||accent
 accessable||accessible
@@ -50,6 +51,7 @@ acessable||accessible
 acess||access
 acessing||accessing
 achitecture||architecture
+achived||achieved
 acient||ancient
 acitions||actions
 acitve||active
@@ -95,6 +97,7 @@ albumns||albums
 alegorical||allegorical
 algined||aligned
 algorith||algorithm
+algorithim||algorithm
 algorithmical||algorithmically
 algoritm||algorithm
 algoritms||algorithms
@@ -124,6 +127,7 @@ altough||although
 alue||value
 ambigious||ambiguous
 ambigous||ambiguous
+ammend||amend
 amoung||among
 amount of times||number of times
 amout||amount
@@ -248,6 +252,7 @@ beeing||being
 befor||before
 begining||beginning
 beter||better
+betwen||between
 betweeen||between
 bianries||binaries
 bitmast||bitmask
@@ -350,6 +355,7 @@ comsuming||consuming
 comaptible||compatible
 compability||compatibility
 compaibility||compatibility
+comparision||comparison
 comparsion||comparison
 compatability||compatibility
 compatable||compatible
@@ -385,6 +391,7 @@ configred||configured
 configuartion||configuration
 configuation||configuration
 configued||configured
+configuraiton||configuration
 configuratoin||configuration
 configuraton||configuration
 configuretion||configuration
@@ -658,6 +665,7 @@ exteneded||extended
 extensability||extensibility
 extention||extension
 extenstion||extension
+extnesion||extension
 extracter||extractor
 faied||failed
 faield||failed
@@ -796,6 +804,7 @@ implementd||implemented
 implemetation||implementation
 implemntation||implementation
 implentation||implem

[RFC 1/3] scripts/checkpatch: Check common spelling be default

2023-12-04 Thread Zhao Liu
From: Zhao Liu 

Add the check for common spelling mistakes for QEMU, which stole
selling.txt from Linux kernel and referenced the Linux kernel's
implementation in checkpatch.pl.

This check covers common spelling mistakes, and can be updated/
extended as per QEMU's realities.

Signed-off-by: Zhao Liu 
---
 scripts/checkpatch.pl |   44 ++
 scripts/spelling.txt  | 1713 +
 2 files changed, 1757 insertions(+)
 create mode 100644 scripts/spelling.txt

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 6e4100d2a41c..755a1f683866 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -7,10 +7,13 @@
 
 use strict;
 use warnings;
+use File::Basename;
+use Cwd 'abs_path';
 use Term::ANSIColor qw(:constants);
 
 my $P = $0;
 $P =~ s@.*/@@g;
+my $D = dirname(abs_path($0));
 
 our $SrcFile= qr{\.(?:(h|c)(\.inc)?|cpp|s|S|pl|py|sh)$};
 
@@ -35,6 +38,7 @@ my $summary_file = 0;
 my $root;
 my %debug;
 my $help = 0;
+my $spelling_file = "$D/spelling.txt";
 
 sub help {
my ($exitcode) = @_;
@@ -337,6 +341,31 @@ our @typeList = (
qr{guintptr},
 );
 
+# Load common spelling mistakes and build regular expression list.
+my $misspellings;
+my %spelling_fix;
+
+if (open(my $spelling, '<', $spelling_file)) {
+   while (<$spelling>) {
+   my $line = $_;
+
+   $line =~ s/\s*\n?$//g;
+   $line =~ s/^\s*//g;
+
+   next if ($line =~ m/^\s*#/);
+   next if ($line =~ m/^\s*$/);
+
+   my ($suspect, $fix) = split(/\|\|/, $line);
+
+   $spelling_fix{$suspect} = $fix;
+   }
+   close($spelling);
+} else {
+   warn "No typos will be found - file '$spelling_file': $!\n";
+}
+
+$misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
+
 # This can be modified by sub possible.  Since it can be empty, be careful
 # about regexes that always match, because they can cause infinite loops.
 our @modifierList = (
@@ -1585,6 +1614,21 @@ sub process {
WARN("8-bit UTF-8 used in possible commit log\n" . 
$herecurr);
}
 
+# Check for various typo / spelling mistakes
+   if (defined($misspellings) &&
+   ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
+   while ($rawline =~ 
/(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
+   my $typo = $1;
+   my $blank = copy_spacing($rawline);
+   my $ptr = substr($blank, 0, $-[1]) . "^" x 
length($typo);
+   my $hereptr = "$hereline$ptr\n";
+   my $typo_fix = $spelling_fix{lc($typo)};
+   $typo_fix = ucfirst($typo_fix) if ($typo =~ 
/^[A-Z]/);
+   $typo_fix = uc($typo_fix) if ($typo =~ 
/^[A-Z]+$/);
+   WARN("'$typo' may be misspelled - perhaps 
'$typo_fix'?\n" . $hereptr);
+   }
+   }
+
 # ignore non-hunk lines and lines being removed
next if (!$hunk_line || $line =~ /^-/);
 
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
new file mode 100644
index ..549ffd0f2d78
--- /dev/null
+++ b/scripts/spelling.txt
@@ -0,0 +1,1713 @@
+# Originally from Debian's Lintian tool. Various false positives have been
+# removed, and various additions have been made as they've been discovered
+# in the kernel source.
+#
+# License: GPLv2
+#
+# Stolen from Linux v6.7-rc4 (@15571273db93).
+#
+# The format of each line is:
+# mistake||correction
+#
+abandonning||abandoning
+abigious||ambiguous
+abitrary||arbitrary
+abitrate||arbitrate
+abnornally||abnormally
+abnrormal||abnormal
+abord||abort
+aboslute||absolute
+abov||above
+abreviated||abbreviated
+absense||absence
+absolut||absolute
+absoulte||absolute
+acccess||access
+acceess||access
+accelaration||acceleration
+accelearion||acceleration
+acceleratoin||acceleration
+accelleration||acceleration
+accesing||accessing
+accesnt||accent
+accessable||accessible
+accesss||access
+accidentaly||accidentally
+accidentually||accidentally
+acclerated||accelerated
+accoding||according
+accomodate||accommodate
+accomodates||accommodates
+accordign||according
+accoring||according
+accout||account
+accquire||acquire
+accquired||acquired
+accross||across
+accumalate||accumulate
+accumalator||accumulator
+acessable||accessible
+acess||access
+acessing||accessing
+achitecture||architecture
+acient||ancient
+acitions||actions
+acitve||active
+acknowldegement||acknowledgment
+acknowledgement||acknowledgment
+ackowledge||acknowledge
+ackowledged||acknowledged
+acording||according
+activete||activate
+actived||activated
+actualy||actually
+actvie||active
+acumulating||accumulating
+acumulative||accumulative
+acumulator||accumulator
+acutally||actually
+adapater||adapter
+adderted||asserted
+addional||additional
+additionaly||addit

Re: [PATCH for 8.2] hw/audio/virtio-sound: mark the device as unmigratable

2023-12-04 Thread Michael S. Tsirkin
On Mon, Dec 04, 2023 at 08:28:37AM +0100, Volker Rümelin wrote:
> The virtio-sound device is currently not migratable. QEMU crashes
> on the source machine at some point during the migration with a
> segmentation fault.
> 
> Even with this bug fixed, the virtio-sound device doesn't migrate
> the state of the audio streams. For example, running streams leave
> the device on the destination machine in a broken condition.
> 
> Mark the device as unmigratable until these issues have been fixed.
> 
> Signed-off-by: Volker Rümelin 

Acked-by: Michael S. Tsirkin 

> ---
>  hw/audio/virtio-snd.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c
> index 2fe966e311..b10fad1228 100644
> --- a/hw/audio/virtio-snd.c
> +++ b/hw/audio/virtio-snd.c
> @@ -68,6 +68,7 @@ static const VMStateDescription vmstate_virtio_snd_device = 
> {
>  
>  static const VMStateDescription vmstate_virtio_snd = {
>  .name = TYPE_VIRTIO_SND,
> +.unmigratable = 1,
>  .minimum_version_id = VIRTIO_SOUND_VM_VERSION,
>  .version_id = VIRTIO_SOUND_VM_VERSION,
>  .fields = (VMStateField[]) {
> -- 
> 2.35.3




Re: [PATCH v3 26/70] i386/tdx: Initialize TDX before creating TD vcpus

2023-12-04 Thread Xiaoyao Li

On 11/15/2023 7:01 PM, Daniel P. Berrangé wrote:

On Wed, Nov 15, 2023 at 02:14:35AM -0500, Xiaoyao Li wrote:

Invoke KVM_TDX_INIT in kvm_arch_pre_create_vcpu() that KVM_TDX_INIT
configures global TD configurations, e.g. the canonical CPUID config,
and must be executed prior to creating vCPUs.

Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM.

Note, this doesn't address the fact that QEMU may change the CPUID
configuration when creating vCPUs, i.e. punts on refactoring QEMU to
provide a stable CPUID config prior to kvm_arch_init().

Signed-off-by: Xiaoyao Li 
Acked-by: Gerd Hoffmann 
---
Changes in v3:
- Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
---
  accel/kvm/kvm-all.c|  9 +++-
  target/i386/kvm/kvm.c  |  9 
  target/i386/kvm/tdx-stub.c |  5 +
  target/i386/kvm/tdx.c  | 45 ++
  target/i386/kvm/tdx.h  |  4 
  5 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 6b5f4d62f961..a92fff471b58 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -441,8 +441,15 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
  
  trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
  
+/*

+ * tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
+ * kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
+ * dereference.
+ */
+cpu->kvm_state = s;
  ret = kvm_arch_pre_create_vcpu(cpu, errp);
  if (ret < 0) {
+cpu->kvm_state = NULL;
  goto err;
  }
  
@@ -450,11 +457,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)

  if (ret < 0) {
  error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed 
(%lu)",
   kvm_arch_vcpu_id(cpu));
+cpu->kvm_state = NULL;
  goto err;
  }
  
  cpu->kvm_fd = ret;

-cpu->kvm_state = s;
  cpu->vcpu_dirty = true;
  cpu->dirty_pages = 0;
  cpu->throttle_us_per_full = 0;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index dafe4d262977..fc840653ceb6 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2268,6 +2268,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
  return r;
  }
  
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)

+{
+if (is_tdx_vm()) {
+return tdx_pre_create_vcpu(cpu, errp);
+}
+
+return 0;
+}
+
  int kvm_arch_destroy_vcpu(CPUState *cs)
  {
  X86CPU *cpu = X86_CPU(cs);
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 1d866d5496bf..3877d432a397 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -6,3 +6,8 @@ int tdx_kvm_init(MachineState *ms, Error **errp)
  {
  return -EINVAL;
  }
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+return -EINVAL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 1f5d8117d1a9..122a37c93de3 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -467,6 +467,49 @@ int tdx_kvm_init(MachineState *ms, Error **errp)
  return 0;
  }
  
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)

+{
+MachineState *ms = MACHINE(qdev_get_machine());
+X86CPU *x86cpu = X86_CPU(cpu);
+CPUX86State *env = &x86cpu->env;
+struct kvm_tdx_init_vm *init_vm;


Mark this as auto-free to avoid the g_free() requirement

   g_autofree  struct kvm_tdx_init_vm *init_vm = NULL;


+int r = 0;
+
+qemu_mutex_lock(&tdx_guest->lock);


QEMU_LOCK_GUARD(&tdx_guest->lock);

to eliminate the mutex_unlock requirement, thus eliminating all
'goto' jumps and label targets, in favour of a plain 'return -1'
everywhere.



Learned!

thanks!




Re: [PATCH 0/3] Virtio dmabuf improvements

2023-12-04 Thread Michael S. Tsirkin
On Thu, Nov 30, 2023 at 04:49:35PM +0100, Albert Esteve wrote:
> 
> 
> 
> On Tue, Nov 7, 2023 at 10:37 AM Albert Esteve  wrote:
> 
> Various improvements for the virtio-dmabuf module.
> This patch includes:
> 
> - Check for ownership before allowing a vhost device
>   to remove an object from the table.
> - Properly cleanup shared resources if a vhost device
>   object gets cleaned up.
> - Rename virtio dmabuf functions to `virtio_dmabuf_*`
> 
> Albert Esteve (3):
>   hw/virtio: check owner for removing objects
>   hw/virtio: cleanup shared resources
>   hw/virtio: rename virtio dmabuf API
> 
>  hw/display/virtio-dmabuf.c        | 14 +-
>  hw/virtio/vhost-user.c            | 33 ++-
>  hw/virtio/vhost.c                 |  5 
>  include/hw/virtio/vhost.h         |  6 +
>  include/hw/virtio/virtio-dmabuf.h | 33 ---
>  tests/unit/test-virtio-dmabuf.c   | 44 +++
>  6 files changed, 83 insertions(+), 52 deletions(-)
> 
> --
> 2.41.0
> 
> 
> 
> Bump :)
> 
> @Marc-André Lureau could you please take a look? You suggested the API
> upgrades, so would be great if you could check if it is what you had in mind.
> 
> Thanks!

All this is post releas material, right?




Re: [PATCH 00/14] virtio-net: add support for SR-IOV emulation

2023-12-04 Thread Yui Washizu



On 2023/12/02 17:08, Akihiko Odaki wrote:

On 2023/12/02 17:00, Akihiko Odaki wrote:

Introduction


This series is based on the RFC series submitted by Yui Washizu[1].
See also [2] for the context.

This series enables SR-IOV emulation for virtio-net. It is useful
to test SR-IOV support on the guest, or to expose several vDPA 
devices in a

VM. vDPA devices can also provide L2 switching feature for offloading
though it is out of scope to allow the guest to configure such a 
feature.


The new code of SR-IOV emulation for virtio-net actually resides in
virtio-pci since it's specific to PCI. Although it is written in a way
agnostic to the virtio device type, it is restricted for virtio-net 
because

of lack of validation.


I forgot to prefix this as RFC. It is the first version of the series 
and I'm open for design changes.



Thank you. I'll proceed with building and reviewing the patch content.




Re: [RFC 1/3] scripts/checkpatch: Check common spelling be default

2023-12-04 Thread Thomas Huth

On 04/12/2023 09.29, Zhao Liu wrote:

From: Zhao Liu 

Add the check for common spelling mistakes for QEMU, which stole
selling.txt from Linux kernel and referenced the Linux kernel's


You need to sellcheck^Wspellcheck the above line ;-)


implementation in checkpatch.pl.

This check covers common spelling mistakes, and can be updated/
extended as per QEMU's realities.

Signed-off-by: Zhao Liu 
---
  scripts/checkpatch.pl |   44 ++
  scripts/spelling.txt  | 1713 +
  2 files changed, 1757 insertions(+)
  create mode 100644 scripts/spelling.txt


I like the idea of having spellchecking in checkpatch.pl ... not sure though 
if we need both, this patch and support for codespell. If you ask me, I'd 
just go with the next codespell patch and avoid adding a full spelling.txt 
file here ... but if others like this patch here, too, I'm also fine with it.


 Thomas




Re: [PATCH 0/3] Virtio dmabuf improvements

2023-12-04 Thread Albert Esteve
On Mon, Dec 4, 2023 at 9:50 AM Michael S. Tsirkin  wrote:

> On Thu, Nov 30, 2023 at 04:49:35PM +0100, Albert Esteve wrote:
> >
> >
> >
> > On Tue, Nov 7, 2023 at 10:37 AM Albert Esteve 
> wrote:
> >
> > Various improvements for the virtio-dmabuf module.
> > This patch includes:
> >
> > - Check for ownership before allowing a vhost device
> >   to remove an object from the table.
> > - Properly cleanup shared resources if a vhost device
> >   object gets cleaned up.
> > - Rename virtio dmabuf functions to `virtio_dmabuf_*`
> >
> > Albert Esteve (3):
> >   hw/virtio: check owner for removing objects
> >   hw/virtio: cleanup shared resources
> >   hw/virtio: rename virtio dmabuf API
> >
> >  hw/display/virtio-dmabuf.c| 14 +-
> >  hw/virtio/vhost-user.c| 33 ++-
> >  hw/virtio/vhost.c |  5 
> >  include/hw/virtio/vhost.h |  6 +
> >  include/hw/virtio/virtio-dmabuf.h | 33 ---
> >  tests/unit/test-virtio-dmabuf.c   | 44
> +++
> >  6 files changed, 83 insertions(+), 52 deletions(-)
> >
> > --
> > 2.41.0
> >
> >
> >
> > Bump :)
> >
> > @Marc-André Lureau could you please take a look? You suggested the API
> > upgrades, so would be great if you could check if it is what you had in
> mind.
> >
> > Thanks!
>
> All this is post releas material, right?
>
>
Yes, it is.


Re: [PATCH] hostmem: Round up memory size for qemu_madvise() in host_memory_backend_memory_complete()

2023-12-04 Thread David Hildenbrand

On 01.12.23 10:07, Michal Prívozník wrote:

On 11/27/23 14:55, David Hildenbrand wrote:

On 27.11.23 14:37, David Hildenbrand wrote:

On 27.11.23 13:32, Michal Privoznik wrote:

Simple reproducer:
qemu.git $ ./build/qemu-system-x86_64 \
-m size=8389632k,slots=16,maxmem=2560k \
-object
'{"qom-type":"memory-backend-file","id":"ram-node0","mem-path":"/hugepages2M/","prealloc":true,"size":8590983168,"host-nodes":[0],"policy":"bind"}'
 \
-numa node,nodeid=0,cpus=0,memdev=ram-node0

With current master I get:

qemu-system-x86_64: cannot bind memory to host NUMA nodes: Invalid
argument

The problem is that memory size (8193MiB) is not an integer
multiple of underlying pagesize (2MiB) which triggers a check
inside of madvise(), since we can't really set a madvise() policy
just to a fraction of a page.


I thought we would just always fail create something that doesn't really
make any sense.

Why would we want to support that case?

Let me dig, I thought we would have had some check there at some point
that would make that fail (especially: RAM block not aligned to the
pagesize).



At least memory-backend-memfd properly fails for that case:

$ ./build/qemu-system-x86_64 -object
memory-backend-memfd,hugetlb=on,size=3m,id=tmp
qemu-system-x86_64: failed to resize memfd to 3145728: Invalid argument

memory-backend-file ends up creating a new file:

  $ ./build/qemu-system-x86_64 -object
memory-backend-file,share=on,mem-path=/dev/hugepages/tmp,size=3m,id=tmp

$ stat /dev/hugepages/tmp
   File: /dev/hugepages/tmp
   Size: 4194304 Blocks: 0  IO Block: 2097152 regular file

... and ends up sizing it properly aligned to the huge page size.


Seems to be due to:

     if (memory < block->page_size) {
     error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
    "or larger than page size 0x%zx",
    memory, block->page_size);
     return NULL;
     }

     memory = ROUND_UP(memory, block->page_size);

     /*
  * ftruncate is not supported by hugetlbfs in older
  * hosts, so don't bother bailing out on errors.
  * If anything goes wrong with it under other filesystems,
  * mmap will fail.
  *
  * Do not truncate the non-empty backend file to avoid corrupting
  * the existing data in the file. Disabling shrinking is not
  * enough. For example, the current vNVDIMM implementation stores
  * the guest NVDIMM labels at the end of the backend file. If the
  * backend file is later extended, QEMU will not be able to find
  * those labels. Therefore, extending the non-empty backend file
  * is disabled as well.
  */
     if (truncate && ftruncate(fd, offset + memory)) {
     perror("ftruncate");
     }

So we create a bigger file and map the bigger file and also have a
RAMBlock that is bigger. So we'll also consume more memory.

... but the memory region is smaller and we tell the VM that it has
less memory. Lot of work with no obvious benefit, and only some
memory waste :)


We better should have just rejected such memory backends right from
the start. But now it's likely too late.

I suspect other things like
  * qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
  * qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);

fail, but we don't care for hugetlb at least regarding merging
and don't even log an error.

But QEMU_MADV_DONTDUMP might also be broken, because that
qemu_madvise() call will just fail.

Your fix would be correct. But I do wonder if we want to just let that
case fail and warn users that they are doing something that doesn't
make too much sense.



Yeah, what's suspicious is: if the size is smaller than page size we
error out, but if it's larger (but still not aligned) we accept that.
I'm failing to see reasoning there. Looks like the ROUND_UP() was added
in v0.13.0-rc0~1201^2~4 (though it's done with some bit magic) and the
check itself was added in v2.8.0-rc0~30^2~23. So it's a bit late, yes.


Yeah.



OTOH - if users want to waste resources, should we stop them? For


It's all inconsistent, including memfd handling or what you noted above.

For example, Having a 1025 MiB guest on gigantic pages, consuming 2 GiB 
really is just absolutely stupid.


Likely the user wants to know about such mistakes instead of making QEMU 
silence all side effects of that. :)




instance, when user requests more vCPUs than physical CPUs a warning is
printed:

$ ./build/qemu-system-x86_64 -accel kvm -smp cpus=128
qemu-system-x86_64: -accel kvm: warning: Number of SMP cpus requested
(128) exceeds the recommended cpus supported by KVM (8)


But that case is still reasonable for testing guest behavior with many 
vCPUs, or migrating from a machine with more vCPUs.


Here, the guest will actually see all vCPUs. In comparison, the memory 
waste here will never ever be consumable by the VM.




but that's about it. So maybe the error can be demoted to just a warning?


The question is what we want to do, for example, with t

RE: [PATCH v2 2/4] multifd: Implement multifd compression accelerator

2023-12-04 Thread Liu, Yuan1
> -Original Message-
> From: Fabiano Rosas 
> Sent: Saturday, December 2, 2023 2:01 AM
> To: Liu, Yuan1 ; quint...@redhat.com;
> pet...@redhat.com; leob...@redhat.com
> Cc: qemu-devel@nongnu.org; Liu, Yuan1 ; Zou, Nanhai
> 
> Subject: Re: [PATCH v2 2/4] multifd: Implement multifd compression
> accelerator
> 
> Yuan Liu  writes:
> 
> > when starting multifd live migration, if the compression method is
> > enabled, compression method can be accelerated using accelerators.
> >
> > Signed-off-by: Yuan Liu 
> > Reviewed-by: Nanhai Zou 
> > ---
> >  migration/multifd.c | 38 --
> >  migration/multifd.h |  8 
> >  2 files changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c index
> > 1fe53d3b98..7149e67867 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -165,6 +165,34 @@ static MultiFDMethods multifd_nocomp_ops = {
> > static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {
> >  [MULTIFD_COMPRESSION_NONE] = &multifd_nocomp_ops,  };
> > +static MultiFDAccelMethods
> > +*accel_multifd_ops[MULTIFD_COMPRESSION_ACCEL__MAX];
> > +
> > +static MultiFDMethods *get_multifd_ops(void) {
> > +MultiFDCompression comp = migrate_multifd_compression();
> > +MultiFDCompressionAccel accel =
> > +migrate_multifd_compression_accel();
> > +
> > +if (comp == MULTIFD_COMPRESSION_NONE ||
> > +accel == MULTIFD_COMPRESSION_ACCEL_NONE) {
> > +return multifd_ops[comp];
> > +}
> > +if (accel == MULTIFD_COMPRESSION_ACCEL_AUTO) {
> > +for (int i = 0; i < MULTIFD_COMPRESSION_ACCEL__MAX; i++) {
> > +if (accel_multifd_ops[i] &&
> > +accel_multifd_ops[i]->is_supported(comp)) {
> > +return accel_multifd_ops[i]->get_multifd_methods();
> > +}
> > +}
> > +return multifd_ops[comp];
> > +}
> > +
> > +/* Check if a specified accelerator is available */
> > +if (accel_multifd_ops[accel] &&
> 
> The CI is complaining that we might reach here with accel=2
> when !CONFIG_QPL. It seems the assert at migrate_multifd_compression_accel
> is not enough.
I will add assert to check both comp and accel next version in the 
get_multifd_ops function.



RE: [PATCH v2 1/4] migration: Introduce multifd-compression-accel parameter

2023-12-04 Thread Liu, Yuan1
> -Original Message-
> From: Markus Armbruster 
> Sent: Friday, December 1, 2023 5:17 PM
> To: Liu, Yuan1 
> Cc: quint...@redhat.com; pet...@redhat.com; faro...@suse.de;
> leob...@redhat.com; qemu-devel@nongnu.org; Zou, Nanhai
> 
> Subject: Re: [PATCH v2 1/4] migration: Introduce multifd-compression-accel
> parameter
> 
> Yuan Liu  writes:
> 
> > Introduce the multifd-compression-accel option to enable or disable
> > live migration data (de)compression accelerator.
> >
> > The default value of multifd-compression-accel is auto, and the
> > enabling and selection of the accelerator are automatically detected.
> > By setting multifd-compression-accel=none, the acceleration function can
> be disabled.
> > Similarly, users can explicitly specify a specific accelerator name,
> > such as multifd-compression-accel=qpl.
> >
> > Signed-off-by: Yuan Liu 
> > Reviewed-by: Nanhai Zou 
> > ---
> >  hw/core/qdev-properties-system.c| 11 +++
> >  include/hw/qdev-properties-system.h |  4 
> >  migration/migration-hmp-cmds.c  | 10 ++
> >  migration/options.c | 24 
> >  migration/options.h |  1 +
> >  qapi/migration.json | 26 +-
> >  6 files changed, 75 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/core/qdev-properties-system.c
> > b/hw/core/qdev-properties-system.c
> > index 688340610e..ed23035845 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -673,6 +673,17 @@ const PropertyInfo qdev_prop_multifd_compression =
> {
> >  .set_default_value = qdev_propinfo_set_default_value_enum,
> >  };
> >
> > +/* --- MultiFD Compression Accelerator --- */
> > +
> > +const PropertyInfo qdev_prop_multifd_compression_accel = {
> > +.name = "MultiFDCompressionAccel",
> > +.description = "MultiFD Compression Accelerator, "
> > +   "auto/none/qpl",
> > +.enum_table = &MultiFDCompressionAccel_lookup,
> > +.get = qdev_propinfo_get_enum,
> > +.set = qdev_propinfo_set_enum,
> > +.set_default_value = qdev_propinfo_set_default_value_enum,
> > +};
> >  /* --- Reserved Region --- */
> >
> >  /*
> > diff --git a/include/hw/qdev-properties-system.h
> > b/include/hw/qdev-properties-system.h
> > index 0ac327ae60..da086bd836 100644
> > --- a/include/hw/qdev-properties-system.h
> > +++ b/include/hw/qdev-properties-system.h
> > @@ -7,6 +7,7 @@ extern const PropertyInfo qdev_prop_chr;  extern const
> > PropertyInfo qdev_prop_macaddr;  extern const PropertyInfo
> > qdev_prop_reserved_region;  extern const PropertyInfo
> > qdev_prop_multifd_compression;
> > +extern const PropertyInfo qdev_prop_multifd_compression_accel;
> >  extern const PropertyInfo qdev_prop_losttickpolicy;  extern const
> > PropertyInfo qdev_prop_blockdev_on_error;  extern const PropertyInfo
> > qdev_prop_bios_chs_trans; @@ -41,6 +42,9 @@ extern const PropertyInfo
> > qdev_prop_pcie_link_width;  #define
> > DEFINE_PROP_MULTIFD_COMPRESSION(_n, _s, _f, _d) \
> >  DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_multifd_compression, \
> > MultiFDCompression)
> > +#define DEFINE_PROP_MULTIFD_COMPRESSION_ACCEL(_n, _s, _f, _d) \
> > +DEFINE_PROP_SIGNED(_n, _s, _f, _d,
> qdev_prop_multifd_compression_accel, \
> > +   MultiFDCompressionAccel)
> >  #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
> >  DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
> >  LostTickPolicy) diff --git
> > a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> > index a82597f18e..3a278c89d9 100644
> > --- a/migration/migration-hmp-cmds.c
> > +++ b/migration/migration-hmp-cmds.c
> > @@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon,
> const QDict *qdict)
> >  monitor_printf(mon, "%s: %s\n",
> >
> MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
> >  MultiFDCompression_str(params->multifd_compression));
> > +assert(params->has_multifd_compression_accel);
> > +monitor_printf(mon, "%s: %s\n",
> > +MigrationParameter_str(
> > +MIGRATION_PARAMETER_MULTIFD_COMPRESSION_ACCEL),
> > +
> > + MultiFDCompressionAccel_str(params->multifd_compression_accel));
> >  monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
> >
> MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
> >  params->xbzrle_cache_size); @@ -610,6 +615,11 @@ void
> > hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
> >  visit_type_MultiFDCompression(v, param, &p-
> >multifd_compression,
> >&err);
> >  break;
> > +case MIGRATION_PARAMETER_MULTIFD_COMPRESSION_ACCEL:
> > +p->has_multifd_compression_accel = true;
> > +visit_type_MultiFDCompressionAccel(v, param,
> > +   &p-
> >multifd_compressi

Re: [PATCH v3 08/70] physmem: replace function name with __func__ in ram_block_discard_range()

2023-12-04 Thread David Hildenbrand

On 04.12.23 08:40, Xiaoyao Li wrote:

On 11/16/2023 2:21 AM, David Hildenbrand wrote:

On 15.11.23 08:14, Xiaoyao Li wrote:

Use __func__ to avoid hard-coded function name.

Signed-off-by: Xiaoyao Li 
---


That can be queued independently.


Will you queue it for 9.0? for someone else?

Do I need to send it separately?


Probably best to just send it as a separate cleanup. Likely, Paolo will 
queue it. If not, I can do it.


--
Cheers,

David / dhildenb




Re: [PATCH v3 07/70] physmem: Relax the alignment check of host_startaddr in ram_block_discard_range()

2023-12-04 Thread David Hildenbrand

On 04.12.23 08:53, Xiaoyao Li wrote:

On 12/4/2023 3:35 PM, Xiaoyao Li wrote:

On 11/20/2023 5:56 PM, David Hildenbrand wrote:

On 16.11.23 03:56, Xiaoyao Li wrote:

On 11/16/2023 2:20 AM, David Hildenbrand wrote:

On 15.11.23 08:14, Xiaoyao Li wrote:

Commit d3a5038c461 ("exec: ram_block_discard_range") introduced
ram_block_discard_range() which grabs some code from
ram_discard_range(). However, during code movement, it changed
alignment
check of host_startaddr from qemu_host_page_size to rb->page_size.

When ramblock is back'ed by hugepage, it requires the startaddr to be
huge page size aligned, which is a overkill. e.g., TDX's
private-shared
page conversion is done at 4KB granularity. Shared page is discarded
when it gets converts to private and when shared page back'ed by
hugepage it is going to fail on this check.

So change to alignment check back to qemu_host_page_size.

Signed-off-by: Xiaoyao Li 
---
Changes in v3:
    - Newly added in v3;
---
    system/physmem.c | 2 +-
    1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/system/physmem.c b/system/physmem.c
index c56b17e44df6..8a4e42c7cf60 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -3532,7 +3532,7 @@ int ram_block_discard_range(RAMBlock *rb,
uint64_t start, size_t length)
    uint8_t *host_startaddr = rb->host + start;
-    if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
+    if (!QEMU_PTR_IS_ALIGNED(host_startaddr, qemu_host_page_size)) {


For your use cases, rb->page_size should always match
qemu_host_page_size.

IIRC, we only set rb->page_size to different values for hugetlb. And
guest_memfd does not support hugetlb.

Even if QEMU is using THP, rb->page_size should 4k.

Please elaborate how you can actually trigger that. From what I recall,
guest_memfd is not compatible with hugetlb.


It's the shared memory that can be back'ed by hugetlb.


Serious question: does that configuration make any sense to support at
this point? I claim: no.



Later patch 9 introduces ram_block_convert_page(), which will discard
shared memory when it gets converted to private. TD guest can request
convert a 4K to private while the page is previously back'ed by hugetlb
as 2M shared page.


So you can call ram_block_discard_guest_memfd_range() on subpage
basis, but not ram_block_discard_range().

ram_block_convert_range() would have to thought that that
(questionable) combination of hugetlb for shmem and ordinary pages for
guest_memfd cannot discard shared memory.

And it probably shouldn't either way. There are other problems when
not using hugetlb along with preallocation.


If I understand correctly, preallocation needs to be enabled for
hugetlb. And in preallocation case, it doesn't need to discard memory.
Is it correct?


Yes The downside is that we'll end up with double-memory consumption. 
But if/how to optimize that in this case ca be left for future work.





The check in ram_block_discard_range() is correct, whoever ends up
calling it has to stop calling it.


  > So, I need add logic to ram_block_discard_page() that if the size of


Sorry, I made a typo.

Correct myself, s/ram_block_discard_page()/ram_block_convert_range()


Yes, just leave any shared memory backend that uses hugetlb alone.

--
Cheers,

David / dhildenb




Re: [PATCH v2] system/memory: use ldn_he_p/stn_he_p

2023-12-04 Thread Philippe Mathieu-Daudé

Hi Patrick,

On 3/12/23 16:42, Patrick Venture wrote:

Friendly ping? Is this going to be applied or do I need to add another 
CC or?  I do think it should go into stable.


I'll send a PR with this patch included.

Regards,

Phil.



Re: [PATCH 06/12] scsi: remove AioContext locking

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> The AioContext lock no longer has any effect. Remove it.
> 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  include/hw/virtio/virtio-scsi.h | 14 --
>  hw/scsi/scsi-bus.c  |  2 --
>  hw/scsi/scsi-disk.c | 28 
>  hw/scsi/virtio-scsi.c   | 18 --
>  4 files changed, 4 insertions(+), 58 deletions(-)

> @@ -2531,13 +2527,11 @@ static void scsi_unrealize(SCSIDevice *dev)
>  static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
>  {
>  SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
> -AioContext *ctx = NULL;
> +
>  /* can happen for devices without drive. The error message for missing
>   * backend will be issued in scsi_realize
>   */
>  if (s->qdev.conf.blk) {
> -ctx = blk_get_aio_context(s->qdev.conf.blk);
> -aio_context_acquire(ctx);
>  if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
>  goto out;
>  }
> @@ -2549,15 +2543,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error 
> **errp)
>  }
>  scsi_realize(&s->qdev, errp);
>  out:
> -if (ctx) {
> -aio_context_release(ctx);
> -}
>  }

This doesn't build for me:

../hw/scsi/scsi-disk.c:2545:1: error: label at end of compound statement is a 
C2x extension [-Werror,-Wc2x-extensions]
}
^
1 error generated.

Kevin




RE: [PATCH v2 3/4] configure: add qpl option

2023-12-04 Thread Fabiano Rosas
"Liu, Yuan1"  writes:

>> -Original Message-
>> From: Fabiano Rosas 
>> Sent: Saturday, December 2, 2023 1:45 AM
>> To: Liu, Yuan1 ; quint...@redhat.com;
>> pet...@redhat.com; leob...@redhat.com
>> Cc: qemu-devel@nongnu.org; Liu, Yuan1 ; Zou, Nanhai
>> 
>> Subject: Re: [PATCH v2 3/4] configure: add qpl option
>> 
>> Yuan Liu  writes:
>> 
>> > the Query Processing Library (QPL) is an open-source library that
>> > supports data compression and decompression features.
>> >
>> > add --enable-qpl and --disable-qpl options to enable and disable the
>> > QPL compression accelerator. The QPL compression accelerator can
>> > accelerate the Zlib compression algorithm during the live migration.
>> >
>> > Signed-off-by: Yuan Liu 
>> > Reviewed-by: Nanhai Zou 
>> > ---
>> >  meson.build   | 7 +++
>> >  meson_options.txt | 2 ++
>> >  scripts/meson-buildoptions.sh | 3 +++
>> >  3 files changed, 12 insertions(+)
>> >
>> > diff --git a/meson.build b/meson.build index 259dc5f308..b4ba30b4fa
>> > 100644
>> > --- a/meson.build
>> > +++ b/meson.build
>> > @@ -1032,6 +1032,11 @@ if not get_option('zstd').auto() or have_block
>> >  required: get_option('zstd'),
>> >  method: 'pkg-config')  endif
>> > +qpl = not_found
>> > +if not get_option('qpl').auto()
>> > +qpl = dependency('libqpl', required: get_option('qpl'),
>> > + method: 'pkg-config') endif
>> 
>> Hm.. I'm not having success with pkg-config:
>> 
>> ../meson.build:1043:10: ERROR: Dependency "libqpl" not found, tried
>> pkgconfig
>> 
>> It seems it doesn't find the static library. I had to use this instead:
>> 
>> qpl = declare_dependency(dependencies: cc.find_library('qpl',
>>  required: get_option('qpl')))
>> 
>> What am I missing here?
> Sorry about this, the QPL repo(https://github.com/intel/qpl) does not yet 
> support libqpl pkg-config file, we are in the process of adding this 
> functionality and we hope to resolve libqpl's dependencies through pkg-config 
> file.
> I will explicitly address this issue and provide relevant documentation in 
> the next version.

Ok, just remember to test with a clean setup next time.

>
> For the pkg-config test, 
> 1. create /usr/lib64/pkgconfig/libqpl.pc
> 2. add below lines
> prefix=/usr/local
> exec_prefix=${prefix}
> libdir=${exec_prefix}/lib
> includedir=${prefix}/include
>
> Name: libqpl
> Description: Intel Query Processing Library
> Version: 1.3.0
> Libs: -L${libdir} -lqpl -lpthread -laccel-config -ldl -lstdc++

We could probably do this with meson directly instead of requiring a
pkg-config preliminary setup. My meson-fu is not the best, but something
like:

  qpl = declare_dependency(dependencies: [
   cc.find_library('qpl', required: get_option('qpl')),
   cc.find_library('accel-config', required: get_option('qpl')),
   ...
   ], link_args: ['-lstdc++', ...])

> Cflags: -I${includedir}
>
> 3. Install the header files to /usr/local/include/qpl and static library to 
> /usr/local/lib64/libqpl.a

For this part is ok to just point to the official docs.




Re: [PATCH 01/12] virtio-scsi: replace AioContext lock with tmf_bh_lock

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> Protect the Task Management Function BH state with a lock. The TMF BH
> runs in the main loop thread. An IOThread might process a TMF at the
> same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh
> must be protected by a lock.
> 
> Run TMF request completion in the IOThread using aio_wait_bh_oneshot().
> This avoids more locking to protect the virtqueue and SCSI layer state.
> 
> Signed-off-by: Stefan Hajnoczi 

Reviewed-by: Kevin Wolf 




Re: [PATCH v2 6/7] gitlab: build the correct microblaze target

2023-12-04 Thread Alex Bennée
Thomas Huth  writes:

> On 01/12/2023 10.36, Alex Bennée wrote:
>> We inadvertently built the LE target for BE tests.
>> Fixes: 78ebc00b06 (gitlab: shuffle some targets and reduce avocado
>> noise)
>> Signed-off-by: Alex Bennée 
>> ---
>>   .gitlab-ci.d/buildtest.yml | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>> diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
>> index 7f9af83b10..62b5379a5e 100644
>> --- a/.gitlab-ci.d/buildtest.yml
>> +++ b/.gitlab-ci.d/buildtest.yml
>> @@ -41,7 +41,7 @@ build-system-ubuntu:
>> variables:
>>   IMAGE: ubuntu2204
>>   CONFIGURE_ARGS: --enable-docs
>> -TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu
>> +TARGETS: alpha-softmmu microblaze-softmmu mips64el-softmmu
>>   MAKE_CHECK_ARGS: check-build
>> check-system-ubuntu:
>
> We've got microblazeel-softmmu here and microblaze-softmmu in the
> build-system-fedora job. So please don't change the ubuntu job here,
> otherwise we're building the same target twice instead.

Hmm - what would be really useful is an actual microblazeel test image
so we can test what we build.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



RE: [PATCH v2 3/4] configure: add qpl option

2023-12-04 Thread Liu, Yuan1
> -Original Message-
> From: Fabiano Rosas 
> Sent: Monday, December 4, 2023 8:30 PM
> To: Liu, Yuan1 ; quint...@redhat.com;
> pet...@redhat.com; leob...@redhat.com
> Cc: qemu-devel@nongnu.org; Zou, Nanhai 
> Subject: RE: [PATCH v2 3/4] configure: add qpl option
> 
> "Liu, Yuan1"  writes:
> 
> >> -Original Message-
> >> From: Fabiano Rosas 
> >> Sent: Saturday, December 2, 2023 1:45 AM
> >> To: Liu, Yuan1 ; quint...@redhat.com;
> >> pet...@redhat.com; leob...@redhat.com
> >> Cc: qemu-devel@nongnu.org; Liu, Yuan1 ; Zou,
> >> Nanhai 
> >> Subject: Re: [PATCH v2 3/4] configure: add qpl option
> >>
> >> Yuan Liu  writes:
> >>
> >> > the Query Processing Library (QPL) is an open-source library that
> >> > supports data compression and decompression features.
> >> >
> >> > add --enable-qpl and --disable-qpl options to enable and disable
> >> > the QPL compression accelerator. The QPL compression accelerator
> >> > can accelerate the Zlib compression algorithm during the live
> migration.
> >> >
> >> > Signed-off-by: Yuan Liu 
> >> > Reviewed-by: Nanhai Zou 
> >> > ---
> >> >  meson.build   | 7 +++
> >> >  meson_options.txt | 2 ++
> >> >  scripts/meson-buildoptions.sh | 3 +++
> >> >  3 files changed, 12 insertions(+)
> >> >
> >> > diff --git a/meson.build b/meson.build index 259dc5f308..b4ba30b4fa
> >> > 100644
> >> > --- a/meson.build
> >> > +++ b/meson.build
> >> > @@ -1032,6 +1032,11 @@ if not get_option('zstd').auto() or have_block
> >> >  required: get_option('zstd'),
> >> >  method: 'pkg-config')  endif
> >> > +qpl = not_found
> >> > +if not get_option('qpl').auto()
> >> > +qpl = dependency('libqpl', required: get_option('qpl'),
> >> > + method: 'pkg-config') endif
> >>
> >> Hm.. I'm not having success with pkg-config:
> >>
> >> ../meson.build:1043:10: ERROR: Dependency "libqpl" not found, tried
> >> pkgconfig
> >>
> >> It seems it doesn't find the static library. I had to use this instead:
> >>
> >> qpl = declare_dependency(dependencies: cc.find_library('qpl',
> >>  required: get_option('qpl')))
> >>
> >> What am I missing here?
> > Sorry about this, the QPL repo(https://github.com/intel/qpl) does not
> yet support libqpl pkg-config file, we are in the process of adding this
> functionality and we hope to resolve libqpl's dependencies through pkg-
> config file.
> > I will explicitly address this issue and provide relevant documentation
> in the next version.
> 
> Ok, just remember to test with a clean setup next time.
Sure

> > For the pkg-config test,
> > 1. create /usr/lib64/pkgconfig/libqpl.pc 2. add below lines
> > prefix=/usr/local exec_prefix=${prefix} libdir=${exec_prefix}/lib
> > includedir=${prefix}/include
> >
> > Name: libqpl
> > Description: Intel Query Processing Library
> > Version: 1.3.0
> > Libs: -L${libdir} -lqpl -lpthread -laccel-config -ldl -lstdc++
> 
> We could probably do this with meson directly instead of requiring a pkg-
> config preliminary setup. My meson-fu is not the best, but something
> like:
> 
>   qpl = declare_dependency(dependencies: [
>cc.find_library('qpl', required: get_option('qpl')),
>cc.find_library('accel-config', required: get_option('qpl')),
>...
>], link_args: ['-lstdc++', ...])

I will fix this, thank you for the sample code

> > Cflags: -I${includedir}
> >
> > 3. Install the header files to /usr/local/include/qpl and static
> > library to /usr/local/lib64/libqpl.a
> 
> For this part is ok to just point to the official docs.
Yes, good idea




Re: [PATCH v2 6/7] gitlab: build the correct microblaze target

2023-12-04 Thread Thomas Huth

On 04/12/2023 13.40, Alex Bennée wrote:

Thomas Huth  writes:


On 01/12/2023 10.36, Alex Bennée wrote:

We inadvertently built the LE target for BE tests.
Fixes: 78ebc00b06 (gitlab: shuffle some targets and reduce avocado
noise)
Signed-off-by: Alex Bennée 
---
   .gitlab-ci.d/buildtest.yml | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 7f9af83b10..62b5379a5e 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -41,7 +41,7 @@ build-system-ubuntu:
 variables:
   IMAGE: ubuntu2204
   CONFIGURE_ARGS: --enable-docs
-TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu
+TARGETS: alpha-softmmu microblaze-softmmu mips64el-softmmu
   MAKE_CHECK_ARGS: check-build
 check-system-ubuntu:


We've got microblazeel-softmmu here and microblaze-softmmu in the
build-system-fedora job. So please don't change the ubuntu job here,
otherwise we're building the same target twice instead.


Hmm - what would be really useful is an actual microblazeel test image
so we can test what we build.


We've got at least a small test in tests/qtest/boot-serial-test.c, so we 
know that at least some instructions can be executed via TCG.


 Thomas





Re: [PATCH v2 6/7] gitlab: build the correct microblaze target

2023-12-04 Thread Cédric Le Goater

On 12/4/23 13:43, Thomas Huth wrote:

On 04/12/2023 13.40, Alex Bennée wrote:

Thomas Huth  writes:


On 01/12/2023 10.36, Alex Bennée wrote:

We inadvertently built the LE target for BE tests.
Fixes: 78ebc00b06 (gitlab: shuffle some targets and reduce avocado
noise)
Signed-off-by: Alex Bennée 
---
   .gitlab-ci.d/buildtest.yml | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 7f9af83b10..62b5379a5e 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -41,7 +41,7 @@ build-system-ubuntu:
 variables:
   IMAGE: ubuntu2204
   CONFIGURE_ARGS: --enable-docs
-    TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu
+    TARGETS: alpha-softmmu microblaze-softmmu mips64el-softmmu
   MAKE_CHECK_ARGS: check-build
 check-system-ubuntu:


We've got microblazeel-softmmu here and microblaze-softmmu in the
build-system-fedora job. So please don't change the ubuntu job here,
otherwise we're building the same target twice instead.


Hmm - what would be really useful is an actual microblazeel test image
so we can test what we build.


We've got at least a small test in tests/qtest/boot-serial-test.c, so we know 
that at least some instructions can be executed via TCG.


There are 2 configs under buildroot, qemu_microblazebe_mmu_defconfig and
qemu_microblazeel_mmu_defconfig, we could possibly use.

C.




Re: [PATCH 01/12] virtio-scsi: replace AioContext lock with tmf_bh_lock

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> Protect the Task Management Function BH state with a lock. The TMF BH
> runs in the main loop thread. An IOThread might process a TMF at the
> same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh
> must be protected by a lock.
> 
> Run TMF request completion in the IOThread using aio_wait_bh_oneshot().
> This avoids more locking to protect the virtqueue and SCSI layer state.
> 
> Signed-off-by: Stefan Hajnoczi 

The second part reminds me that the implicit protection of the virtqueue
and SCSI data structures by having all accesses in a single thread is
hard to review and I think we wanted to put some assertions there to
check that we're really running in the right thread. I don't think we
have done that so far, so I suppose after this patch would be the place
in the series to add them, before we remove the protection by the
AioContext lock?

Kevin




Re: [PATCH 02/12] tests: remove aio_context_acquire() tests

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> The aio_context_acquire() API is being removed. Drop the test case that
> calls the API.
> 
> Signed-off-by: Stefan Hajnoczi 

Reviewed-by: Kevin Wolf 




Re: [PATCH 03/12] aio: make aio_context_acquire()/aio_context_release() a no-op

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> aio_context_acquire()/aio_context_release() has been replaced by
> fine-grained locking to protect state shared by multiple threads. The
> AioContext lock still plays the role of balancing locking in
> AIO_WAIT_WHILE() and many functions in QEMU either require that the
> AioContext lock is held or not held for this reason. In other words, the
> AioContext lock is purely there for consistency with itself and serves
> no real purpose anymore.
> 
> Stop actually acquiring/releasing the lock in
> aio_context_acquire()/aio_context_release() so that subsequent patches
> can remove callers across the codebase incrementally.
> 
> I have performed "make check" and qemu-iotests stress tests across
> x86-64, ppc64le, and aarch64 to confirm that there are no failures as a
> result of eliminating the lock.
> 
> Signed-off-by: Stefan Hajnoczi 

YOLO.

Acked-by: Kevin Wolf 




Re: [PATCH 04/12] graph-lock: remove AioContext locking

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> Stop acquiring/releasing the AioContext lock in
> bdrv_graph_wrlock()/bdrv_graph_unlock() since the lock no longer has any
> effect.
> 
> The distinction between bdrv_graph_wrunlock() and
> bdrv_graph_wrunlock_ctx() becomes meaningless and they can be collapsed
> into one function.
> 
> Signed-off-by: Stefan Hajnoczi 

Reviewed-by: Kevin Wolf 




Re: [PATCH v2 6/7] gitlab: build the correct microblaze target

2023-12-04 Thread Thomas Huth

On 04/12/2023 13.48, Cédric Le Goater wrote:

On 12/4/23 13:43, Thomas Huth wrote:

On 04/12/2023 13.40, Alex Bennée wrote:

Thomas Huth  writes:


On 01/12/2023 10.36, Alex Bennée wrote:

We inadvertently built the LE target for BE tests.
Fixes: 78ebc00b06 (gitlab: shuffle some targets and reduce avocado
noise)
Signed-off-by: Alex Bennée 
---
   .gitlab-ci.d/buildtest.yml | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 7f9af83b10..62b5379a5e 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -41,7 +41,7 @@ build-system-ubuntu:
 variables:
   IMAGE: ubuntu2204
   CONFIGURE_ARGS: --enable-docs
-    TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu
+    TARGETS: alpha-softmmu microblaze-softmmu mips64el-softmmu
   MAKE_CHECK_ARGS: check-build
 check-system-ubuntu:


We've got microblazeel-softmmu here and microblaze-softmmu in the
build-system-fedora job. So please don't change the ubuntu job here,
otherwise we're building the same target twice instead.


Hmm - what would be really useful is an actual microblazeel test image
so we can test what we build.


We've got at least a small test in tests/qtest/boot-serial-test.c, so we 
know that at least some instructions can be executed via TCG.


There are 2 configs under buildroot, qemu_microblazebe_mmu_defconfig and
qemu_microblazeel_mmu_defconfig, we could possibly use.


I already used the big endian config for going shopping for ballerinas 
(which we already use in one of the avocado tests):


 http://www.qemu-advent-calendar.org/2018/#day-17

And as I just learnt today, the QEMU advent calendar opens its doors again 
in 2023:


 http://www.qemu-advent-calendar.org/2023/

Let's see, maybe we can have a nice surprise for microblazeel this time...

 Thomas




Re: [PATCH 2/2] linux-user: Fix openat() emulation to not modify atime

2023-12-04 Thread Philippe Mathieu-Daudé

Hi Laurent, Helge, Richard,

On 1/12/23 19:51, Shu-Chun Weng wrote:
On Fri, Dec 1, 2023 at 4:42 AM Philippe Mathieu-Daudé > wrote:


Hi Shu-Chun,

On 1/12/23 04:21, Shu-Chun Weng wrote:
 > Commit b8002058 strengthened openat()'s /proc detection by calling
 > realpath(3) on the given path, which allows various paths and
symlinks
 > that points to the /proc file system to be intercepted correctly.
 >
 > Using realpath(3), though, has a side effect that it reads the
symlinks
 > along the way, and thus changes their atime. The results in the
 > following code snippet already get ~now instead of the real atime:
 >
 >    int fd = open("/path/to/a/symlink", O_PATH | O_NOFOLLOW);
 >    struct stat st;
 >    fstat(fd, st);
 >    return st.st_atime;
 >
 > This change opens a path that doesn't appear to be part of /proc
 > directly and checks the destination of /proc/self/fd/n to
determine if
 > it actually refers to a file in /proc.
 >
 > Neither this nor the existing code works with symlinks or
indirect paths
 > (e.g.  /tmp/../proc/self/exe) that points to /proc/self/exe
because it
 > is itself a symlink, and both realpath(3) and /proc/self/fd/n will
 > resolve into the location of QEMU.

Does this fix any of the following issues?
https://gitlab.com/qemu-project/qemu/-/issues/829



Not this one -- this is purely in the logic of util/path.c, which we do 
see and carry an internal patch. It's quite a behavior change so we 
never upstreamed it.


https://gitlab.com/qemu-project/qemu/-/issues/927



No, either. This patch only touches the path handling, not how files are 
opened.


https://gitlab.com/qemu-project/qemu/-/issues/2004



Yes! Though I don't have a toolchain for HPPA or any of the 
architectures intercepting /proc/cpuinfo handy, I hacked the condition 
and confirmed that on 7.1 and 8.2, test.c as attached in the bug prints 
out the host cpuinfo while with this patch, it prints out the content 
generated by `open_cpuinfo()`.




 > Signed-off-by: Shu-Chun Weng mailto:s...@google.com>>


Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2004 



Do we need to merge this for 8.2?



 > ---
 >   linux-user/syscall.c | 42
+-
 >   1 file changed, 33 insertions(+), 9 deletions(-)


On Fri, Dec 1, 2023 at 9:09 AM Helge Deller > wrote:


On 12/1/23 04:21, Shu-Chun Weng wrote:
 > Commit b8002058 strengthened openat()'s /proc detection by calling
 > realpath(3) on the given path, which allows various paths and
symlinks
 > that points to the /proc file system to be intercepted correctly.
 >
 > Using realpath(3), though, has a side effect that it reads the
symlinks
 > along the way, and thus changes their atime.

Ah, ok. I didn't thought of that side effect when I came up with the
patch.
Does the updated atimes trigger some real case issue ?


We have an internal library shimming the underlying filesystem that uses 
the `open(O_PATH|O_NOFOLLOW)`+`fstat()` pattern for all file stats. 
Checking symlink atime is in one of the unittests, though I don't know 
if production ever uses it.



Helge






Re: [PATCH-for-8.2 v2] tests/avocado: Update yamon-bin-02.22.zip URL

2023-12-04 Thread Thomas Huth

On 01/12/2023 21.56, Philippe Mathieu-Daudé wrote:

http://www.imgtec.com/tools/mips-tools/downloads/ redirects
to https://mips.com/downloads/yamon-version-02-22/ then points
to an invalid path to a s3 bucket. Use the correct path. The
site will eventually be fixed.

Signed-off-by: Philippe Mathieu-Daudé 
---
Supersedes: <20231201142139.39816-1-phi...@linaro.org>
Cc: Mark Cave-Ayland 
Cc: BALATON Zoltan 
---
  tests/avocado/machine_mips_malta.py | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/avocado/machine_mips_malta.py 
b/tests/avocado/machine_mips_malta.py
index 9bd54518bf..8e1ee120f4 100644
--- a/tests/avocado/machine_mips_malta.py
+++ b/tests/avocado/machine_mips_malta.py
@@ -124,8 +124,9 @@ def test_mips_malta_i6400_framebuffer_logo_8cores(self):
  class MaltaMachine(QemuSystemTest):
  
  def do_test_yamon(self):

-rom_url = ('http://www.imgtec.com/tools/mips-tools/downloads/'
-   'yamon/yamon-bin-02.22.zip')
+rom_url = ('https://s3-eu-west-1.amazonaws.com/'
+   'downloads-mips/mips-downloads/'
+   'YAMON/yamon-bin-02.22.zip')
  rom_hash = '8da7ecddbc5312704b8b324341ee238189bde480'
  zip_path = self.fetch_asset(rom_url, asset_hash=rom_hash)
  


Reviewed-by: Thomas Huth 




Re: [PATCH] hostmem: Round up memory size for qemu_madvise() in host_memory_backend_memory_complete()

2023-12-04 Thread Michal Prívozník
On 12/4/23 10:21, David Hildenbrand wrote:
> On 01.12.23 10:07, Michal Prívozník wrote:
>> On 11/27/23 14:55, David Hildenbrand wrote:
>>> On 27.11.23 14:37, David Hildenbrand wrote:
 On 27.11.23 13:32, Michal Privoznik wrote:
> Simple reproducer:
> qemu.git $ ./build/qemu-system-x86_64 \
> -m size=8389632k,slots=16,maxmem=2560k \
> -object
> '{"qom-type":"memory-backend-file","id":"ram-node0","mem-path":"/hugepages2M/","prealloc":true,"size":8590983168,"host-nodes":[0],"policy":"bind"}'
>  \
> -numa node,nodeid=0,cpus=0,memdev=ram-node0
>
> With current master I get:
>
> qemu-system-x86_64: cannot bind memory to host NUMA nodes: Invalid
> argument
>
> The problem is that memory size (8193MiB) is not an integer
> multiple of underlying pagesize (2MiB) which triggers a check
> inside of madvise(), since we can't really set a madvise() policy
> just to a fraction of a page.

 I thought we would just always fail create something that doesn't
 really
 make any sense.

 Why would we want to support that case?

 Let me dig, I thought we would have had some check there at some point
 that would make that fail (especially: RAM block not aligned to the
 pagesize).
>>>
>>>
>>> At least memory-backend-memfd properly fails for that case:
>>>
>>> $ ./build/qemu-system-x86_64 -object
>>> memory-backend-memfd,hugetlb=on,size=3m,id=tmp
>>> qemu-system-x86_64: failed to resize memfd to 3145728: Invalid argument
>>>
>>> memory-backend-file ends up creating a new file:
>>>
>>>   $ ./build/qemu-system-x86_64 -object
>>> memory-backend-file,share=on,mem-path=/dev/hugepages/tmp,size=3m,id=tmp
>>>
>>> $ stat /dev/hugepages/tmp
>>>    File: /dev/hugepages/tmp
>>>    Size: 4194304 Blocks: 0  IO Block: 2097152 regular
>>> file
>>>
>>> ... and ends up sizing it properly aligned to the huge page size.
>>>
>>>
>>> Seems to be due to:
>>>
>>>  if (memory < block->page_size) {
>>>  error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be
>>> equal to "
>>>     "or larger than page size 0x%zx",
>>>     memory, block->page_size);
>>>  return NULL;
>>>  }
>>>
>>>  memory = ROUND_UP(memory, block->page_size);
>>>
>>>  /*
>>>   * ftruncate is not supported by hugetlbfs in older
>>>   * hosts, so don't bother bailing out on errors.
>>>   * If anything goes wrong with it under other filesystems,
>>>   * mmap will fail.
>>>   *
>>>   * Do not truncate the non-empty backend file to avoid corrupting
>>>   * the existing data in the file. Disabling shrinking is not
>>>   * enough. For example, the current vNVDIMM implementation stores
>>>   * the guest NVDIMM labels at the end of the backend file. If the
>>>   * backend file is later extended, QEMU will not be able to find
>>>   * those labels. Therefore, extending the non-empty backend file
>>>   * is disabled as well.
>>>   */
>>>  if (truncate && ftruncate(fd, offset + memory)) {
>>>  perror("ftruncate");
>>>  }
>>>
>>> So we create a bigger file and map the bigger file and also have a
>>> RAMBlock that is bigger. So we'll also consume more memory.
>>>
>>> ... but the memory region is smaller and we tell the VM that it has
>>> less memory. Lot of work with no obvious benefit, and only some
>>> memory waste :)
>>>
>>>
>>> We better should have just rejected such memory backends right from
>>> the start. But now it's likely too late.
>>>
>>> I suspect other things like
>>>   * qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
>>>   * qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
>>>
>>> fail, but we don't care for hugetlb at least regarding merging
>>> and don't even log an error.
>>>
>>> But QEMU_MADV_DONTDUMP might also be broken, because that
>>> qemu_madvise() call will just fail.
>>>
>>> Your fix would be correct. But I do wonder if we want to just let that
>>> case fail and warn users that they are doing something that doesn't
>>> make too much sense.
>>>
>>
>> Yeah, what's suspicious is: if the size is smaller than page size we
>> error out, but if it's larger (but still not aligned) we accept that.
>> I'm failing to see reasoning there. Looks like the ROUND_UP() was added
>> in v0.13.0-rc0~1201^2~4 (though it's done with some bit magic) and the
>> check itself was added in v2.8.0-rc0~30^2~23. So it's a bit late, yes.
> 
> Yeah.
> 
>>
>> OTOH - if users want to waste resources, should we stop them? For
> 
> It's all inconsistent, including memfd handling or what you noted above.
> 
> For example, Having a 1025 MiB guest on gigantic pages, consuming 2 GiB
> really is just absolutely stupid.
> 
> Likely the user wants to know about such mistakes instead of making QEMU
> silence all side effects of that. :)

Agreed. As I said, consistency should win here.

> 
> 
>> instance, when user requests more vCPUs than physical CPUs a warning is
>> pri

[PULL 1/3] tests/qemu-iotests/149: Use more inclusive language in this test

2023-12-04 Thread Thomas Huth
Let's use 'unsupported_configs' and 'tested_configs' here
instead of non-inclusive words.

Message-ID: <20231122084000.809696-1-th...@redhat.com>
Reviewed-by: "Daniel P. Berrangé" 
Signed-off-by: Thomas Huth 
---
 tests/qemu-iotests/149 | 16 +---
 tests/qemu-iotests/149.out |  8 
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/tests/qemu-iotests/149 b/tests/qemu-iotests/149
index 2ae318f16f..c13343d7ef 100755
--- a/tests/qemu-iotests/149
+++ b/tests/qemu-iotests/149
@@ -518,7 +518,7 @@ configs = [
 
 ]
 
-blacklist = [
+unsupported_configs = [
 # We don't have a cast-6 cipher impl for QEMU yet
 "cast6-256-xts-plain64-sha1",
 "cast6-128-xts-plain64-sha1",
@@ -528,17 +528,19 @@ blacklist = [
 "twofish-192-xts-plain64-sha1",
 ]
 
-whitelist = []
+# Optionally test only the configurations in the LUKS_CONFIG
+# environment variable
+tested_configs = None
 if "LUKS_CONFIG" in os.environ:
-whitelist = os.environ["LUKS_CONFIG"].split(",")
+tested_configs = os.environ["LUKS_CONFIG"].split(",")
 
 for config in configs:
-if config.name in blacklist:
-iotests.log("Skipping %s in blacklist" % config.name)
+if config.name in unsupported_configs:
+iotests.log("Skipping %s (config not supported)" % config.name)
 continue
 
-if len(whitelist) > 0 and config.name not in whitelist:
-iotests.log("Skipping %s not in whitelist" % config.name)
+if tested_configs is not None and config.name not in tested_configs:
+iotests.log("Skipping %s (by user request)" % config.name)
 continue
 
 test_once(config, qemu_img=False)
diff --git a/tests/qemu-iotests/149.out b/tests/qemu-iotests/149.out
index 2cc5b82f7c..72ca847159 100644
--- a/tests/qemu-iotests/149.out
+++ b/tests/qemu-iotests/149.out
@@ -470,7 +470,7 @@ sudo cryptsetup -q -v luksClose 
qiotest-145-cast5-128-cbc-plain64-sha1
 # Delete image
 unlink TEST_DIR/luks-cast5-128-cbc-plain64-sha1.img
 
-Skipping cast6-256-xts-plain64-sha1 in blacklist
+Skipping cast6-256-xts-plain64-sha1 (config not supported)
 # = dm-crypt aes-256-cbc-plain-sha1 =
 # Create image
 truncate TEST_DIR/luks-aes-256-cbc-plain-sha1.img --size 4194304MB
@@ -1297,7 +1297,7 @@ sudo cryptsetup -q -v luksClose 
qiotest-145-twofish-128-xts-plain64-sha1
 # Delete image
 unlink TEST_DIR/luks-twofish-128-xts-plain64-sha1.img
 
-Skipping twofish-192-xts-plain64-sha1 in blacklist
+Skipping twofish-192-xts-plain64-sha1 (config not supported)
 # = dm-crypt serpent-128-xts-plain64-sha1 =
 # Create image
 truncate TEST_DIR/luks-serpent-128-xts-plain64-sha1.img --size 4194304MB
@@ -1534,8 +1534,8 @@ sudo cryptsetup -q -v luksClose 
qiotest-145-serpent-192-xts-plain64-sha1
 # Delete image
 unlink TEST_DIR/luks-serpent-192-xts-plain64-sha1.img
 
-Skipping cast6-128-xts-plain64-sha1 in blacklist
-Skipping cast6-192-xts-plain64-sha1 in blacklist
+Skipping cast6-128-xts-plain64-sha1 (config not supported)
+Skipping cast6-192-xts-plain64-sha1 (config not supported)
 # = dm-crypt aes-256-xts-plain64-sha224 =
 # Create image
 truncate TEST_DIR/luks-aes-256-xts-plain64-sha224.img --size 4194304MB
-- 
2.42.0




[PULL 0/3] Misc patches for QEMU 8.2-rc3

2023-12-04 Thread Thomas Huth
The following changes since commit 29b5d70cb70574b499517ec9e9f80dea496a3cc0:

  Merge tag 'pull-ppc-for-8.2-20231130' of https://gitlab.com/npiggin/qemu into 
staging (2023-12-01 07:29:52 -0500)

are available in the Git repository at:

  https://gitlab.com/thuth/qemu.git tags/pull-request-2023-12-04

for you to fetch changes up to 4d98618b8a657f1ce361d90e0eade759af912b98:

  tests/qtest: check the return value (2023-12-04 15:12:57 +0100)


* Fix wording in iotest 149
* Fix whitespace issues in sh4 code (ignore checkpatch.pl warnings here)
* Make sure to check return values in qtests


Thomas Huth (1):
  tests/qemu-iotests/149: Use more inclusive language in this test

Yihuan Pan (1):
  sh4: Coding style: Remove tabs

Zhu Jun (1):
  tests/qtest: check the return value

 linux-user/sh4/termbits.h|  204 ++---
 target/sh4/cpu.h |   80 +-
 target/sh4/helper.c  |  236 +++---
 target/sh4/op_helper.c   |   70 +-
 target/sh4/translate.c   | 1466 +-
 tests/qtest/test-filter-mirror.c |1 +
 tests/qtest/test-filter-redirector.c |2 +
 tests/qtest/virtio-net-test.c|1 +
 tests/qemu-iotests/149   |   16 +-
 tests/qemu-iotests/149.out   |8 +-
 10 files changed, 1045 insertions(+), 1039 deletions(-)




[PULL 3/3] tests/qtest: check the return value

2023-12-04 Thread Thomas Huth
From: Zhu Jun 

These variables "ret" are never referenced in the code, thus
add check logic for the "ret"

Signed-off-by: Zhu Jun 
Reviewed-by: Thomas Huth 
Message-ID: <20231121080802.4500-1-zhuj...@cmss.chinamobile.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/test-filter-mirror.c | 1 +
 tests/qtest/test-filter-redirector.c | 2 ++
 tests/qtest/virtio-net-test.c| 1 +
 3 files changed, 4 insertions(+)

diff --git a/tests/qtest/test-filter-mirror.c b/tests/qtest/test-filter-mirror.c
index adeada3eb8..f3865f7519 100644
--- a/tests/qtest/test-filter-mirror.c
+++ b/tests/qtest/test-filter-mirror.c
@@ -61,6 +61,7 @@ static void test_mirror(void)
 g_assert_cmpint(len, ==, sizeof(send_buf));
 recv_buf = g_malloc(len);
 ret = recv(recv_sock[0], recv_buf, len, 0);
+g_assert_cmpint(ret, ==, len);
 g_assert_cmpstr(recv_buf, ==, send_buf);
 
 g_free(recv_buf);
diff --git a/tests/qtest/test-filter-redirector.c 
b/tests/qtest/test-filter-redirector.c
index e72e3b7873..a77d5fd8ec 100644
--- a/tests/qtest/test-filter-redirector.c
+++ b/tests/qtest/test-filter-redirector.c
@@ -118,6 +118,7 @@ static void test_redirector_tx(void)
 g_assert_cmpint(len, ==, sizeof(send_buf));
 recv_buf = g_malloc(len);
 ret = recv(recv_sock, recv_buf, len, 0);
+g_assert_cmpint(ret, ==, len);
 g_assert_cmpstr(recv_buf, ==, send_buf);
 
 g_free(recv_buf);
@@ -185,6 +186,7 @@ static void test_redirector_rx(void)
 g_assert_cmpint(len, ==, sizeof(send_buf));
 recv_buf = g_malloc(len);
 ret = recv(backend_sock[0], recv_buf, len, 0);
+g_assert_cmpint(ret, ==, len);
 g_assert_cmpstr(recv_buf, ==, send_buf);
 
 close(send_sock);
diff --git a/tests/qtest/virtio-net-test.c b/tests/qtest/virtio-net-test.c
index fab5dd8b05..2df75c9780 100644
--- a/tests/qtest/virtio-net-test.c
+++ b/tests/qtest/virtio-net-test.c
@@ -91,6 +91,7 @@ static void tx_test(QVirtioDevice *dev,
 len = ntohl(len);
 
 ret = recv(socket, buffer, len, 0);
+g_assert_cmpint(ret, ==, len);
 g_assert_cmpstr(buffer, ==, "TEST");
 }
 
-- 
2.42.0




[PULL 2/3] sh4: Coding style: Remove tabs

2023-12-04 Thread Thomas Huth
From: Yihuan Pan 

Replaces TABS with spaces to ensure have a consistent coding
style with an indentation of 4 spaces in the SH4 subsystem.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/376
Signed-off-by: Yihuan Pan 
Reviewed-by: Thomas Huth 
Message-ID: <20231124044554.513752-1-xun...@gmail.com>
Signed-off-by: Thomas Huth 
---
 linux-user/sh4/termbits.h |  204 +++---
 target/sh4/cpu.h  |   80 +-
 target/sh4/helper.c   |  236 +++---
 target/sh4/op_helper.c|   70 +-
 target/sh4/translate.c| 1466 ++---
 5 files changed, 1028 insertions(+), 1028 deletions(-)

diff --git a/linux-user/sh4/termbits.h b/linux-user/sh4/termbits.h
index eeabd2d7a9..28e79f2c9a 100644
--- a/linux-user/sh4/termbits.h
+++ b/linux-user/sh4/termbits.h
@@ -39,86 +39,86 @@ struct target_termios {
 #define TARGET_VEOL2 16
 
 /* c_iflag bits */
-#define TARGET_IGNBRK  001
-#define TARGET_BRKINT  002
-#define TARGET_IGNPAR  004
-#define TARGET_PARMRK  010
-#define TARGET_INPCK   020
-#define TARGET_ISTRIP  040
-#define TARGET_INLCR   100
-#define TARGET_IGNCR   200
-#define TARGET_ICRNL   400
-#define TARGET_IUCLC   0001000
-#define TARGET_IXON0002000
-#define TARGET_IXANY   0004000
-#define TARGET_IXOFF   001
-#define TARGET_IMAXBEL 002
-#define TARGET_IUTF8   004
+#define TARGET_IGNBRK  001
+#define TARGET_BRKINT  002
+#define TARGET_IGNPAR  004
+#define TARGET_PARMRK  010
+#define TARGET_INPCK   020
+#define TARGET_ISTRIP  040
+#define TARGET_INLCR   100
+#define TARGET_IGNCR   200
+#define TARGET_ICRNL   400
+#define TARGET_IUCLC   0001000
+#define TARGET_IXON0002000
+#define TARGET_IXANY   0004000
+#define TARGET_IXOFF   001
+#define TARGET_IMAXBEL 002
+#define TARGET_IUTF8   004
 
 /* c_oflag bits */
-#define TARGET_OPOST   001
-#define TARGET_OLCUC   002
-#define TARGET_ONLCR   004
-#define TARGET_OCRNL   010
-#define TARGET_ONOCR   020
-#define TARGET_ONLRET  040
-#define TARGET_OFILL   100
-#define TARGET_OFDEL   200
-#define TARGET_NLDLY   400
-#define TARGET_NL0 000
-#define TARGET_NL1 400
-#define TARGET_CRDLY   0003000
-#define TARGET_CR0 000
-#define TARGET_CR1 0001000
-#define TARGET_CR2 0002000
-#define TARGET_CR3 0003000
-#define TARGET_TABDLY  0014000
-#define TARGET_TAB0000
-#define TARGET_TAB10004000
-#define TARGET_TAB2001
-#define TARGET_TAB30014000
-#define TARGET_XTABS   0014000
-#define TARGET_BSDLY   002
-#define TARGET_BS0 000
-#define TARGET_BS1 002
-#define TARGET_VTDLY   004
-#define TARGET_VT0 000
-#define TARGET_VT1 004
-#define TARGET_FFDLY   010
-#define TARGET_FF0 000
-#define TARGET_FF1 010
+#define TARGET_OPOST   001
+#define TARGET_OLCUC   002
+#define TARGET_ONLCR   004
+#define TARGET_OCRNL   010
+#define TARGET_ONOCR   020
+#define TARGET_ONLRET  040
+#define TARGET_OFILL   100
+#define TARGET_OFDEL   200
+#define TARGET_NLDLY   400
+#define TARGET_NL0 000
+#define TARGET_NL1 400
+#define TARGET_CRDLY   0003000
+#define TARGET_CR0 000
+#define TARGET_CR1 0001000
+#define TARGET_CR2 0002000
+#define TARGET_CR3 0003000
+#define TARGET_TABDLY  0014000
+#define TARGET_TAB0000
+#define TARGET_TAB10004000
+#define TARGET_TAB2001
+#define TARGET_TAB30014000
+#define TARGET_XTABS   0014000
+#define TARGET_BSDLY   002
+#define TARGET_BS0 000
+#define TARGET_BS1 002
+#define TARGET_VTDLY   004
+#define TARGET_VT0 000
+#define TARGET_VT1 004
+#define TARGET_FFDLY   010
+#define TARGET_FF0 000
+#define TARGET_FF1 010
 
 /* c_cflag bit meaning */
-#define TARGET_CBAUD   0010017
-#define TARGET_B0  000 /* hang up */
-#define TARGET_B50 001
-#define TARGET_B75 002
-#define TARGET_B110003
-#define TARGET_B134004
-#define TARGET_B150005
-#define TARGET_B200006
-#define TARGET_B300007
-#define TARGET_B600010
-#define TARGET_B1200   011
-#define TARGET_B1800   012
-#define TARGET_B2400   013
-#define TARGET_B4800   014
-#define TARGET_B9600   015
-#define TARGET_B19200  016
-#define TARGET_B38400  017
+#define TARGET_CBAUD   0010017
+#define TARGET_B0  000 /* hang up */
+#define TARGET_B50 001
+#define TARGET_B75 002
+#define TARGET_B110003
+#define TARGET_B134004
+#define TARGET_B150005
+#define TARGET_B200006
+#define TARGET_B300007
+#define TARGET_B600010
+#define TARGET_B1200   011
+#define TARGET_B1800   012
+#define TARGET_B2400   013
+#define TARGET_B4800   014
+#define TARGET_B9600   015
+#define TARGET_B19200  016
+#define TARGET_B38400  

[PATCH] qemu: send stop event after bdrv_flush_all

2023-12-04 Thread tianren
From: Tianren Zhang 

The stop process is not finished until bdrv_flush_all
is done. Some users (e.g., libvirt) detects the STOP
event and invokes some lock release logic to revoke
the disk lock held by current qemu when such event is
emitted. In such case, if the bdrv_flush_all is after
the stop event, it's possible that the disk lock is
released while the qemu is still waiting for I/O.
Therefore, it's better to have the stop event generated
after the whole stop process is done.

Change-Id: Ia2f95cd55edfdeb71ee2e04005ac216cfabffa22
Signed-off-by: Tianren Zhang 
---
 system/cpus.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/system/cpus.c b/system/cpus.c
index a444a747f0..b0421f3e22 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -268,15 +268,16 @@ static int do_vm_stop(RunState state, bool send_stop)
 cpu_disable_ticks();
 pause_all_vcpus();
 vm_state_notify(0, state);
-if (send_stop) {
-qapi_event_send_stop();
-}
 }
 
 bdrv_drain_all();
 ret = bdrv_flush_all();
 trace_vm_stop_flush_all(ret);
 
+if (runstate_is_running() && send_stop) {
+qapi_event_send_stop();
+}
+
 return ret;
 }
 
-- 
2.38.1




[PULL 0/1] target-arm queue

2023-12-04 Thread Peter Maydell
Just one fix for rc3. Technically this isn't a regression since
8.1, but it is a small change that avoids the user being able
to select an oddball combination of CPU features that currently
QEMU will assert on.

thanks
-- PMM

The following changes since commit 29b5d70cb70574b499517ec9e9f80dea496a3cc0:

  Merge tag 'pull-ppc-for-8.2-20231130' of https://gitlab.com/npiggin/qemu into 
staging (2023-12-01 07:29:52 -0500)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git 
tags/pull-target-arm-20231204-1

for you to fetch changes up to f7767ca301796334f74b9b642b395a4bd3e3dbac:

  target/arm: Disable SME if SVE is disabled (2023-12-04 13:34:16 +)


target-arm queue:
 * Turn off SME if SVE is turned off (this combination doesn't
   currently work and QEMU will assert if you try it)


Peter Maydell (1):
  target/arm: Disable SME if SVE is disabled

 target/arm/cpu.c | 10 ++
 1 file changed, 10 insertions(+)



[PULL 1/1] target/arm: Disable SME if SVE is disabled

2023-12-04 Thread Peter Maydell
There is no architectural requirement that SME implies SVE, but
our implementation currently assumes it. (FEAT_SME_FA64 does
imply SVE.) So if you try to run a CPU with eg "-cpu max,sve=off"
you quickly run into an assert when the guest tries to write to
SMCR_EL1:

#6  0x74b38e96 in __GI___assert_fail
(assertion=0x566e69cb "sm", file=0x566e5b24 
"../../target/arm/helper.c", line=6865, function=0x566e82f0 
<__PRETTY_FUNCTION__.31> "sve_vqm1_for_el_sm") at ./assert/assert.c:101
#7  0x55ee33aa in sve_vqm1_for_el_sm (env=0x57d291f0, el=2, 
sm=false) at ../../target/arm/helper.c:6865
#8  0x55ee3407 in sve_vqm1_for_el (env=0x57d291f0, el=2) at 
../../target/arm/helper.c:6871
#9  0x55ee3724 in smcr_write (env=0x57d291f0, ri=0x57da23b0, 
value=2147483663) at ../../target/arm/helper.c:6995
#10 0x55fd1dba in helper_set_cp_reg64 (env=0x57d291f0, 
rip=0x57da23b0, value=2147483663) at ../../target/arm/tcg/op_helper.c:839
#11 0x7fff60056781 in code_gen_buffer ()

Avoid this unsupported and slightly odd combination by
disabling SME when SVE is not present.

Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2005
Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Message-id: 20231127173318.674758-1-peter.mayd...@linaro.org
---
 target/arm/cpu.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 25e9d2ae7b8..efb22a87f9e 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1743,6 +1743,16 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
 return;
 }
 
+/*
+ * FEAT_SME is not architecturally dependent on FEAT_SVE (unless
+ * FEAT_SME_FA64 is present). However our implementation currently
+ * assumes it, so if the user asked for sve=off then turn off SME also.
+ * (KVM doesn't currently support SME at all.)
+ */
+if (cpu_isar_feature(aa64_sme, cpu) && !cpu_isar_feature(aa64_sve, 
cpu)) {
+object_property_set_bool(OBJECT(cpu), "sme", false, &error_abort);
+}
+
 arm_cpu_sme_finalize(cpu, &local_err);
 if (local_err != NULL) {
 error_propagate(errp, local_err);
-- 
2.34.1




Re: [PATCH 05/12] block: remove AioContext locking

2023-12-04 Thread Kevin Wolf
Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> This is the big patch that removes
> aio_context_acquire()/aio_context_release() from the block layer and
> affected block layer users.
> 
> There isn't a clean way to split this patch and the reviewers are likely
> the same group of people, so I decided to do it in one patch.
> 
> Signed-off-by: Stefan Hajnoczi 

> @@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, 
> AioContext *old_ctx)
>  
>  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
>  {
> -AioContext *ctx = bdrv_get_aio_context(bs);
> -
> -/* In the main thread, bs->aio_context won't change concurrently */
> -assert(qemu_get_current_aio_context() == qemu_get_aio_context());
> -
> -/*
> - * We're in coroutine context, so we already hold the lock of the main
> - * loop AioContext. Don't lock it twice to avoid deadlocks.
> - */
> -assert(qemu_in_coroutine());
> -if (ctx != qemu_get_aio_context()) {
> -aio_context_acquire(ctx);
> -}
> +/* TODO removed in next patch */
>  }

It's still there at the end of the series.

>  void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
>  {
> -AioContext *ctx = bdrv_get_aio_context(bs);
> -
> -assert(qemu_in_coroutine());
> -if (ctx != qemu_get_aio_context()) {
> -aio_context_release(ctx);
> -}
> +/* TODO removed in next patch */
>  }

This one, too.

Reviewed-by: Kevin Wolf 




Re: [PULL 0/1] Migration 20231201 patches

2023-12-04 Thread Stefan Hajnoczi
Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/8.2 for any 
user-visible changes.


signature.asc
Description: PGP signature


Re: [PULL 00/15] virtio,pc,pci: fixes

2023-12-04 Thread Stefan Hajnoczi
Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/8.2 for any 
user-visible changes.


signature.asc
Description: PGP signature


Re: [PULL 0/3] UI patches

2023-12-04 Thread Stefan Hajnoczi
Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/8.2 for any 
user-visible changes.


signature.asc
Description: PGP signature


Re: [PULL v2 0/5] gdbstub, avocado and gitlab updates

2023-12-04 Thread Stefan Hajnoczi
Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/8.2 for any 
user-visible changes.


signature.asc
Description: PGP signature


Re: [PATCH-for-8.2 v2] tests/avocado: Update yamon-bin-02.22.zip URL

2023-12-04 Thread Stefan Hajnoczi
On Fri, 1 Dec 2023 at 15:56, Philippe Mathieu-Daudé  wrote:
>
> http://www.imgtec.com/tools/mips-tools/downloads/ redirects
> to https://mips.com/downloads/yamon-version-02-22/ then points
> to an invalid path to a s3 bucket. Use the correct path. The
> site will eventually be fixed.
>
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Supersedes: <20231201142139.39816-1-phi...@linaro.org>
> Cc: Mark Cave-Ayland 
> Cc: BALATON Zoltan 
> ---
>  tests/avocado/machine_mips_malta.py | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)

Applied, thanks!

Stefan

>
> diff --git a/tests/avocado/machine_mips_malta.py 
> b/tests/avocado/machine_mips_malta.py
> index 9bd54518bf..8e1ee120f4 100644
> --- a/tests/avocado/machine_mips_malta.py
> +++ b/tests/avocado/machine_mips_malta.py
> @@ -124,8 +124,9 @@ def test_mips_malta_i6400_framebuffer_logo_8cores(self):
>  class MaltaMachine(QemuSystemTest):
>
>  def do_test_yamon(self):
> -rom_url = ('http://www.imgtec.com/tools/mips-tools/downloads/'
> -   'yamon/yamon-bin-02.22.zip')
> +rom_url = ('https://s3-eu-west-1.amazonaws.com/'
> +   'downloads-mips/mips-downloads/'
> +   'YAMON/yamon-bin-02.22.zip')
>  rom_hash = '8da7ecddbc5312704b8b324341ee238189bde480'
>  zip_path = self.fetch_asset(rom_url, asset_hash=rom_hash)
>
> --
> 2.41.0
>



Re: [PATCH 01/12] virtio-scsi: replace AioContext lock with tmf_bh_lock

2023-12-04 Thread Stefan Hajnoczi
On Thu, Nov 30, 2023 at 09:25:52AM -0600, Eric Blake wrote:
> On Wed, Nov 29, 2023 at 02:55:42PM -0500, Stefan Hajnoczi wrote:
> > Protect the Task Management Function BH state with a lock. The TMF BH
> > runs in the main loop thread. An IOThread might process a TMF at the
> > same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh
> > must be protected by a lock.
> > 
> > Run TMF request completion in the IOThread using aio_wait_bh_oneshot().
> > This avoids more locking to protect the virtqueue and SCSI layer state.
> 
> Are we trying to get this into 8.2?

No. 8.2 still has the AioContext lock is therefore safe without this
patch.

Stefan

> 
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> >  include/hw/virtio/virtio-scsi.h |  3 +-
> >  hw/scsi/virtio-scsi.c   | 62 ++---
> >  2 files changed, 43 insertions(+), 22 deletions(-)
> >
> 
> Reviewed-by: Eric Blake 
> 
> -- 
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.
> Virtualization:  qemu.org | libguestfs.org
> 


signature.asc
Description: PGP signature


Re: [PATCH v4 3/6] xen: decouple generic xen code from legacy backends codebase

2023-12-04 Thread Paul Durrant

On 02/12/2023 01:41, Volodymyr Babchuk wrote:

In xen-all.c there are unneeded dependencies on xen-legacy-backend.c:

  - xen_init() uses xen_pv_printf() to report errors, but it does not
  provide a pointer to the struct XenLegacyDevice, so it is kind of
  useless, we can use standard error_report() instead.

  - xen-all.c has function xenstore_record_dm_state() which uses global
  variable "xenstore" defined and initialized in xen-legacy-backend.c
  It is used exactly once, so we can just open a new connection to the
  xenstore, update DM state and close connection back.

Those two changes allows us to remove xen-legacy-backend.c at all,
what should be done in the future anyways. But right now this patch
moves us one step close to have QEMU build without legacy Xen
backends.

Signed-off-by: Volodymyr Babchuk 

---

In v4:

  - New in v4, previous was part of "xen: add option to disable legacy
  backends"
  - Do not move xenstore global variable from xen-legacy-backend.c,
instead use a local variable.
---
  accel/xen/xen-all.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)



Reviewed-by: Paul Durrant 




Re: [PATCH 01/12] virtio-scsi: replace AioContext lock with tmf_bh_lock

2023-12-04 Thread Stefan Hajnoczi
On Mon, Dec 04, 2023 at 01:46:13PM +0100, Kevin Wolf wrote:
> Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> > Protect the Task Management Function BH state with a lock. The TMF BH
> > runs in the main loop thread. An IOThread might process a TMF at the
> > same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh
> > must be protected by a lock.
> > 
> > Run TMF request completion in the IOThread using aio_wait_bh_oneshot().
> > This avoids more locking to protect the virtqueue and SCSI layer state.
> > 
> > Signed-off-by: Stefan Hajnoczi 
> 
> The second part reminds me that the implicit protection of the virtqueue
> and SCSI data structures by having all accesses in a single thread is
> hard to review and I think we wanted to put some assertions there to
> check that we're really running in the right thread. I don't think we
> have done that so far, so I suppose after this patch would be the place
> in the series to add them, before we remove the protection by the
> AioContext lock?

Thanks for reminding me. I will add assertions in the next revision of
this series.

Stefan


signature.asc
Description: PGP signature


Re: [PATCH 05/12] block: remove AioContext locking

2023-12-04 Thread Stefan Hajnoczi
On Mon, Dec 04, 2023 at 03:33:57PM +0100, Kevin Wolf wrote:
> Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> > This is the big patch that removes
> > aio_context_acquire()/aio_context_release() from the block layer and
> > affected block layer users.
> > 
> > There isn't a clean way to split this patch and the reviewers are likely
> > the same group of people, so I decided to do it in one patch.
> > 
> > Signed-off-by: Stefan Hajnoczi 
> 
> > @@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState 
> > *bs, AioContext *old_ctx)
> >  
> >  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
> >  {
> > -AioContext *ctx = bdrv_get_aio_context(bs);
> > -
> > -/* In the main thread, bs->aio_context won't change concurrently */
> > -assert(qemu_get_current_aio_context() == qemu_get_aio_context());
> > -
> > -/*
> > - * We're in coroutine context, so we already hold the lock of the main
> > - * loop AioContext. Don't lock it twice to avoid deadlocks.
> > - */
> > -assert(qemu_in_coroutine());
> > -if (ctx != qemu_get_aio_context()) {
> > -aio_context_acquire(ctx);
> > -}
> > +/* TODO removed in next patch */
> >  }
> 
> It's still there at the end of the series.

Will fix in v2. Thanks!


signature.asc
Description: PGP signature


[PULL 0/4] Misc fixes for 2023-12-04

2023-12-04 Thread Philippe Mathieu-Daudé
The following changes since commit 17dacf7ac9e2f076c15f32a290203f8f571a8800:

  Merge tag 'pull-more-8.2-fixes-011223-2' of https://gitlab.com/stsquad/qemu 
into staging (2023-12-04 08:03:18 -0500)

are available in the Git repository at:

  https://github.com/philmd/qemu.git tags/misc-fixes-20231204

for you to fetch changes up to 2e8ed6a970e1842528f34aeb36b387834205c53a:

  tests/avocado: mark ReplayKernelNormal.test_mips64el_malta as flaky 
(2023-12-04 16:21:00 +0100)


Misc fixes for 8.2

- memory: Avoid unaligned accesses (Patrick)
- target/riscv: Fix variable shadowing (Daniel)
- tests/avocado: Update URL, skip flaky test (Alex, Phil)



Alex Bennée (1):
  tests/avocado: mark ReplayKernelNormal.test_mips64el_malta as flaky

Daniel Henrique Barboza (1):
  target/riscv/kvm: fix shadowing in kvm_riscv_(get|put)_regs_csr

Patrick Venture (1):
  system/memory: use ldn_he_p/stn_he_p

Philippe Mathieu-Daudé (1):
  tests/avocado: Update yamon-bin-02.22.zip URL

 system/memory.c | 32 ++---
 target/riscv/kvm/kvm-cpu.c  | 19 -
 tests/avocado/machine_mips_malta.py |  5 +++--
 tests/avocado/replay_kernel.py  |  3 +++
 4 files changed, 17 insertions(+), 42 deletions(-)

-- 
2.41.0




[PULL 2/4] target/riscv/kvm: fix shadowing in kvm_riscv_(get|put)_regs_csr

2023-12-04 Thread Philippe Mathieu-Daudé
From: Daniel Henrique Barboza 

KVM_RISCV_GET_CSR() and KVM_RISCV_SET_CSR() use an 'int ret' variable
that is used to do an early 'return' if ret > 0. Both are being called
in functions that are also declaring a 'ret' integer, initialized with
'0', and this integer is used as return of the function.

The result is that the compiler is less than pleased and is pointing
shadowing errors:

../target/riscv/kvm/kvm-cpu.c: In function 'kvm_riscv_get_regs_csr':
../target/riscv/kvm/kvm-cpu.c:90:13: error: declaration of 'ret' shadows a 
previous local [-Werror=shadow=compatible-local]
   90 | int ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \
  | ^~~
../target/riscv/kvm/kvm-cpu.c:539:5: note: in expansion of macro 
'KVM_RISCV_GET_CSR'
  539 | KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus);
  | ^
../target/riscv/kvm/kvm-cpu.c:536:9: note: shadowed declaration is here
  536 | int ret = 0;
  | ^~~

../target/riscv/kvm/kvm-cpu.c: In function 'kvm_riscv_put_regs_csr':
../target/riscv/kvm/kvm-cpu.c:98:13: error: declaration of 'ret' shadows a 
previous local [-Werror=shadow=compatible-local]
   98 | int ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \
  | ^~~
../target/riscv/kvm/kvm-cpu.c:556:5: note: in expansion of macro 
'KVM_RISCV_SET_CSR'
  556 | KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus);
  | ^
../target/riscv/kvm/kvm-cpu.c:553:9: note: shadowed declaration is here
  553 | int ret = 0;
  | ^~~

The macros are doing early returns for non-zero returns and the local
'ret' variable for both functions is used just to do 'return 0', so
remove them from kvm_riscv_get_regs_csr() and kvm_riscv_put_regs_csr()
and do a straight 'return 0' in the end.

For good measure let's also rename the 'ret' variables in
KVM_RISCV_GET_CSR() and KVM_RISCV_SET_CSR() to '_ret' to make them more
resilient to these kind of errors.

Fixes: 937f0b4512 ("target/riscv: Implement kvm_arch_get_registers")
Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Message-ID: <20231123101338.1040134-1-dbarb...@ventanamicro.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/kvm/kvm-cpu.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 78fa1fa162..45b6cf1cfa 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -87,17 +87,17 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, 
uint64_t type,
 
 #define KVM_RISCV_GET_CSR(cs, env, csr, reg) \
 do { \
-int ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \
-if (ret) { \
-return ret; \
+int _ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \
+if (_ret) { \
+return _ret; \
 } \
 } while (0)
 
 #define KVM_RISCV_SET_CSR(cs, env, csr, reg) \
 do { \
-int ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \
-if (ret) { \
-return ret; \
+int _ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \
+if (_ret) { \
+return _ret; \
 } \
 } while (0)
 
@@ -533,7 +533,6 @@ static int kvm_riscv_put_regs_core(CPUState *cs)
 
 static int kvm_riscv_get_regs_csr(CPUState *cs)
 {
-int ret = 0;
 CPURISCVState *env = &RISCV_CPU(cs)->env;
 
 KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus);
@@ -545,12 +544,12 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
 KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
 KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
 KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
-return ret;
+
+return 0;
 }
 
 static int kvm_riscv_put_regs_csr(CPUState *cs)
 {
-int ret = 0;
 CPURISCVState *env = &RISCV_CPU(cs)->env;
 
 KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus);
@@ -563,7 +562,7 @@ static int kvm_riscv_put_regs_csr(CPUState *cs)
 KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
 KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
 
-return ret;
+return 0;
 }
 
 static int kvm_riscv_get_regs_fp(CPUState *cs)
-- 
2.41.0




[PULL 3/4] tests/avocado: Update yamon-bin-02.22.zip URL

2023-12-04 Thread Philippe Mathieu-Daudé
http://www.imgtec.com/tools/mips-tools/downloads/ redirects
to https://mips.com/downloads/yamon-version-02-22/ then points
to an invalid path to a s3 bucket. Use the correct path. The
site will eventually be fixed.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Thomas Huth 
Message-Id: <20231201205630.10837-1-phi...@linaro.org>
---
 tests/avocado/machine_mips_malta.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/avocado/machine_mips_malta.py 
b/tests/avocado/machine_mips_malta.py
index 99bee49e9a..8cf84bd805 100644
--- a/tests/avocado/machine_mips_malta.py
+++ b/tests/avocado/machine_mips_malta.py
@@ -128,8 +128,9 @@ def test_mips_malta_i6400_framebuffer_logo_8cores(self):
 class MaltaMachine(QemuSystemTest):
 
 def do_test_yamon(self):
-rom_url = ('http://www.imgtec.com/tools/mips-tools/downloads/'
-   'yamon/yamon-bin-02.22.zip')
+rom_url = ('https://s3-eu-west-1.amazonaws.com/'
+   'downloads-mips/mips-downloads/'
+   'YAMON/yamon-bin-02.22.zip')
 rom_hash = '8da7ecddbc5312704b8b324341ee238189bde480'
 zip_path = self.fetch_asset(rom_url, asset_hash=rom_hash)
 
-- 
2.41.0




Re: [RFC PATCH] tests/avocado: mark ReplayKernelNormal.test_mips64el_malta as flaky

2023-12-04 Thread Philippe Mathieu-Daudé

On 1/12/23 21:10, Alex Bennée wrote:

I missed this when going through the recent failure logs. I can run
the test 30 times without failure locally but it seems to hang pretty
reliably on GitLab's CI infra-structure.

Signed-off-by: Alex Bennée 
Cc: Philippe Mathieu-Daudé 
---
  tests/avocado/replay_kernel.py | 3 +++
  1 file changed, 3 insertions(+)


Thanks, queued in misc-fixes.



[PULL 4/4] tests/avocado: mark ReplayKernelNormal.test_mips64el_malta as flaky

2023-12-04 Thread Philippe Mathieu-Daudé
From: Alex Bennée 

I missed this when going through the recent failure logs. I can run
the test 30 times without failure locally but it seems to hang pretty
reliably on GitLab's CI infra-structure.

Cc: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Message-ID: <20231201201027.2689404-1-alex.ben...@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/avocado/replay_kernel.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index af086eab08..c37afa662c 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -119,6 +119,8 @@ def test_mips_malta(self):
 
 self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
 
+# See https://gitlab.com/qemu-project/qemu/-/issues/2013
+@skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on 
GitLab')
 def test_mips64el_malta(self):
 """
 This test requires the ar tool to extract "data.tar.gz" from
@@ -134,6 +136,7 @@ def test_mips64el_malta(self):
 
 :avocado: tags=arch:mips64el
 :avocado: tags=machine:malta
+:avocado: tags=flaky
 """
 deb_url = ('http://snapshot.debian.org/archive/debian/'
'20130217T032700Z/pool/main/l/linux-2.6/'
-- 
2.41.0




[PULL 1/4] system/memory: use ldn_he_p/stn_he_p

2023-12-04 Thread Philippe Mathieu-Daudé
From: Patrick Venture 

Using direct pointer dereferencing can allow for unaligned accesses,
which was seen during execution with sanitizers enabled.

Cc: qemu-sta...@nongnu.org
Reviewed-by: Chris Rauer 
Reviewed-by: Peter Foley 
Signed-off-by: Patrick Venture 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: David Hildenbrand 
Message-ID: <20231116163633.276671-1-vent...@google.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 system/memory.c | 32 ++--
 1 file changed, 2 insertions(+), 30 deletions(-)

diff --git a/system/memory.c b/system/memory.c
index 4d9cb0a7ff..798b6c0a17 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1339,22 +1339,7 @@ static uint64_t memory_region_ram_device_read(void 
*opaque,
   hwaddr addr, unsigned size)
 {
 MemoryRegion *mr = opaque;
-uint64_t data = (uint64_t)~0;
-
-switch (size) {
-case 1:
-data = *(uint8_t *)(mr->ram_block->host + addr);
-break;
-case 2:
-data = *(uint16_t *)(mr->ram_block->host + addr);
-break;
-case 4:
-data = *(uint32_t *)(mr->ram_block->host + addr);
-break;
-case 8:
-data = *(uint64_t *)(mr->ram_block->host + addr);
-break;
-}
+uint64_t data = ldn_he_p(mr->ram_block->host + addr, size);
 
 trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, data, size);
 
@@ -1368,20 +1353,7 @@ static void memory_region_ram_device_write(void *opaque, 
hwaddr addr,
 
 trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, 
size);
 
-switch (size) {
-case 1:
-*(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data;
-break;
-case 2:
-*(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data;
-break;
-case 4:
-*(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data;
-break;
-case 8:
-*(uint64_t *)(mr->ram_block->host + addr) = data;
-break;
-}
+stn_he_p(mr->ram_block->host + addr, size, data);
 }
 
 static const MemoryRegionOps ram_device_mem_ops = {
-- 
2.41.0




Re: [PATCH 05/12] block: remove AioContext locking

2023-12-04 Thread Stefan Hajnoczi
On Thu, Nov 30, 2023 at 03:31:37PM -0600, Eric Blake wrote:
> On Wed, Nov 29, 2023 at 02:55:46PM -0500, Stefan Hajnoczi wrote:
> > This is the big patch that removes
> > aio_context_acquire()/aio_context_release() from the block layer and
> > affected block layer users.
> > 
> > There isn't a clean way to split this patch and the reviewers are likely
> > the same group of people, so I decided to do it in one patch.
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> 
> > +++ b/block.c
> > @@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState 
> > *bs, AioContext *old_ctx)
> >  
> >  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
> >  {
> > -AioContext *ctx = bdrv_get_aio_context(bs);
> > -
> > -/* In the main thread, bs->aio_context won't change concurrently */
> > -assert(qemu_get_current_aio_context() == qemu_get_aio_context());
> > -
> > -/*
> > - * We're in coroutine context, so we already hold the lock of the main
> > - * loop AioContext. Don't lock it twice to avoid deadlocks.
> > - */
> > -assert(qemu_in_coroutine());
> 
> Is this assertion worth keeping in the short term?...

Probably not because coroutine vs non-coroutine functions don't change
in this patch series, so it's unlikely that this will break.

> 
> > -if (ctx != qemu_get_aio_context()) {
> > -aio_context_acquire(ctx);
> > -}
> > +/* TODO removed in next patch */
> >  }
> 
> ...I guess I'll see in the next patch.
> 
> >  
> >  void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
> >  {
> > -AioContext *ctx = bdrv_get_aio_context(bs);
> > -
> > -assert(qemu_in_coroutine());
> > -if (ctx != qemu_get_aio_context()) {
> > -aio_context_release(ctx);
> > -}
> > +/* TODO removed in next patch */
> >  }
> 
> Same comment.
> 
> > +++ b/blockdev.c
> > @@ -1395,7 +1352,6 @@ static void 
> > external_snapshot_action(TransactionAction *action,
> >  /* File name of the new image (for 'blockdev-snapshot-sync') */
> >  const char *new_image_file;
> >  ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1);
> > -AioContext *aio_context;
> >  uint64_t perm, shared;
> >  
> >  /* TODO We'll eventually have to take a writer lock in this function */
> 
> I'm guessing removal of the locking gets us one step closer to
> implementing this TODO at a later time?  Or is it now a stale comment?
> Either way, it doesn't affect this patch.

I'm not sure. Kevin can answer questions about the graph lock.

> > +++ b/tests/unit/test-blockjob.c
> 
> > -static void test_complete_in_standby(void)
> > -{
> 
> > @@ -531,13 +402,5 @@ int main(int argc, char **argv)
> >  g_test_add_func("/blockjob/cancel/standby", test_cancel_standby);
> >  g_test_add_func("/blockjob/cancel/pending", test_cancel_pending);
> >  g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded);
> > -
> > -/*
> > - * This test is flaky and sometimes fails in CI and otherwise:
> > - * don't run unless user opts in via environment variable.
> > - */
> > -if (getenv("QEMU_TEST_FLAKY_TESTS")) {
> > -g_test_add_func("/blockjob/complete_in_standby", 
> > test_complete_in_standby);
> > -}
> 
> Looks like you ripped out this entire test, because it is no longer
> viable.  I might have mentioned it in the commit message, or squashed
> the removal of this test into the earlier 02/12 patch.

I have sent a separate patch to remove this test and once it's merged
this hunk will disappear this patch series:
https://lore.kernel.org/qemu-devel/20231127170210.422728-1-stefa...@redhat.com/

Stefan


signature.asc
Description: PGP signature


Re: [PATCH 06/12] scsi: remove AioContext locking

2023-12-04 Thread Stefan Hajnoczi
On Mon, Dec 04, 2023 at 01:23:09PM +0100, Kevin Wolf wrote:
> Am 29.11.2023 um 20:55 hat Stefan Hajnoczi geschrieben:
> > The AioContext lock no longer has any effect. Remove it.
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> >  include/hw/virtio/virtio-scsi.h | 14 --
> >  hw/scsi/scsi-bus.c  |  2 --
> >  hw/scsi/scsi-disk.c | 28 
> >  hw/scsi/virtio-scsi.c   | 18 --
> >  4 files changed, 4 insertions(+), 58 deletions(-)
> 
> > @@ -2531,13 +2527,11 @@ static void scsi_unrealize(SCSIDevice *dev)
> >  static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
> >  {
> >  SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
> > -AioContext *ctx = NULL;
> > +
> >  /* can happen for devices without drive. The error message for missing
> >   * backend will be issued in scsi_realize
> >   */
> >  if (s->qdev.conf.blk) {
> > -ctx = blk_get_aio_context(s->qdev.conf.blk);
> > -aio_context_acquire(ctx);
> >  if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
> >  goto out;
> >  }
> > @@ -2549,15 +2543,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error 
> > **errp)
> >  }
> >  scsi_realize(&s->qdev, errp);
> >  out:
> > -if (ctx) {
> > -aio_context_release(ctx);
> > -}
> >  }
> 
> This doesn't build for me:
> 
> ../hw/scsi/scsi-disk.c:2545:1: error: label at end of compound statement is a 
> C2x extension [-Werror,-Wc2x-extensions]
> }
> ^
> 1 error generated.

Will fix in v2. Thanks!

Stefan


signature.asc
Description: PGP signature


Re: [PATCH 2/2] linux-user: Fix openat() emulation to not modify atime

2023-12-04 Thread Stefan Hajnoczi
On Mon, Dec 04, 2023 at 02:39:24PM +0100, Philippe Mathieu-Daudé wrote:
> Hi Laurent, Helge, Richard,
> 
> On 1/12/23 19:51, Shu-Chun Weng wrote:
> > On Fri, Dec 1, 2023 at 4:42 AM Philippe Mathieu-Daudé  > > wrote:
> > 
> > Hi Shu-Chun,
> > 
> > On 1/12/23 04:21, Shu-Chun Weng wrote:
> >  > Commit b8002058 strengthened openat()'s /proc detection by calling
> >  > realpath(3) on the given path, which allows various paths and
> > symlinks
> >  > that points to the /proc file system to be intercepted correctly.
> >  >
> >  > Using realpath(3), though, has a side effect that it reads the
> > symlinks
> >  > along the way, and thus changes their atime. The results in the
> >  > following code snippet already get ~now instead of the real atime:
> >  >
> >  >    int fd = open("/path/to/a/symlink", O_PATH | O_NOFOLLOW);
> >  >    struct stat st;
> >  >    fstat(fd, st);
> >  >    return st.st_atime;
> >  >
> >  > This change opens a path that doesn't appear to be part of /proc
> >  > directly and checks the destination of /proc/self/fd/n to
> > determine if
> >  > it actually refers to a file in /proc.
> >  >
> >  > Neither this nor the existing code works with symlinks or
> > indirect paths
> >  > (e.g.  /tmp/../proc/self/exe) that points to /proc/self/exe
> > because it
> >  > is itself a symlink, and both realpath(3) and /proc/self/fd/n will
> >  > resolve into the location of QEMU.
> > 
> > Does this fix any of the following issues?
> > https://gitlab.com/qemu-project/qemu/-/issues/829
> > 
> > 
> > 
> > Not this one -- this is purely in the logic of util/path.c, which we do
> > see and carry an internal patch. It's quite a behavior change so we
> > never upstreamed it.
> > 
> > https://gitlab.com/qemu-project/qemu/-/issues/927
> > 
> > 
> > 
> > No, either. This patch only touches the path handling, not how files are
> > opened.
> > 
> > https://gitlab.com/qemu-project/qemu/-/issues/2004
> > 
> > 
> > 
> > Yes! Though I don't have a toolchain for HPPA or any of the
> > architectures intercepting /proc/cpuinfo handy, I hacked the condition
> > and confirmed that on 7.1 and 8.2, test.c as attached in the bug prints
> > out the host cpuinfo while with this patch, it prints out the content
> > generated by `open_cpuinfo()`.
> > 
> > 
> > 
> >  > Signed-off-by: Shu-Chun Weng  > >
> > 
> > 
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2004
> > 
> 
> Do we need to merge this for 8.2?

Please assign release blocker issues to the 8.2 milestone so that are
tracked:
https://gitlab.com/qemu-project/qemu/-/milestones/10

Thanks,
Stefan

> 
> > 
> >  > ---
> >  >   linux-user/syscall.c | 42
> > +-
> >  >   1 file changed, 33 insertions(+), 9 deletions(-)
> > 
> > 
> > On Fri, Dec 1, 2023 at 9:09 AM Helge Deller  > > wrote:
> > 
> > On 12/1/23 04:21, Shu-Chun Weng wrote:
> >  > Commit b8002058 strengthened openat()'s /proc detection by calling
> >  > realpath(3) on the given path, which allows various paths and
> > symlinks
> >  > that points to the /proc file system to be intercepted correctly.
> >  >
> >  > Using realpath(3), though, has a side effect that it reads the
> > symlinks
> >  > along the way, and thus changes their atime.
> > 
> > Ah, ok. I didn't thought of that side effect when I came up with the
> > patch.
> > Does the updated atimes trigger some real case issue ?
> > 
> > 
> > We have an internal library shimming the underlying filesystem that uses
> > the `open(O_PATH|O_NOFOLLOW)`+`fstat()` pattern for all file stats.
> > Checking symlink atime is in one of the unittests, though I don't know
> > if production ever uses it.
> > 
> > 
> > Helge
> > 
> 


signature.asc
Description: PGP signature


Re: [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier()

2023-12-04 Thread Stefan Hajnoczi
On Mon, Nov 27, 2023 at 09:21:08AM -0600, Eric Blake wrote:
> On Thu, Nov 23, 2023 at 02:49:29PM -0500, Stefan Hajnoczi wrote:
> > virtio_queue_aio_attach_host_notifier() does not require the AioContext
> > lock. Stop taking the lock and remember add an explicit smp_wmb()
> 
> s/remember// ?

Will fix, thanks!

Stefan


signature.asc
Description: PGP signature


[RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Hyman Huang
This functionality was motivated by the following to-do list seen
in crypto documents:
https://wiki.qemu.org/Features/Block/Crypto 

The last chapter says we should "separate header volume": 

The LUKS format has ability to store the header in a separate volume
from the payload. We should extend the LUKS driver in QEMU to support
this use case.

As a proof-of-concept, I've created this patchset, which I've named
the Gluks: generic luks. As their name suggests, they offer encryption
for any format that QEMU theoretically supports.

As you can see below, the Gluks format block layer driver's design is
quite simple.

 virtio-blk/vhost-user-blk...(front-end device)
  ^
  |
 Gluks   (format-like disk node) 
  / \ 
   file   header (blockdev reference)
/ \
 filefile (protocol node)
   |   |
   disk data   Luks data 

We don't need to create a new disk format in order to use the Gluks
to encrypt the disk; all we need to do is construct a Luks header, which
we will refer to as the "Gluk" because it only contains Luks header data
and no user data. The creation command, for instance, is nearly
identical to Luks image:

$ qemu-img create --object secret,id=sec0,data=abc123 -f gluks
  -o cipher-alg=aes-256,cipher-mode=xts -o key-secret=sec0
  cipher.gluks

As previously mentioned, the "size" option is not accepted during the
generation of the Gluks format because it only contains the Luks header
data.

To hot-add a raw disk with Gluks encryption, see the following steps:

1. add a protocol blockdev node of data disk 
$ virsh qemu-monitor-command vm '{"execute":"blockdev-add",
  "arguments":{"node-name": "libvirt-1-storage", "driver": "file",
  "filename": "/path/to/test_disk.raw"}}'

2. add a protocol blockdev node of Luks header
$ virsh qemu-monitor-command vm '{"execute":"blockdev-add",
  "arguments":{"node-name": "libvirt-2-storage", "driver": "file",
  "filename": "/path/to/cipher.gluks" }}'

3. add the secret for decrypting the cipher stored in Gluks header
$ virsh qemu-monitor-command c81_node1 '{"execute":"object-add",
  "arguments":{"qom-type": "secret", "id":
  "libvirt-2-storage-secret0", "data": "abc123"}}'

4. add the Gluks-drived blockdev to connect the user disk with Luks
   header, QEMU will use the cipher in the Luks header to
   encrypt/decrypt the disk data
$ virsh qemu-monitor-command vm '{"execute":"blockdev-add",
  "arguments":{"node-name": "libvirt-1-format", "driver": "gluks", "file":
  "libvirt-1-storage", "header": "libvirt-2-storage", "key-secret":
  "libvirt-2-storage-secret0"}}' 

5. add the device finally
$ virsh qemu-monitor-command vm '{"execute":"device_add",
  "arguments": {"num-queues": "1", "driver": "virtio-blk-pci", "scsi":
  "off", "drive": "libvirt-1-format", "id": "virtio-disk1"}}'

Do the reverse to hot-del the raw disk.

To hot-add a qcow2 disk with Gluks encryption:

1. add a protocol blockdev node of data disk
$ virsh qemu-monitor-command vm '{"execute":"blockdev-add",
  "arguments":{"node-name": "libvirt-1-storage", "driver": "file",
  "filename": "/path/to/test_disk.qcow2"}}'

2. add a protocol blockdev node of Luks header as above.
   block ref: libvirt-2-storage

3. add the secret for decrypting the cipher stored in Gluks header as
   above too 
   secret ref: libvirt-2-storage-secret0

4. add the qcow2-drived blockdev format node:
$ virsh qemu-monitor-command vm '{"execute":"blockdev-add",
  "arguments":{"node-name": "libvirt-1-format", "driver": "qcow2",
  "file": "libvirt-1-storage"}}'

5. add the Gluks-drived blockdev to connect the qcow2 disk with Luks
   header 
$ virsh qemu-monitor-command vm '{"execute":"blockdev-add",
  "arguments":{"node-name": "libvirt-2-format", "driver": "gluks",
  "file": "libvirt-1-format", "header": "libvirt-2-storage",
  "key-secret": "libvirt-2-format-secret0"}}'

6. add the device finally
$ virsh qemu-monitor-command vm '{"execute":"device_add",
  "arguments": {"num-queues": "1", "driver": "virtio-blk-pci", "scsi":
  "off", "drive": "libvirt-2-format", "id": "virtio-disk2"}}'

In a virtual machine, several disk nodes are allowed to share a single
Gluks header.

This patchset, as previously said, is a proof-of-concept; additional
work may be required before productization. As the title suggests, we
have uploaded it solely for comments. Additionally, a thorough test
would be performed on the following version.

Any ideas and comments about this feature would be appreciated.

Thanks,

Yong

Best regared !

Hyman Huang (8):
  crypto: Export util functions and structures
  crypto: Introduce payload offset set function
  Gluks: Add the basic framework
  Gluks: Introduce Gluks options
  qapi: Introduce Gluks types to qapi
  crypto: Provide the Luks crypto driver to Gluks
  Gluks: Implement the fundamental block layer driver hooks.
  block: Support Gluks format image creation using 

[RFC 7/8] Gluks: Implement the fundamental block layer driver hooks

2023-12-04 Thread Hyman Huang
Signed-off-by: Hyman Huang 
---
 block/generic-luks.c | 104 ++-
 1 file changed, 102 insertions(+), 2 deletions(-)

diff --git a/block/generic-luks.c b/block/generic-luks.c
index ebc0365d40..32cbedc86f 100644
--- a/block/generic-luks.c
+++ b/block/generic-luks.c
@@ -23,8 +23,14 @@
 #include "qemu/osdep.h"
 
 #include "block/block_int.h"
+#include "block/block-io.h"
 #include "block/crypto.h"
+#include "block/qdict.h"
 #include "crypto/block.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
 
 #include "generic-luks.h"
 
@@ -50,10 +56,89 @@ static QemuOptsList gluks_create_opts_luks = {
 },
 };
 
+static int gluks_read_func(QCryptoBlock *block,
+   size_t offset,
+   uint8_t *buf,
+   size_t buflen,
+   void *opaque,
+   Error **errp)
+{
+
+BlockDriverState *bs = opaque;
+BDRVGLUKSState *s = bs->opaque;
+ssize_t ret;
+
+GLOBAL_STATE_CODE();
+GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ret = bdrv_pread(s->header, offset, buflen, buf, 0);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not read generic luks header");
+return ret;
+}
+return 0;
+}
+
 static int gluks_open(BlockDriverState *bs, QDict *options, int flags,
   Error **errp)
 {
-return 0;
+BDRVGLUKSState *s = bs->opaque;
+QemuOpts *opts = NULL;
+QCryptoBlockOpenOptions *open_opts = NULL;
+QDict *cryptoopts = NULL;
+unsigned int cflags = 0;
+int ret;
+
+GLOBAL_STATE_CODE();
+
+if (!bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ (BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY), false, errp)) 
{
+return -EINVAL;
+}
+s->header = bdrv_open_child(NULL, options, "header", bs,
+&child_of_bds, BDRV_CHILD_METADATA, false,
+errp);
+if (!s->header) {
+return -EINVAL;
+}
+
+GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+opts = qemu_opts_create(&block_crypto_runtime_opts_luks,
+NULL, 0, &error_abort);
+if (!qemu_opts_absorb_qdict(opts, options, errp)) {
+ret = -EINVAL;
+goto cleanup;
+}
+
+cryptoopts = qemu_opts_to_qdict(opts, NULL);
+qdict_put_str(cryptoopts, "format",
+QCryptoBlockFormat_str(Q_CRYPTO_BLOCK_FORMAT_GLUKS));
+
+open_opts = block_crypto_open_opts_init(cryptoopts, errp);
+if (!open_opts) {
+goto cleanup;
+}
+
+s->crypto.block = qcrypto_block_open(open_opts, NULL,
+ gluks_read_func,
+ bs,
+ cflags,
+ 1,
+ errp);
+if (!s->crypto.block) {
+ret = -EIO;
+goto cleanup;
+}
+
+s->header_size = qcrypto_block_get_payload_offset(s->crypto.block);
+qcrypto_block_set_payload_offset(s->crypto.block, 0);
+
+ret = 0;
+ cleanup:
+qobject_unref(cryptoopts);
+qapi_free_QCryptoBlockOpenOptions(open_opts);
+return ret;
 }
 
 static int coroutine_fn GRAPH_UNLOCKED
@@ -70,13 +155,24 @@ gluks_child_perms(BlockDriverState *bs, BdrvChild *c,
   uint64_t perm, uint64_t shared,
   uint64_t *nperm, uint64_t *nshared)
 {
+if (role & BDRV_CHILD_METADATA) {
+/* assign read permission only */
+perm |= BLK_PERM_CONSISTENT_READ;
+/* share all permissions */
+shared |= BLK_PERM_ALL;
 
+*nperm = perm;
+*nshared = shared;
+return;
+}
+
+bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, 
nshared);
 }
 
 static int64_t coroutine_fn GRAPH_RDLOCK
 gluks_co_getlength(BlockDriverState *bs)
 {
-return 0;
+return bdrv_co_getlength(bs->file->bs);
 }
 
 static BlockDriver bdrv_generic_luks = {
@@ -87,8 +183,12 @@ static BlockDriver bdrv_generic_luks = {
 .bdrv_child_perm= gluks_child_perms,
 .bdrv_co_getlength  = gluks_co_getlength,
 
+.bdrv_close = block_crypto_close,
+.bdrv_co_preadv = block_crypto_co_preadv,
+.bdrv_co_pwritev= block_crypto_co_pwritev,
 .create_opts= &gluks_create_opts_luks,
 .amend_opts = &block_crypto_amend_opts_luks,
+.is_format  = false,
 };
 
 static void block_generic_luks_init(void)
-- 
2.39.1




[RFC 3/8] Gluks: Add the basic framework

2023-12-04 Thread Hyman Huang
Gluks would be a built-in format in the QEMU block layer.

Signed-off-by: Hyman Huang 
---
 block/generic-luks.c | 81 
 block/generic-luks.h | 26 ++
 block/meson.build|  1 +
 3 files changed, 108 insertions(+)
 create mode 100644 block/generic-luks.c
 create mode 100644 block/generic-luks.h

diff --git a/block/generic-luks.c b/block/generic-luks.c
new file mode 100644
index 00..f23e202991
--- /dev/null
+++ b/block/generic-luks.c
@@ -0,0 +1,81 @@
+/*
+ * QEMU block driver for the generic luks encryption
+ *
+ * Copyright (c) 2024 SmartX Inc
+ *
+ * Author: Hyman Huang 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/block_int.h"
+#include "block/crypto.h"
+#include "crypto/block.h"
+
+#include "generic-luks.h"
+
+/* BDRVGLUKSState holds the state of one generic LUKS instance */
+typedef struct BDRVGLUKSState {
+BlockCrypto crypto;
+BdrvChild *header;  /* LUKS header node */
+uint64_t header_size;   /* In bytes */
+} BDRVGLUKSState;
+
+static int gluks_open(BlockDriverState *bs, QDict *options, int flags,
+  Error **errp)
+{
+return 0;
+}
+
+static int coroutine_fn GRAPH_UNLOCKED
+gluks_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
+{
+return 0;
+}
+
+static void
+gluks_child_perms(BlockDriverState *bs, BdrvChild *c,
+  const BdrvChildRole role,
+  BlockReopenQueue *reopen_queue,
+  uint64_t perm, uint64_t shared,
+  uint64_t *nperm, uint64_t *nshared)
+{
+
+}
+
+static int64_t coroutine_fn GRAPH_RDLOCK
+gluks_co_getlength(BlockDriverState *bs)
+{
+return 0;
+}
+
+static BlockDriver bdrv_generic_luks = {
+.format_name= "gluks",
+.instance_size  = sizeof(BDRVGLUKSState),
+.bdrv_open  = gluks_open,
+.bdrv_co_create_opts= gluks_co_create_opts,
+.bdrv_child_perm= gluks_child_perms,
+.bdrv_co_getlength  = gluks_co_getlength,
+};
+
+static void block_generic_luks_init(void)
+{
+bdrv_register(&bdrv_generic_luks);
+}
+
+block_init(block_generic_luks_init);
diff --git a/block/generic-luks.h b/block/generic-luks.h
new file mode 100644
index 00..2aae866fa4
--- /dev/null
+++ b/block/generic-luks.h
@@ -0,0 +1,26 @@
+/*
+ * QEMU block driver for the generic luks encryption
+ *
+ * Copyright (c) 2024 SmartX Inc
+ *
+ * Author: Hyman Huang 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ *
+ */
+
+#ifndef GENERIC_LUKS_H
+#define GENERIC_LUKS_H
+
+#endif /* GENERIC_LUKS_H */
diff --git a/block/meson.build b/block/meson.build
index 59ff6d380c..74f2da7bed 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -39,6 +39,7 @@ block_ss.add(files(
   'throttle.c',
   'throttle-groups.c',
   'write-threshold.c',
+  'generic-luks.c',
 ), zstd, zlib, gnutls)
 
 system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
-- 
2.39.1




[RFC 8/8] block: Support Gluks format image creation using qemu-img

2023-12-04 Thread Hyman Huang
To create a Gluks header image, use the command as follows:
$ qemu-img create --object secret,id=sec0,data=abc123 -f gluks
> -o cipher-alg=aes-256,cipher-mode=xts -o key-secret=sec0
> cipher.gluks

Signed-off-by: Hyman Huang 
---
 block.c  |  5 +
 block/generic-luks.c | 53 +++-
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index bfb0861ec6..cc9a517a25 100644
--- a/block.c
+++ b/block.c
@@ -7517,6 +7517,11 @@ void bdrv_img_create(const char *filename, const char 
*fmt,
 goto out;
 }
 
+if (!strcmp(fmt, "gluks")) {
+qemu_opt_set(opts, "size", "0M", &local_err);
+size = 0;
+}
+
 if (size == -1) {
 error_setg(errp, "Image creation needs a size parameter");
 goto out;
diff --git a/block/generic-luks.c b/block/generic-luks.c
index 32cbedc86f..579f01c4b0 100644
--- a/block/generic-luks.c
+++ b/block/generic-luks.c
@@ -145,7 +145,58 @@ static int coroutine_fn GRAPH_UNLOCKED
 gluks_co_create_opts(BlockDriver *drv, const char *filename,
  QemuOpts *opts, Error **errp)
 {
-return 0;
+QCryptoBlockCreateOptions *create_opts = NULL;
+BlockDriverState *bs = NULL;
+QDict *cryptoopts;
+int ret;
+
+if (qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) != 0) {
+info_report("gluks format image need not size parameter, ignore it");
+}
+
+cryptoopts = qemu_opts_to_qdict_filtered(opts, NULL,
+ &gluks_create_opts_luks,
+ true);
+
+qdict_put_str(cryptoopts, "format",
+QCryptoBlockFormat_str(Q_CRYPTO_BLOCK_FORMAT_GLUKS));
+
+create_opts = block_crypto_create_opts_init(cryptoopts, errp);
+if (!create_opts) {
+ret = -EINVAL;
+goto fail;
+}
+
+/* Create protocol layer */
+ret = bdrv_co_create_file(filename, opts, errp);
+if (ret < 0) {
+goto fail;
+}
+
+bs = bdrv_co_open(filename, NULL, NULL,
+  BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+if (!bs) {
+ret = -EINVAL;
+goto fail;
+}
+/* Create format layer */
+ret = block_crypto_co_create_generic(bs, 0, create_opts, 0, errp);
+if (ret < 0) {
+goto fail;
+}
+
+ret = 0;
+fail:
+/*
+ * If an error occurred, delete 'filename'. Even if the file existed
+ * beforehand, it has been truncated and corrupted in the process.
+ */
+if (ret) {
+bdrv_graph_co_rdlock();
+bdrv_co_delete_file_noerr(bs);
+bdrv_graph_co_rdunlock();
+}
+return ret;
 }
 
 static void
-- 
2.39.1




[RFC 2/8] crypto: Introduce payload offset set function

2023-12-04 Thread Hyman Huang
Implement the payload offset set function for Gluks.

Signed-off-by: Hyman Huang 
---
 crypto/block.c | 4 
 include/crypto/block.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/crypto/block.c b/crypto/block.c
index 7bb4b74a37..3dcf22a69f 100644
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -319,6 +319,10 @@ QCryptoHashAlgorithm 
qcrypto_block_get_kdf_hash(QCryptoBlock *block)
 return block->kdfhash;
 }
 
+void qcrypto_block_set_payload_offset(QCryptoBlock *block, uint64_t offset)
+{
+block->payload_offset = offset;
+}
 
 uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block)
 {
diff --git a/include/crypto/block.h b/include/crypto/block.h
index 4f63a37872..b47a90c529 100644
--- a/include/crypto/block.h
+++ b/include/crypto/block.h
@@ -312,4 +312,5 @@ void qcrypto_block_free(QCryptoBlock *block);
 
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlock, qcrypto_block_free)
 
+void qcrypto_block_set_payload_offset(QCryptoBlock *block, uint64_t offset);
 #endif /* QCRYPTO_BLOCK_H */
-- 
2.39.1




[RFC 1/8] crypto: Export util functions and structures

2023-12-04 Thread Hyman Huang
Luks driver logic is primarily reused by Gluk, which,
therefore, exports several pre-existing functions and
structures.

Signed-off-by: Hyman Huang 
---
 block/crypto.c | 16 
 block/crypto.h | 23 +++
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 921933a5e5..6afae1de2e 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -34,14 +34,6 @@
 #include "qemu/memalign.h"
 #include "crypto.h"
 
-typedef struct BlockCrypto BlockCrypto;
-
-struct BlockCrypto {
-QCryptoBlock *block;
-bool updating_keys;
-};
-
-
 static int block_crypto_probe_generic(QCryptoBlockFormat format,
   const uint8_t *buf,
   int buf_size,
@@ -321,7 +313,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat 
format,
 }
 
 
-static int coroutine_fn GRAPH_UNLOCKED
+int coroutine_fn GRAPH_UNLOCKED
 block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
QCryptoBlockCreateOptions *opts,
PreallocMode prealloc, Error **errp)
@@ -385,7 +377,7 @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t 
offset, bool exact,
 return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
 }
 
-static void block_crypto_close(BlockDriverState *bs)
+void block_crypto_close(BlockDriverState *bs)
 {
 BlockCrypto *crypto = bs->opaque;
 qcrypto_block_free(crypto->block);
@@ -404,7 +396,7 @@ static int block_crypto_reopen_prepare(BDRVReopenState 
*state,
  */
 #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
 
-static int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_RDLOCK
 block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -466,7 +458,7 @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
 }
 
 
-static int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_RDLOCK
 block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
diff --git a/block/crypto.h b/block/crypto.h
index 72e792c9af..06465009f0 100644
--- a/block/crypto.h
+++ b/block/crypto.h
@@ -21,6 +21,8 @@
 #ifndef BLOCK_CRYPTO_H
 #define BLOCK_CRYPTO_H
 
+#include "crypto/block.h"
+
 #define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr)\
 {   \
 .name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,\
@@ -131,4 +133,25 @@ block_crypto_amend_opts_init(QDict *opts, Error **errp);
 QCryptoBlockOpenOptions *
 block_crypto_open_opts_init(QDict *opts, Error **errp);
 
+typedef struct BlockCrypto BlockCrypto;
+
+struct BlockCrypto {
+QCryptoBlock *block;
+bool updating_keys;
+};
+
+int coroutine_fn GRAPH_UNLOCKED
+block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
+   QCryptoBlockCreateOptions *opts,
+   PreallocMode prealloc, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+   QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+void block_crypto_close(BlockDriverState *bs);
 #endif /* BLOCK_CRYPTO_H */
-- 
2.39.1




[RFC 5/8] qapi: Introduce Gluks types to qapi

2023-12-04 Thread Hyman Huang
Primarily using the Luks types again, Gluks adds an
extra option called "header", which points to the Luks
header node's description.

Signed-off-by: Hyman Huang 
---
 qapi/block-core.json | 22 +-
 qapi/crypto.json | 10 +++---
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index ca390c5700..e2208f6891 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3185,12 +3185,14 @@
 #
 # @snapshot-access: Since 7.0
 #
+# @gluks: Since 9.0
+#
 # Since: 2.9
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
 'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg',
-'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
+'file', 'snapshot-access', 'ftp', 'ftps', 'gluks', 'gluster',
 {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
 {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
 'http', 'https',
@@ -3957,6 +3959,23 @@
 '*debug': 'int',
 '*logfile': 'str' } }
 
+##
+# @BlockdevOptionsGLUKS:
+#
+# Driver specific block device options for GLUKS.
+#
+# @header: reference to the definition of the luks header node.
+#
+# @key-secret: the ID of a QCryptoSecret object providing the
+# decryption key.
+#
+# Since: 9.0
+##
+{ 'struct': 'BlockdevOptionsGLUKS',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'header': 'BlockdevRef',
+'key-secret': 'str' } }
+
 ##
 # @BlockdevOptionsIoUring:
 #
@@ -4680,6 +4699,7 @@
   'file':   'BlockdevOptionsFile',
   'ftp':'BlockdevOptionsCurlFtp',
   'ftps':   'BlockdevOptionsCurlFtps',
+  'gluks':  'BlockdevOptionsGLUKS',
   'gluster':'BlockdevOptionsGluster',
   'host_cdrom':  { 'type': 'BlockdevOptionsFile',
'if': 'HAVE_HOST_BLOCK_DEVICE' },
diff --git a/qapi/crypto.json b/qapi/crypto.json
index fd3d46ebd1..9afb242b5b 100644
--- a/qapi/crypto.json
+++ b/qapi/crypto.json
@@ -154,11 +154,13 @@
 #
 # @luks: LUKS encryption format.  Recommended for new images
 #
+# @gluks: generic LUKS encryption format. (since 9.0)
+#
 # Since: 2.6
 ##
 { 'enum': 'QCryptoBlockFormat',
 #  'prefix': 'QCRYPTO_BLOCK_FORMAT',
-  'data': ['qcow', 'luks']}
+  'data': ['qcow', 'luks', 'gluks']}
 
 ##
 # @QCryptoBlockOptionsBase:
@@ -246,7 +248,8 @@
   'base': 'QCryptoBlockOptionsBase',
   'discriminator': 'format',
   'data': { 'qcow': 'QCryptoBlockOptionsQCow',
-'luks': 'QCryptoBlockOptionsLUKS' } }
+'luks': 'QCryptoBlockOptionsLUKS',
+'gluks': 'QCryptoBlockOptionsLUKS' } }
 
 ##
 # @QCryptoBlockCreateOptions:
@@ -260,7 +263,8 @@
   'base': 'QCryptoBlockOptionsBase',
   'discriminator': 'format',
   'data': { 'qcow': 'QCryptoBlockOptionsQCow',
-'luks': 'QCryptoBlockCreateOptionsLUKS' } }
+'luks': 'QCryptoBlockCreateOptionsLUKS',
+'gluks': 'QCryptoBlockCreateOptionsLUKS' } }
 
 ##
 # @QCryptoBlockInfoBase:
-- 
2.39.1




[PATCH] hw/ufs: avoid generating the same ID string for different LU devices

2023-12-04 Thread Akinobu Mita
QEMU would not start when trying to create two UFS host controllers and
a UFS logical unit for each with the following options:

-device ufs,id=bus0 \
-device ufs-lu,drive=drive1,bus=bus0,lun=0 \
-device ufs,id=bus1 \
-device ufs-lu,drive=drive2,bus=bus1,lun=0 \

This is because the same ID string ("0:0:0/scsi-disk") is generated
for both UFS logical units.

To fix this issue, prepend the parent pci device's path to make
the ID string unique.
(":00:03.0/0:0:0/scsi-disk" and ":00:04.0/0:0:0/scsi-disk")

Fixes: 096434fea13a ("hw/ufs: Modify lu.c to share codes with SCSI subsystem")
Signed-off-by: Akinobu Mita 
---
 hw/ufs/ufs.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index 68c5f1f6c9..eccdb852a0 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -1323,9 +1323,17 @@ static bool ufs_bus_check_address(BusState *qbus, 
DeviceState *qdev,
 return true;
 }
 
+static char *ufs_bus_get_dev_path(DeviceState *dev)
+{
+BusState *bus = qdev_get_parent_bus(dev);
+
+return qdev_get_dev_path(bus->parent);
+}
+
 static void ufs_bus_class_init(ObjectClass *class, void *data)
 {
 BusClass *bc = BUS_CLASS(class);
+bc->get_dev_path = ufs_bus_get_dev_path;
 bc->check_address = ufs_bus_check_address;
 }
 
-- 
2.34.1




[RFC 6/8] crypto: Provide the Luks crypto driver to Gluks

2023-12-04 Thread Hyman Huang
Hooks up the Luks crypto driver for Gluks.

Signed-off-by: Hyman Huang 
---
 crypto/block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/block.c b/crypto/block.c
index 3dcf22a69f..7e695c0a04 100644
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -27,6 +27,7 @@
 static const QCryptoBlockDriver *qcrypto_block_drivers[] = {
 [Q_CRYPTO_BLOCK_FORMAT_QCOW] = &qcrypto_block_driver_qcow,
 [Q_CRYPTO_BLOCK_FORMAT_LUKS] = &qcrypto_block_driver_luks,
+[Q_CRYPTO_BLOCK_FORMAT_GLUKS] = &qcrypto_block_driver_luks,
 };
 
 
-- 
2.39.1




[RFC 4/8] Gluks: Introduce Gluks options

2023-12-04 Thread Hyman Huang
Similar to Luks, the Gluks format primarily recycles the
Luks choices with the exception of the "size" option.

Signed-off-by: Hyman Huang 
---
 block/crypto.c   |  4 ++--
 block/generic-luks.c | 18 ++
 block/generic-luks.h |  3 +++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 6afae1de2e..6f8528dccc 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -150,7 +150,7 @@ error:
 }
 
 
-static QemuOptsList block_crypto_runtime_opts_luks = {
+QemuOptsList block_crypto_runtime_opts_luks = {
 .name = "crypto",
 .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
 .desc = {
@@ -181,7 +181,7 @@ static QemuOptsList block_crypto_create_opts_luks = {
 };
 
 
-static QemuOptsList block_crypto_amend_opts_luks = {
+QemuOptsList block_crypto_amend_opts_luks = {
 .name = "crypto",
 .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
 .desc = {
diff --git a/block/generic-luks.c b/block/generic-luks.c
index f23e202991..ebc0365d40 100644
--- a/block/generic-luks.c
+++ b/block/generic-luks.c
@@ -35,6 +35,21 @@ typedef struct BDRVGLUKSState {
 uint64_t header_size;   /* In bytes */
 } BDRVGLUKSState;
 
+static QemuOptsList gluks_create_opts_luks = {
+.name = "crypto",
+.head = QTAILQ_HEAD_INITIALIZER(gluks_create_opts_luks.head),
+.desc = {
+BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
+{ /* end of list */ }
+},
+};
+
 static int gluks_open(BlockDriverState *bs, QDict *options, int flags,
   Error **errp)
 {
@@ -71,6 +86,9 @@ static BlockDriver bdrv_generic_luks = {
 .bdrv_co_create_opts= gluks_co_create_opts,
 .bdrv_child_perm= gluks_child_perms,
 .bdrv_co_getlength  = gluks_co_getlength,
+
+.create_opts= &gluks_create_opts_luks,
+.amend_opts = &block_crypto_amend_opts_luks,
 };
 
 static void block_generic_luks_init(void)
diff --git a/block/generic-luks.h b/block/generic-luks.h
index 2aae866fa4..f18adf41ea 100644
--- a/block/generic-luks.h
+++ b/block/generic-luks.h
@@ -23,4 +23,7 @@
 #ifndef GENERIC_LUKS_H
 #define GENERIC_LUKS_H
 
+extern QemuOptsList block_crypto_runtime_opts_luks;
+extern QemuOptsList block_crypto_amend_opts_luks;
+
 #endif /* GENERIC_LUKS_H */
-- 
2.39.1




Re: [RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Daniel P . Berrangé
On Tue, Dec 05, 2023 at 12:06:17AM +0800, Hyman Huang wrote:
> This functionality was motivated by the following to-do list seen
> in crypto documents:
> https://wiki.qemu.org/Features/Block/Crypto 
> 
> The last chapter says we should "separate header volume": 
> 
> The LUKS format has ability to store the header in a separate volume
> from the payload. We should extend the LUKS driver in QEMU to support
> this use case.
> 
> As a proof-of-concept, I've created this patchset, which I've named
> the Gluks: generic luks. As their name suggests, they offer encryption
> for any format that QEMU theoretically supports.

I don't see the point in creating a new driver.

I would expect detached header support to be implemented via an
optional new 'header' field in the existing driver. ie

diff --git a/qapi/block-core.json b/qapi/block-core.json
index ca390c5700..48d1f2a974 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3352,11 +3352,15 @@
 # decryption key (since 2.6). Mandatory except when doing a
 # metadata-only probe of the image.
 #
+# @header: optional reference to the location of a blockdev
+# storing a detached LUKS heaer
+#
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsLUKS',
   'base': 'BlockdevOptionsGenericFormat',
-  'data': { '*key-secret': 'str' } }
+  'data': { '*key-secret': 'str',
+"*header-file': 'BlockdevRef'} }
 
 ##
 # @BlockdevOptionsGenericCOWFormat:
@@ -4941,9 +4945,18 @@
 #
 # Driver specific image creation options for LUKS.
 #
-# @file: Node to create the image format on
+# @file: Node to create the image format on. Mandatory
+# unless a detached header file is specified using
+# @header.
 #
-# @size: Size of the virtual disk in bytes
+# @size: Size of the virtual disk in bytes.  Mandatory
+# unless a detached header file is specified using
+# @header.
+#
+# @header: optional reference to the location of a blockdev
+# storing a detached LUKS heaer. The @file option is
+# is optional when this is given, unless it is desired
+# to perform pre-allocation
 #
 # @preallocation: Preallocation mode for the new image (since: 4.2)
 # (default: off; allowed values: off, metadata, falloc, full)
@@ -4952,8 +4965,9 @@
 ##
 { 'struct': 'BlockdevCreateOptionsLUKS',
   'base': 'QCryptoBlockCreateOptionsLUKS',
-  'data': { 'file': 'BlockdevRef',
-'size': 'size',
+  'data': { '*file':'BlockdevRef',
+'*size':'size',
+'*header':  'BlockdevRef'
 '*preallocation':   'PreallocMode' } }
 
 ##

It ends up giving basicallly the same workflow as you outline,
without needing the new block driver

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH] Revert "test/qga: use G_TEST_DIR to locate os-release test file"

2023-12-04 Thread Andrey Drobyshev
Since the commit a85d09269b QGA_OS_RELEASE variable points to the path
relative to the build dir.  Then on qemu-ga startup this path can't be
found as qemu-ga cwd is somewhere else, which leads to the test failure:

  # ./tests/unit/test-qga -p /qga/guest-get-osinfo
  # random seed: R02S3a90c22d77ff1070fbd844f4959cf4a4
  # Start of qga tests
  **
  ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 'str' should 
not be NULL
  Bail out! ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 'str' 
should not be NULL

Let's obtain the absolute path again.

This reverts commit a85d09269bb1a7071d3ce0f2957e3ca9dba7c047.

Signed-off-by: Andrey Drobyshev 
---
 tests/unit/test-qga.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c
index 671e83cb86..47cf5e30ec 100644
--- a/tests/unit/test-qga.c
+++ b/tests/unit/test-qga.c
@@ -1034,10 +1034,12 @@ static void test_qga_guest_get_osinfo(gconstpointer 
data)
 g_autoptr(QDict) ret = NULL;
 char *env[2];
 QDict *val;
+g_autofree gchar *cwd = NULL;
 
+cwd = g_get_current_dir();
 env[0] = g_strdup_printf(
-"QGA_OS_RELEASE=%s%c..%cdata%ctest-qga-os-release",
-g_test_get_dir(G_TEST_DIST), G_DIR_SEPARATOR, G_DIR_SEPARATOR, 
G_DIR_SEPARATOR);
+"QGA_OS_RELEASE=%s%ctests%cdata%ctest-qga-os-release",
+cwd, G_DIR_SEPARATOR, G_DIR_SEPARATOR, G_DIR_SEPARATOR);
 env[1] = NULL;
 fixture_setup(&fixture, NULL, env);
 
-- 
2.39.3




Re: [RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Yong Huang
On Tue, Dec 5, 2023 at 12:24 AM Daniel P. Berrangé 
wrote:

> On Tue, Dec 05, 2023 at 12:06:17AM +0800, Hyman Huang wrote:
> > This functionality was motivated by the following to-do list seen
> > in crypto documents:
> > https://wiki.qemu.org/Features/Block/Crypto
> >
> > The last chapter says we should "separate header volume":
> >
> > The LUKS format has ability to store the header in a separate volume
> > from the payload. We should extend the LUKS driver in QEMU to support
> > this use case.
> >
> > As a proof-of-concept, I've created this patchset, which I've named
> > the Gluks: generic luks. As their name suggests, they offer encryption
> > for any format that QEMU theoretically supports.
>
> I don't see the point in creating a new driver.
>

Indeed, this definitely makes things simple. The next version would do that
!

>
> I would expect detached header support to be implemented via an
> optional new 'header' field in the existing driver. ie
>
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index ca390c5700..48d1f2a974 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3352,11 +3352,15 @@
>  # decryption key (since 2.6). Mandatory except when doing a
>  # metadata-only probe of the image.
>  #
> +# @header: optional reference to the location of a blockdev
> +# storing a detached LUKS heaer
> +#
>  # Since: 2.9
>  ##
>  { 'struct': 'BlockdevOptionsLUKS',
>'base': 'BlockdevOptionsGenericFormat',
> -  'data': { '*key-secret': 'str' } }
> +  'data': { '*key-secret': 'str',
> +"*header-file': 'BlockdevRef'} }
>
>  ##
>  # @BlockdevOptionsGenericCOWFormat:
> @@ -4941,9 +4945,18 @@
>  #
>  # Driver specific image creation options for LUKS.
>  #
> -# @file: Node to create the image format on
> +# @file: Node to create the image format on. Mandatory
> +# unless a detached header file is specified using
> +# @header.
>  #
> -# @size: Size of the virtual disk in bytes
> +# @size: Size of the virtual disk in bytes.  Mandatory
> +# unless a detached header file is specified using
> +# @header.
> +#
> +# @header: optional reference to the location of a blockdev
> +# storing a detached LUKS heaer. The @file option is
> +# is optional when this is given, unless it is desired
> +# to perform pre-allocation
>  #
>  # @preallocation: Preallocation mode for the new image (since: 4.2)
>  # (default: off; allowed values: off, metadata, falloc, full)
> @@ -4952,8 +4965,9 @@
>  ##
>  { 'struct': 'BlockdevCreateOptionsLUKS',
>'base': 'QCryptoBlockCreateOptionsLUKS',
> -  'data': { 'file': 'BlockdevRef',
> -'size': 'size',
> +  'data': { '*file':'BlockdevRef',
> +'*size':'size',
> +'*header':  'BlockdevRef'
>  '*preallocation':   'PreallocMode' } }
>
>  ##
>
> It ends up giving basicallly the same workflow as you outline,
> without needing the new block driver
>
Yes, most of the logic reuse the pre-existing Luks driver.


> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

-- 
Best regards


Re: [PATCH V6 03/14] cpus: stop vm in suspended runstate

2023-12-04 Thread Peter Xu
On Fri, Dec 01, 2023 at 12:11:32PM -0500, Steven Sistare wrote:
> >> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
> >> index f6a337b..1d6828f 100644
> >> --- a/include/sysemu/runstate.h
> >> +++ b/include/sysemu/runstate.h
> >> @@ -40,6 +40,11 @@ static inline bool 
> >> shutdown_caused_by_guest(ShutdownCause cause)
> >>  return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN;
> >>  }
> >>  
> >> +static inline bool runstate_is_started(RunState state)
> > 
> > Would runstate_has_vm_running() sound better?  It is a bit awkward when
> > saying something like "start a runstate".
> 
> I have been searching for the perfect name for this accessor.
> IMO using "running" in this accessor is confusing because it applies to both
> the running and suspended state.  So, I invented a new aggregate state called
> started.  vm_start transitions the machine to a started state.
> 
> How about runstate_was_started?  It works well at both start and stop call 
> sites:
> 
> void vm_resume(RunState state)
> if (runstate_was_started(state)) {

This one looks fine, but...

> vm_start();
> 
> int vm_stop_force_state(RunState state)
> if (runstate_was_started(runstate_get())) {

.. this one makes the past tense not looking good.

> return vm_stop(state);

How about runstate_is_alive()?  So far the best I can come up with. :)

Even if you prefer "started", I'd vote for not using past tense, hence
runstate_is_started().

Thanks,

-- 
Peter Xu




Re: [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread

2023-12-04 Thread Stefan Hajnoczi
On Fri, Dec 01, 2023 at 05:03:13PM +0100, Kevin Wolf wrote:
> Am 23.11.2023 um 20:49 hat Stefan Hajnoczi geschrieben:
> > Stop depending on the AioContext lock and instead access
> > SCSIDevice->requests from only one thread at a time:
> > - When the VM is running only the BlockBackend's AioContext may access
> >   the requests list.
> > - When the VM is stopped only the main loop may access the requests
> >   list.
> > 
> > These constraints protect the requests list without the need for locking
> > in the I/O code path.
> > 
> > Note that multiple IOThreads are not supported yet because the code
> > assumes all SCSIRequests are executed from a single AioContext. Leave
> > that as future work.
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> >  include/hw/scsi/scsi.h |   7 +-
> >  hw/scsi/scsi-bus.c | 174 -
> >  2 files changed, 124 insertions(+), 57 deletions(-)
> > 
> > diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
> > index 3692ca82f3..10c4e8288d 100644
> > --- a/include/hw/scsi/scsi.h
> > +++ b/include/hw/scsi/scsi.h
> > @@ -69,14 +69,19 @@ struct SCSIDevice
> >  {
> >  DeviceState qdev;
> >  VMChangeStateEntry *vmsentry;
> > -QEMUBH *bh;
> >  uint32_t id;
> >  BlockConf conf;
> >  SCSISense unit_attention;
> >  bool sense_is_ua;
> >  uint8_t sense[SCSI_SENSE_BUF_SIZE];
> >  uint32_t sense_len;
> > +
> > +/*
> > + * The requests list is only accessed from the AioContext that executes
> > + * requests or from the main loop when IOThread processing is stopped.
> > + */
> >  QTAILQ_HEAD(, SCSIRequest) requests;
> > +
> >  uint32_t channel;
> >  uint32_t lun;
> >  int blocksize;
> > diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> > index fc4b77fdb0..b8bfde9565 100644
> > --- a/hw/scsi/scsi-bus.c
> > +++ b/hw/scsi/scsi-bus.c
> > @@ -85,6 +85,82 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, 
> > int id, int lun)
> >  return d;
> >  }
> >  
> > +/*
> > + * Invoke @fn() for each enqueued request in device @s. Must be called 
> > from the
> > + * main loop thread while the guest is stopped. This is only suitable for
> > + * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
> > + */
> > +static void scsi_device_for_each_req_sync(SCSIDevice *s,
> > +  void (*fn)(SCSIRequest *, void 
> > *),
> > +  void *opaque)
> > +{
> > +SCSIRequest *req;
> > +SCSIRequest *next_req;
> > +
> > +assert(!runstate_is_running());
> > +assert(qemu_in_main_thread());
> > +
> > +QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
> > +fn(req, opaque);
> > +}
> > +}
> > +
> > +typedef struct {
> > +SCSIDevice *s;
> > +void (*fn)(SCSIRequest *, void *);
> > +void *fn_opaque;
> > +} SCSIDeviceForEachReqAsyncData;
> > +
> > +static void scsi_device_for_each_req_async_bh(void *opaque)
> > +{
> > +g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
> > +SCSIDevice *s = data->s;
> > +SCSIRequest *req;
> > +SCSIRequest *next;
> > +
> > +/*
> > + * It is unlikely that the AioContext will change before this BH is 
> > called,
> > + * but if it happens then ->requests must not be accessed from this
> > + * AioContext.
> > + */
> 
> What is the scenario where this happens? I would have expected that
> switching the AioContext of a node involves draining the node first,
> which would execute this BH before the context changes.

I don't think aio_poll() is invoked by bdrv_drained_begin() when there
are no requests in flight. In that case the BH could remain pending
across bdrv_drained_begin()/bdrv_drained_end().

> The other option I see is an empty BlockBackend, which can change its
> AioContext without polling BHs, but in that case there is no connection
> to other users, so the only change could come from virtio-scsi itself.
> If there is such a case, it would probably be helpful to be specific in
> the comment.
>
> > +if (blk_get_aio_context(s->conf.blk) == 
> > qemu_get_current_aio_context()) {
> > +QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
> > +data->fn(req, data->fn_opaque);
> > +}
> > +}
> 
> Of course, if the situation does happen, the question is why just doing
> nothing is correct. Wouldn't that mean that the guest still sees stuck
> requests?
> 
> Would rescheduling the BH in the new context be better?

In the case where there are no requests it is correct to do nothing, but
it's not a general solution.

> > +
> > +/* Drop the reference taken by scsi_device_for_each_req_async() */
> > +object_unref(OBJECT(s));
> > +}
> > +
> > +/*
> > + * Schedule @fn() to be invoked for each enqueued request in device @s. 
> > @fn()
> > + * runs in the AioContext that is executing the request.
> > + */
> > +static void scsi_device_for_each_req_async(SCSIDevice *

Re: [PATCH V6 03/14] cpus: stop vm in suspended runstate

2023-12-04 Thread Steven Sistare
On 12/4/2023 11:35 AM, Peter Xu wrote:
> On Fri, Dec 01, 2023 at 12:11:32PM -0500, Steven Sistare wrote:
 diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
 index f6a337b..1d6828f 100644
 --- a/include/sysemu/runstate.h
 +++ b/include/sysemu/runstate.h
 @@ -40,6 +40,11 @@ static inline bool 
 shutdown_caused_by_guest(ShutdownCause cause)
  return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN;
  }
  
 +static inline bool runstate_is_started(RunState state)
>>>
>>> Would runstate_has_vm_running() sound better?  It is a bit awkward when
>>> saying something like "start a runstate".
>>
>> I have been searching for the perfect name for this accessor.
>> IMO using "running" in this accessor is confusing because it applies to both
>> the running and suspended state.  So, I invented a new aggregate state called
>> started.  vm_start transitions the machine to a started state.
>>
>> How about runstate_was_started?  It works well at both start and stop call 
>> sites:
>>
>> void vm_resume(RunState state)
>> if (runstate_was_started(state)) {
> 
> This one looks fine, but...
> 
>> vm_start();
>>
>> int vm_stop_force_state(RunState state)
>> if (runstate_was_started(runstate_get())) {
> 
> .. this one makes the past tense not looking good.
> 
>> return vm_stop(state);
> 
> How about runstate_is_alive()?  So far the best I can come up with. :)
> 
> Even if you prefer "started", I'd vote for not using past tense, hence
> runstate_is_started().

runstate_is_live also occurred to me.  I'll use that.

- Steve



[PATCH v2 3/4] scsi: don't lock AioContext in I/O code path

2023-12-04 Thread Stefan Hajnoczi
blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's
internal state also does not anymore.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Acked-by: Kevin Wolf 
---
 hw/scsi/scsi-disk.c| 23 ---
 hw/scsi/scsi-generic.c | 20 +++-
 2 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 6691f5edb8..2c1bbb3530 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret)
 SCSIDiskReq *r = (SCSIDiskReq *)opaque;
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret)
 scsi_req_complete(&r->req, GOOD);
 
 done:
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 scsi_req_unref(&r->req);
 }
 
@@ -394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret)
 SCSIDiskReq *r = (SCSIDiskReq *)opaque;
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret)
 trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
 }
 scsi_read_complete_noio(r, ret);
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 /* Actually issue a read to the block device.  */
@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret)
 SCSIDiskReq *r = (SCSIDiskReq *)opaque;
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
 assert (r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret)
 block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
 }
 scsi_do_read(opaque, ret);
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 /* Read more data from scsi device into buffer.  */
@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret)
 SCSIDiskReq *r = (SCSIDiskReq *)opaque;
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
 assert (r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret)
 block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
 }
 scsi_write_complete_noio(r, ret);
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 static void scsi_write_data(SCSIRequest *req)
@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret)
 SCSIDiskReq *r = data->r;
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret)
 block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
 scsi_unmap_complete_noio(data, ret);
 }
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int 
ret)
 SCSIDiskReq *r = data->r;
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
 assert(r->req.aiocb != NULL);
 r->req.aiocb = NULL;
 
@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int 
ret)
data->sector << BDRV_SECTOR_BITS,
&data->qiov, 0,
scsi_write_same_complete, data);
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 return;
 }
 
@@ -1857,7 +1839,6 @@ done:
 scsi_req_unref(&r->req);
 qemu_vfree(data->iov.iov_base);
 g_free(data);
-aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int 
ret)
 {
 SCSIBlockReq *req = (SCSIBlockReq *)opaque;
 SCSIDiskReq *r = &req->req;
-SCSIDevice *s = r->req.dev;
 sg_io_hdr_t *io_hdr = &req->io_header;
 
 if (ret == 0) {
@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int 
ret)
 }
 
 if (ret > 0) {
-aio_context_acquire(blk_get_aio_context(s->conf.blk));
 if (scsi_handle_rw_erro

[PATCH v2 4/4] dma-helpers: don't lock AioContext in dma_blk_cb()

2023-12-04 Thread Stefan Hajnoczi
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
to avoid a race with scsi_device_purge_requests() running in the main
loop thread.

The SCSI code no longer calls dma_aio_cancel() from the main loop thread
while I/O is running in the IOThread AioContext. Therefore it is no
longer necessary to take this lock to protect DMAAIOCB fields. The
->cb() function also does not require the lock because blk_aio_*() and
friends do not need the AioContext lock.

Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
rely on it taking the AioContext lock, so this change is safe.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
---
 system/dma-helpers.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index 36211acc7e..528117f256 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
 
 trace_dma_blk_cb(dbs, ret);
 
-aio_context_acquire(ctx);
 dbs->acb = NULL;
 dbs->offset += dbs->iov.size;
 
 if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
 dma_complete(dbs, ret);
-goto out;
+return;
 }
 dma_blk_unmap(dbs);
 
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
 trace_dma_map_wait(dbs);
 dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
 cpu_register_map_client(dbs->bh);
-goto out;
+return;
 }
 
 if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
 dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
 dma_blk_cb, dbs, dbs->io_func_opaque);
 assert(dbs->acb);
-out:
-aio_context_release(ctx);
 }
 
 static void dma_aio_cancel(BlockAIOCB *acb)
-- 
2.43.0




[PATCH v2 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier()

2023-12-04 Thread Stefan Hajnoczi
virtio_queue_aio_attach_host_notifier() does not require the AioContext
lock. Stop taking the lock and add an explicit smp_wmb() because we were
relying on the implicit barrier in the AioContext lock before.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
---
 hw/scsi/virtio-scsi-dataplane.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 1e684beebe..135e23fe54 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
 
 memory_region_transaction_commit();
 
-/*
- * These fields are visible to the IOThread so we rely on implicit barriers
- * in aio_context_acquire() on the write side and aio_notify_accept() on
- * the read side.
- */
 s->dataplane_starting = false;
 s->dataplane_started = true;
+smp_wmb(); /* paired with aio_notify_accept() */
 
 if (s->bus.drain_count == 0) {
-aio_context_acquire(s->ctx);
 virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
 virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
 
 for (i = 0; i < vs->conf.num_queues; i++) {
 virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
 }
-aio_context_release(s->ctx);
 }
 return 0;
 
-- 
2.43.0




[PATCH v2 1/4] scsi: only access SCSIDevice->requests from one thread

2023-12-04 Thread Stefan Hajnoczi
Stop depending on the AioContext lock and instead access
SCSIDevice->requests from only one thread at a time:
- When the VM is running only the BlockBackend's AioContext may access
  the requests list.
- When the VM is stopped only the main loop may access the requests
  list.

These constraints protect the requests list without the need for locking
in the I/O code path.

Note that multiple IOThreads are not supported yet because the code
assumes all SCSIRequests are executed from a single AioContext. Leave
that as future work.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
---
 include/hw/scsi/scsi.h |   7 +-
 hw/scsi/scsi-bus.c | 180 -
 2 files changed, 130 insertions(+), 57 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 3692ca82f3..10c4e8288d 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -69,14 +69,19 @@ struct SCSIDevice
 {
 DeviceState qdev;
 VMChangeStateEntry *vmsentry;
-QEMUBH *bh;
 uint32_t id;
 BlockConf conf;
 SCSISense unit_attention;
 bool sense_is_ua;
 uint8_t sense[SCSI_SENSE_BUF_SIZE];
 uint32_t sense_len;
+
+/*
+ * The requests list is only accessed from the AioContext that executes
+ * requests or from the main loop when IOThread processing is stopped.
+ */
 QTAILQ_HEAD(, SCSIRequest) requests;
+
 uint32_t channel;
 uint32_t lun;
 int blocksize;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index fc4b77fdb0..f3ec11f892 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -85,6 +85,88 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int 
id, int lun)
 return d;
 }
 
+/*
+ * Invoke @fn() for each enqueued request in device @s. Must be called from the
+ * main loop thread while the guest is stopped. This is only suitable for
+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
+ */
+static void scsi_device_for_each_req_sync(SCSIDevice *s,
+  void (*fn)(SCSIRequest *, void *),
+  void *opaque)
+{
+SCSIRequest *req;
+SCSIRequest *next_req;
+
+assert(!runstate_is_running());
+assert(qemu_in_main_thread());
+
+QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+fn(req, opaque);
+}
+}
+
+typedef struct {
+SCSIDevice *s;
+void (*fn)(SCSIRequest *, void *);
+void *fn_opaque;
+} SCSIDeviceForEachReqAsyncData;
+
+static void scsi_device_for_each_req_async_bh(void *opaque)
+{
+g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
+SCSIDevice *s = data->s;
+AioContext *ctx;
+SCSIRequest *req;
+SCSIRequest *next;
+
+/*
+ * If the AioContext changed before this BH was called then reschedule into
+ * the new AioContext before accessing ->requests. This can happen when
+ * scsi_device_for_each_req_async() is called and then the AioContext is
+ * changed before BHs are run.
+ */
+ctx = blk_get_aio_context(s->conf.blk);
+if (ctx != qemu_get_current_aio_context()) {
+aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
+return;
+}
+
+QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+data->fn(req, data->fn_opaque);
+}
+
+/* Drop the reference taken by scsi_device_for_each_req_async() */
+object_unref(OBJECT(s));
+}
+
+/*
+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
+ * runs in the AioContext that is executing the request.
+ */
+static void scsi_device_for_each_req_async(SCSIDevice *s,
+   void (*fn)(SCSIRequest *, void *),
+   void *opaque)
+{
+assert(qemu_in_main_thread());
+
+SCSIDeviceForEachReqAsyncData *data =
+g_new(SCSIDeviceForEachReqAsyncData, 1);
+
+data->s = s;
+data->fn = fn;
+data->fn_opaque = opaque;
+
+/*
+ * Hold a reference to the SCSIDevice until
+ * scsi_device_for_each_req_async_bh() finishes.
+ */
+object_ref(OBJECT(s));
+
+aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
+scsi_device_for_each_req_async_bh,
+data);
+}
+
 static void scsi_device_realize(SCSIDevice *s, Error **errp)
 {
 SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
@@ -144,20 +226,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, 
DeviceState *host,
 qbus_set_bus_hotplug_handler(BUS(bus));
 }
 
-static void scsi_dma_restart_bh(void *opaque)
+void scsi_req_retry(SCSIRequest *req)
 {
-SCSIDevice *s = opaque;
-SCSIRequest *req, *next;
+req->retry = true;
+}
 
-qemu_bh_delete(s->bh);
-s->bh = NULL;
-
-aio_context_acquire(blk_get_aio_context(s->conf.blk));
-QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-scsi_req_ref(req);
-if (req->retry) {
-req->ret

Re: [RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Yong Huang
On Tue, Dec 5, 2023 at 12:24 AM Daniel P. Berrangé 
wrote:

> On Tue, Dec 05, 2023 at 12:06:17AM +0800, Hyman Huang wrote:
> > This functionality was motivated by the following to-do list seen
> > in crypto documents:
> > https://wiki.qemu.org/Features/Block/Crypto
> >
> > The last chapter says we should "separate header volume":
> >
> > The LUKS format has ability to store the header in a separate volume
> > from the payload. We should extend the LUKS driver in QEMU to support
> > this use case.
> >
> > As a proof-of-concept, I've created this patchset, which I've named
> > the Gluks: generic luks. As their name suggests, they offer encryption
> > for any format that QEMU theoretically supports.
>
> I don't see the point in creating a new driver.
>
> I would expect detached header support to be implemented via an
> optional new 'header' field in the existing driver. ie
>
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index ca390c5700..48d1f2a974 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3352,11 +3352,15 @@
>  # decryption key (since 2.6). Mandatory except when doing a
>  # metadata-only probe of the image.
>  #
> +# @header: optional reference to the location of a blockdev
> +# storing a detached LUKS heaer
> +#
>  # Since: 2.9
>  ##
>  { 'struct': 'BlockdevOptionsLUKS',
>'base': 'BlockdevOptionsGenericFormat',
> -  'data': { '*key-secret': 'str' } }
> +  'data': { '*key-secret': 'str',
> +"*header-file': 'BlockdevRef'} }
>
>  ##
>  # @BlockdevOptionsGenericCOWFormat:
> @@ -4941,9 +4945,18 @@
>  #
>  # Driver specific image creation options for LUKS.
>  #
> -# @file: Node to create the image format on
> +# @file: Node to create the image format on. Mandatory
> +# unless a detached header file is specified using
> +# @header.
>  #
> -# @size: Size of the virtual disk in bytes
> +# @size: Size of the virtual disk in bytes.  Mandatory
> +# unless a detached header file is specified using
> +# @header.
> +#
> +# @header: optional reference to the location of a blockdev
> +# storing a detached LUKS heaer. The @file option is
> +# is optional when this is given, unless it is desired
> +# to perform pre-allocation
>  #
>  # @preallocation: Preallocation mode for the new image (since: 4.2)
>  # (default: off; allowed values: off, metadata, falloc, full)
> @@ -4952,8 +4965,9 @@
>  ##
>  { 'struct': 'BlockdevCreateOptionsLUKS',
>'base': 'QCryptoBlockCreateOptionsLUKS',
> -  'data': { 'file': 'BlockdevRef',
> -'size': 'size',
> +  'data': { '*file':'BlockdevRef',
> +'*size':'size',
> +'*header':  'BlockdevRef'
>  '*preallocation':   'PreallocMode' } }
>
>  ##
>
> It ends up giving basicallly the same workflow as you outline,
> without needing the new block driver
>

How about the design and usage, could it be simpler? Any advice? :)


As you can see below, the Gluks format block layer driver's design is
quite simple.

 virtio-blk/vhost-user-blk...(front-end device)
  ^
  |
 Gluks   (format-like disk node)
  / \
   file   header (blockdev reference)
/ \
 filefile (protocol node)
   |   |
   disk data   Luks data


>
> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

-- 
Best regards


[PATCH v2 0/4] scsi: eliminate AioContext lock

2023-12-04 Thread Stefan Hajnoczi
v2:
- Reschedule BH in new AioContext if change is detected [Kevin]
- Drop stray "remember" in Patch 2's commit description [Eric]

The SCSI subsystem uses the AioContext lock to protect internal state. This is
necessary because the main loop and the IOThread can access SCSI state in
parallel. This inter-thread access happens during scsi_device_purge_requests()
and scsi_dma_restart_cb().

This patch series modifies the code so SCSI state is only accessed from the
IOThread that is executing requests. Once this has been achieved the AioContext
lock is no longer necessary.

Note that a few aio_context_acquire()/aio_context_release() calls still remain
after this series. They surround API calls that invoke AIO_WAIT_WHILE() and
therefore still rely on the AioContext lock for now.

Stefan Hajnoczi (4):
  scsi: only access SCSIDevice->requests from one thread
  virtio-scsi: don't lock AioContext around
virtio_queue_aio_attach_host_notifier()
  scsi: don't lock AioContext in I/O code path
  dma-helpers: don't lock AioContext in dma_blk_cb()

 include/hw/scsi/scsi.h  |   7 +-
 hw/scsi/scsi-bus.c  | 180 ++--
 hw/scsi/scsi-disk.c |  23 
 hw/scsi/scsi-generic.c  |  20 +---
 hw/scsi/virtio-scsi-dataplane.c |   8 +-
 system/dma-helpers.c|   7 +-
 6 files changed, 136 insertions(+), 109 deletions(-)

-- 
2.43.0




Re: [PATCH-for-8.2?] hw/ufs: avoid generating the same ID string for different LU devices

2023-12-04 Thread Philippe Mathieu-Daudé

On 4/12/23 16:05, Akinobu Mita wrote:

QEMU would not start when trying to create two UFS host controllers and
a UFS logical unit for each with the following options:

-device ufs,id=bus0 \
-device ufs-lu,drive=drive1,bus=bus0,lun=0 \
-device ufs,id=bus1 \
-device ufs-lu,drive=drive2,bus=bus1,lun=0 \

This is because the same ID string ("0:0:0/scsi-disk") is generated
for both UFS logical units.

To fix this issue, prepend the parent pci device's path to make
the ID string unique.
(":00:03.0/0:0:0/scsi-disk" and ":00:04.0/0:0:0/scsi-disk")

Fixes: 096434fea13a ("hw/ufs: Modify lu.c to share codes with SCSI subsystem")
Signed-off-by: Akinobu Mita 


Reviewed-by: Philippe Mathieu-Daudé 


---
  hw/ufs/ufs.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index 68c5f1f6c9..eccdb852a0 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -1323,9 +1323,17 @@ static bool ufs_bus_check_address(BusState *qbus, 
DeviceState *qdev,
  return true;
  }
  
+static char *ufs_bus_get_dev_path(DeviceState *dev)

+{
+BusState *bus = qdev_get_parent_bus(dev);
+
+return qdev_get_dev_path(bus->parent);
+}
+
  static void ufs_bus_class_init(ObjectClass *class, void *data)
  {
  BusClass *bc = BUS_CLASS(class);
+bc->get_dev_path = ufs_bus_get_dev_path;
  bc->check_address = ufs_bus_check_address;
  }
  





Re: [PATCH] Revert "test/qga: use G_TEST_DIR to locate os-release test file"

2023-12-04 Thread Marc-André Lureau
Hi

On Mon, Dec 4, 2023 at 8:33 PM Andrey Drobyshev
 wrote:
>
> Since the commit a85d09269b QGA_OS_RELEASE variable points to the path
> relative to the build dir.  Then on qemu-ga startup this path can't be
> found as qemu-ga cwd is somewhere else, which leads to the test failure:
>
>   # ./tests/unit/test-qga -p /qga/guest-get-osinfo
>   # random seed: R02S3a90c22d77ff1070fbd844f4959cf4a4
>   # Start of qga tests
>   **
>   ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 'str' should 
> not be NULL
>   Bail out! ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 
> 'str' should not be NULL
>
> Let's obtain the absolute path again.

Can you detail how the build and the test is done?

If I recall correctly, this change was done in order to move qga to a
subproject(), but isn't strictly required at this point. Although I
believe it is more correct to lookup test data relative to
G_TEST_DIST.

>
> This reverts commit a85d09269bb1a7071d3ce0f2957e3ca9dba7c047.
>
> Signed-off-by: Andrey Drobyshev 
> ---
>  tests/unit/test-qga.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c
> index 671e83cb86..47cf5e30ec 100644
> --- a/tests/unit/test-qga.c
> +++ b/tests/unit/test-qga.c
> @@ -1034,10 +1034,12 @@ static void test_qga_guest_get_osinfo(gconstpointer 
> data)
>  g_autoptr(QDict) ret = NULL;
>  char *env[2];
>  QDict *val;
> +g_autofree gchar *cwd = NULL;
>
> +cwd = g_get_current_dir();
>  env[0] = g_strdup_printf(
> -"QGA_OS_RELEASE=%s%c..%cdata%ctest-qga-os-release",
> -g_test_get_dir(G_TEST_DIST), G_DIR_SEPARATOR, G_DIR_SEPARATOR, 
> G_DIR_SEPARATOR);
> +"QGA_OS_RELEASE=%s%ctests%cdata%ctest-qga-os-release",
> +cwd, G_DIR_SEPARATOR, G_DIR_SEPARATOR, G_DIR_SEPARATOR);
>  env[1] = NULL;
>  fixture_setup(&fixture, NULL, env);
>
> --
> 2.39.3
>
>


-- 
Marc-André Lureau



Re: [RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Daniel P . Berrangé
On Tue, Dec 05, 2023 at 12:41:16AM +0800, Yong Huang wrote:
> On Tue, Dec 5, 2023 at 12:24 AM Daniel P. Berrangé 
> wrote:
> 
> > On Tue, Dec 05, 2023 at 12:06:17AM +0800, Hyman Huang wrote:
> > > This functionality was motivated by the following to-do list seen
> > > in crypto documents:
> > > https://wiki.qemu.org/Features/Block/Crypto
> > >
> > > The last chapter says we should "separate header volume":
> > >
> > > The LUKS format has ability to store the header in a separate volume
> > > from the payload. We should extend the LUKS driver in QEMU to support
> > > this use case.
> > >
> > > As a proof-of-concept, I've created this patchset, which I've named
> > > the Gluks: generic luks. As their name suggests, they offer encryption
> > > for any format that QEMU theoretically supports.
> >
> > I don't see the point in creating a new driver.
> >
> > I would expect detached header support to be implemented via an
> > optional new 'header' field in the existing driver. ie
> >
> > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > index ca390c5700..48d1f2a974 100644
> > --- a/qapi/block-core.json
> > +++ b/qapi/block-core.json
> > @@ -3352,11 +3352,15 @@
> >  # decryption key (since 2.6). Mandatory except when doing a
> >  # metadata-only probe of the image.
> >  #
> > +# @header: optional reference to the location of a blockdev
> > +# storing a detached LUKS heaer
> > +#
> >  # Since: 2.9
> >  ##
> >  { 'struct': 'BlockdevOptionsLUKS',
> >'base': 'BlockdevOptionsGenericFormat',
> > -  'data': { '*key-secret': 'str' } }
> > +  'data': { '*key-secret': 'str',
> > +"*header-file': 'BlockdevRef'} }
> >
> >  ##
> >  # @BlockdevOptionsGenericCOWFormat:
> > @@ -4941,9 +4945,18 @@
> >  #
> >  # Driver specific image creation options for LUKS.
> >  #
> > -# @file: Node to create the image format on
> > +# @file: Node to create the image format on. Mandatory
> > +# unless a detached header file is specified using
> > +# @header.
> >  #
> > -# @size: Size of the virtual disk in bytes
> > +# @size: Size of the virtual disk in bytes.  Mandatory
> > +# unless a detached header file is specified using
> > +# @header.
> > +#
> > +# @header: optional reference to the location of a blockdev
> > +# storing a detached LUKS heaer. The @file option is
> > +# is optional when this is given, unless it is desired
> > +# to perform pre-allocation
> >  #
> >  # @preallocation: Preallocation mode for the new image (since: 4.2)
> >  # (default: off; allowed values: off, metadata, falloc, full)
> > @@ -4952,8 +4965,9 @@
> >  ##
> >  { 'struct': 'BlockdevCreateOptionsLUKS',
> >'base': 'QCryptoBlockCreateOptionsLUKS',
> > -  'data': { 'file': 'BlockdevRef',
> > -'size': 'size',
> > +  'data': { '*file':'BlockdevRef',
> > +'*size':'size',
> > +'*header':  'BlockdevRef'
> >  '*preallocation':   'PreallocMode' } }
> >
> >  ##
> >
> > It ends up giving basicallly the same workflow as you outline,
> > without needing the new block driver
> >
> 
> How about the design and usage, could it be simpler? Any advice? :)
> 
> 
> As you can see below, the Gluks format block layer driver's design is
> quite simple.
> 
>  virtio-blk/vhost-user-blk...(front-end device)
>   ^
>   |
>  Gluks   (format-like disk node)
>   / \
>file   header (blockdev reference)
> / \
>  filefile (protocol node)
>|   |
>disk data   Luks data

What I suggested above ends up with the exact same block driver
graph, unless I'm missing something.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 2/2] linux-user: Fix openat() emulation to not modify atime

2023-12-04 Thread Daniel P . Berrangé
On Thu, Nov 30, 2023 at 07:21:40PM -0800, Shu-Chun Weng wrote:
> Commit b8002058 strengthened openat()'s /proc detection by calling
> realpath(3) on the given path, which allows various paths and symlinks
> that points to the /proc file system to be intercepted correctly.
> 
> Using realpath(3), though, has a side effect that it reads the symlinks
> along the way, and thus changes their atime. The results in the
> following code snippet already get ~now instead of the real atime:
> 
>   int fd = open("/path/to/a/symlink", O_PATH | O_NOFOLLOW);
>   struct stat st;
>   fstat(fd, st);
>   return st.st_atime;
> 
> This change opens a path that doesn't appear to be part of /proc
> directly and checks the destination of /proc/self/fd/n to determine if
> it actually refers to a file in /proc.
> 
> Neither this nor the existing code works with symlinks or indirect paths
> (e.g.  /tmp/../proc/self/exe) that points to /proc/self/exe because it
> is itself a symlink, and both realpath(3) and /proc/self/fd/n will
> resolve into the location of QEMU.

I wonder if we can detect that by opening with O_NOFOLLOW, then
calling fstatfs() on the FD, and checking f_type == PROCFS_SUPER_MAGIC


> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index e384e14248..25e2cda10a 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8308,8 +8308,6 @@ static int open_net_route(CPUArchState *cpu_env, int fd)
>  int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
>  int flags, mode_t mode, bool safe)
>  {
> -g_autofree char *proc_name = NULL;
> -const char *pathname;
>  struct fake_open {
>  const char *filename;
>  int (*fill)(CPUArchState *cpu_env, int fd);
> @@ -8333,13 +8331,39 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, 
> const char *fname,
>  #endif
>  { NULL, NULL, NULL }
>  };
> +char pathname[PATH_MAX];
>  
> -/* if this is a file from /proc/ filesystem, expand full name */
> -proc_name = realpath(fname, NULL);
> -if (proc_name && strncmp(proc_name, "/proc/", 6) == 0) {
> -pathname = proc_name;
> +if (strncmp(fname, "/proc/", 6) == 0) {
> +pstrcpy(pathname, sizeof(pathname), fname);
>  } else {
> -pathname = fname;
> +char procpath[PATH_MAX];
> +int fd, n;
> +
> +if (safe) {
> +fd = safe_openat(dirfd, path(fname), flags, mode);
> +} else {
> +fd = openat(dirfd, path(fname), flags, mode);
> +}
> +if (fd < 0) {
> +return fd;
> +}
> +
> +/*
> + * Try to get the real path of the file we just opened. We avoid 
> calling
> + * `realpath(3)` because it calls `readlink(2)` on symlinks which
> + * changes their atime. Note that since `/proc/self/exe` is a 
> symlink,
> + * `pathname` will never resolves to it (neither will `realpath(3)`).
> + * That's why we check `fname` against the "/proc/" prefix first.
> + */
> +snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fd);

g_strdup_printf() + g_autofree to avoid this PATH_MAX buffer

> +n = readlink(procpath, pathname, sizeof(pathname));
> +pathname[n < sizeof(pathname) ? n : sizeof(pathname)] = '\0';

If you call lstat() then sb_size will tell you how big the buffer
needs to be for a subsequent readlink(), whcih can be allocated
on the heap and released with g_autofree, avoiding the othuer PATH_MAX
buffer

> +
> +/* if this is not a file from /proc/ filesystem, the fd is good 
> as-is */
> +if (strncmp(pathname, "/proc/", 6) != 0) {
> +return fd;
> +}
> +close(fd);
>  }
>  
>  if (is_proc_myself(pathname, "exe")) {
> @@ -8390,9 +8414,9 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, 
> const char *fname,
>  }
>  
>  if (safe) {
> -return safe_openat(dirfd, path(pathname), flags, mode);
> +return safe_openat(dirfd, pathname, flags, mode);
>  } else {
> -return openat(dirfd, path(pathname), flags, mode);
> +return openat(dirfd, pathname, flags, mode);
>  }
>  }
>  
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] Revert "test/qga: use G_TEST_DIR to locate os-release test file"

2023-12-04 Thread Andrey Drobyshev
On 12/4/23 18:51, Marc-André Lureau wrote:
> Hi
> 
> On Mon, Dec 4, 2023 at 8:33 PM Andrey Drobyshev
>  wrote:
>>
>> Since the commit a85d09269b QGA_OS_RELEASE variable points to the path
>> relative to the build dir.  Then on qemu-ga startup this path can't be
>> found as qemu-ga cwd is somewhere else, which leads to the test failure:
>>
>>   # ./tests/unit/test-qga -p /qga/guest-get-osinfo
>>   # random seed: R02S3a90c22d77ff1070fbd844f4959cf4a4
>>   # Start of qga tests
>>   **
>>   ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 'str' should 
>> not be NULL
>>   Bail out! ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 
>> 'str' should not be NULL
>>
>> Let's obtain the absolute path again.
> 
> Can you detail how the build and the test is done?
> 

Simple as:

> ./configure --cc=gcc --target-list=x86_64-softmmu --enable-guest-agent && 
> make -j16
> cd build; tests/unit/test-qga -p /qga/guest-get-osinfo


> If I recall correctly, this change was done in order to move qga to a
> subproject(), but isn't strictly required at this point. Although I
> believe it is more correct to lookup test data relative to
> G_TEST_DIST.
> 

Then we'd have to change cwd of qemu-ga at startup to ensure relative
paths work.  Right now (with the initial change) it appears broken.

>>
>> This reverts commit a85d09269bb1a7071d3ce0f2957e3ca9dba7c047.
>>
>> Signed-off-by: Andrey Drobyshev 
>> ---
>>  tests/unit/test-qga.c | 6 --
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c
>> index 671e83cb86..47cf5e30ec 100644
>> --- a/tests/unit/test-qga.c
>> +++ b/tests/unit/test-qga.c
>> @@ -1034,10 +1034,12 @@ static void test_qga_guest_get_osinfo(gconstpointer 
>> data)
>>  g_autoptr(QDict) ret = NULL;
>>  char *env[2];
>>  QDict *val;
>> +g_autofree gchar *cwd = NULL;
>>
>> +cwd = g_get_current_dir();
>>  env[0] = g_strdup_printf(
>> -"QGA_OS_RELEASE=%s%c..%cdata%ctest-qga-os-release",
>> -g_test_get_dir(G_TEST_DIST), G_DIR_SEPARATOR, G_DIR_SEPARATOR, 
>> G_DIR_SEPARATOR);
>> +"QGA_OS_RELEASE=%s%ctests%cdata%ctest-qga-os-release",
>> +cwd, G_DIR_SEPARATOR, G_DIR_SEPARATOR, G_DIR_SEPARATOR);
>>  env[1] = NULL;
>>  fixture_setup(&fixture, NULL, env);
>>
>> --
>> 2.39.3
>>
>>
> 
> 




Re: [PATCH] Revert "test/qga: use G_TEST_DIR to locate os-release test file"

2023-12-04 Thread Marc-André Lureau
Hi

On Mon, Dec 4, 2023 at 9:01 PM Andrey Drobyshev
 wrote:
>
> On 12/4/23 18:51, Marc-André Lureau wrote:
> > Hi
> >
> > On Mon, Dec 4, 2023 at 8:33 PM Andrey Drobyshev
> >  wrote:
> >>
> >> Since the commit a85d09269b QGA_OS_RELEASE variable points to the path
> >> relative to the build dir.  Then on qemu-ga startup this path can't be
> >> found as qemu-ga cwd is somewhere else, which leads to the test failure:
> >>
> >>   # ./tests/unit/test-qga -p /qga/guest-get-osinfo
> >>   # random seed: R02S3a90c22d77ff1070fbd844f4959cf4a4
> >>   # Start of qga tests
> >>   **
> >>   ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 'str' 
> >> should not be NULL
> >>   Bail out! ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 
> >> 'str' should not be NULL
> >>
> >> Let's obtain the absolute path again.
> >
> > Can you detail how the build and the test is done?
> >
>
> Simple as:
>
> > ./configure --cc=gcc --target-list=x86_64-softmmu --enable-guest-agent && 
> > make -j16
> > cd build; tests/unit/test-qga -p /qga/guest-get-osinfo
>
>
> > If I recall correctly, this change was done in order to move qga to a
> > subproject(), but isn't strictly required at this point. Although I
> > believe it is more correct to lookup test data relative to
> > G_TEST_DIST.
> >
>
> Then we'd have to change cwd of qemu-ga at startup to ensure relative
> paths work.  Right now (with the initial change) it appears broken.

By reverting the patch, it is _still_ broken if you run the test
manually from a different directory (say from tests/unit for example)

With G_TEST_DIST, and proper testing environment, it works from any directory.

Tests are not meant to be run manually, you should run them through
the test runner: meson test -v test-qga

>
> >>
> >> This reverts commit a85d09269bb1a7071d3ce0f2957e3ca9dba7c047.
> >>
> >> Signed-off-by: Andrey Drobyshev 
> >> ---
> >>  tests/unit/test-qga.c | 6 --
> >>  1 file changed, 4 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c
> >> index 671e83cb86..47cf5e30ec 100644
> >> --- a/tests/unit/test-qga.c
> >> +++ b/tests/unit/test-qga.c
> >> @@ -1034,10 +1034,12 @@ static void 
> >> test_qga_guest_get_osinfo(gconstpointer data)
> >>  g_autoptr(QDict) ret = NULL;
> >>  char *env[2];
> >>  QDict *val;
> >> +g_autofree gchar *cwd = NULL;
> >>
> >> +cwd = g_get_current_dir();
> >>  env[0] = g_strdup_printf(
> >> -"QGA_OS_RELEASE=%s%c..%cdata%ctest-qga-os-release",
> >> -g_test_get_dir(G_TEST_DIST), G_DIR_SEPARATOR, G_DIR_SEPARATOR, 
> >> G_DIR_SEPARATOR);
> >> +"QGA_OS_RELEASE=%s%ctests%cdata%ctest-qga-os-release",
> >> +cwd, G_DIR_SEPARATOR, G_DIR_SEPARATOR, G_DIR_SEPARATOR);
> >>  env[1] = NULL;
> >>  fixture_setup(&fixture, NULL, env);
> >>
> >> --
> >> 2.39.3
> >>
> >>
> >
> >
>


-- 
Marc-André Lureau



Re: [PATCH V6 05/14] migration: propagate suspended runstate

2023-12-04 Thread Peter Xu
On Fri, Dec 01, 2023 at 11:23:33AM -0500, Steven Sistare wrote:
> >> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int 
> >> version_id)
> >>  return -EINVAL;
> >>  }
> >>  s->state = r;
> >> +vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
> > 
> > IIUC current vm_was_suspended (based on my read of your patch) was not the
> > same as a boolean representing "whether VM is suspended", but only a
> > temporary field to remember that for a VM stop request.  To be explicit, I
> > didn't see this flag set in qemu_system_suspend() in your previous patch.
> > 
> > If so, we can already do:
> > 
> >   vm_set_suspended(s->vm_was_suspended);
> > 
> > Irrelevant of RUN_STATE_SUSPENDED?
> 
> We need both terms of the expression.
> 
> If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
> We call global_state_store prior to vm_stop_force_state, so the incoming
> side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.

Right.

> However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
> calling vm_start, we need to restore the suspended state.  Thus in 
> global_state_post_load, we must set vm_was_suspended = true.

With above, shouldn't global_state_get_runstate() (on dest) fetch SUSPENDED
already?  Then I think it should call vm_start(SUSPENDED) if to start.

Maybe you're talking about the special case where autostart==false?  We
used to have this (existing process_incoming_migration_bh()):

if (!global_state_received() ||
global_state_get_runstate() == RUN_STATE_RUNNING) {
if (autostart) {
vm_start();
} else {
runstate_set(RUN_STATE_PAUSED);
}
}

If so maybe I get you, because in the "else" path we do seem to lose the
SUSPENDED state again, but in that case IMHO we should logically set
vm_was_suspended only when we "lose" it - we didn't lose it during
migration, but only until we decided to switch to PAUSED (due to
autostart==false). IOW, change above to something like:

state = global_state_get_runstate();
if (!global_state_received() || runstate_is_alive(state)) {
if (autostart) {
vm_start(state);
} else {
if (runstate_is_suspended(state)) {
/* Remember suspended state before setting system to STOPed */
vm_was_suspended = true;
}
runstate_set(RUN_STATE_PAUSED);
}
}

It may or may not have a functional difference even if current patch,
though.  However maybe clearer to follow vm_was_suspended's strict
definition.

> 
> If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
> then vm_was_suspended = true.  Migration from that state sets
> vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
> ends with runstate_set(RUN_STATE_PAUSED).
> 
> I will add a comment here in the code.
>  
> >>  return 0;
> >>  }
> >> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
> >>  .fields = (VMStateField[]) {
> >>  VMSTATE_UINT32(size, GlobalState),
> >>  VMSTATE_BUFFER(runstate, GlobalState),
> >> +VMSTATE_BOOL(vm_was_suspended, GlobalState),
> >>  VMSTATE_END_OF_LIST()
> >>  },
> >>  };
> > 
> > I think this will break migration between old/new, unfortunately.  And
> > since the global state exist mostly for every VM, all VM setup should be
> > affected, and over all archs.
> 
> Thanks, I keep forgetting that my binary tricks are no good here.  However,
> I have one other trick up my sleeve, which is to store vm_was_running in
> global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
> compatible, since that byte is always 0 in older qemu.  It can be implemented
> with a few lines of code change confined to global_state.c, versus many lines 
> spread across files to do it the conventional way using a compat property and
> a subsection.  Sound OK?  

Tricky!  But sounds okay to me.  I think you're inventing some of your own
way of being compatible, not relying on machine type as a benefit.  If go
this route please document clearly on the layout and also what it looked
like in old binaries.

I think maybe it'll be good to keep using strings, so in the new binaries
we allow >1 strings, then we define properly on those strings (index 0:
runstate, existed since start; index 2: suspended, perhaps using "1"/"0" to
express, while 0x00 means old binary, etc.).

I hope this trick will need less code than the subsection solution,
otherwise I'd still consider going with that, which is the "common
solution".

Let's also see whether Juan/Fabiano/others has any opinions.

-- 
Peter Xu




Re: [RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Yong Huang
On Tue, Dec 5, 2023 at 12:51 AM Daniel P. Berrangé 
wrote:

> On Tue, Dec 05, 2023 at 12:41:16AM +0800, Yong Huang wrote:
> > On Tue, Dec 5, 2023 at 12:24 AM Daniel P. Berrangé 
> > wrote:
> >
> > > On Tue, Dec 05, 2023 at 12:06:17AM +0800, Hyman Huang wrote:
> > > > This functionality was motivated by the following to-do list seen
> > > > in crypto documents:
> > > > https://wiki.qemu.org/Features/Block/Crypto
> > > >
> > > > The last chapter says we should "separate header volume":
> > > >
> > > > The LUKS format has ability to store the header in a separate volume
> > > > from the payload. We should extend the LUKS driver in QEMU to support
> > > > this use case.
> > > >
> > > > As a proof-of-concept, I've created this patchset, which I've named
> > > > the Gluks: generic luks. As their name suggests, they offer
> encryption
> > > > for any format that QEMU theoretically supports.
> > >
> > > I don't see the point in creating a new driver.
> > >
> > > I would expect detached header support to be implemented via an
> > > optional new 'header' field in the existing driver. ie
> > >
> > > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > > index ca390c5700..48d1f2a974 100644
> > > --- a/qapi/block-core.json
> > > +++ b/qapi/block-core.json
> > > @@ -3352,11 +3352,15 @@
> > >  # decryption key (since 2.6). Mandatory except when doing a
> > >  # metadata-only probe of the image.
> > >  #
> > > +# @header: optional reference to the location of a blockdev
> > > +# storing a detached LUKS heaer
> > > +#
> > >  # Since: 2.9
> > >  ##
> > >  { 'struct': 'BlockdevOptionsLUKS',
> > >'base': 'BlockdevOptionsGenericFormat',
> > > -  'data': { '*key-secret': 'str' } }
> > > +  'data': { '*key-secret': 'str',
> > > +"*header-file': 'BlockdevRef'} }
> > >
> > >  ##
> > >  # @BlockdevOptionsGenericCOWFormat:
> > > @@ -4941,9 +4945,18 @@
> > >  #
> > >  # Driver specific image creation options for LUKS.
> > >  #
> > > -# @file: Node to create the image format on
> > > +# @file: Node to create the image format on. Mandatory
> > > +# unless a detached header file is specified using
> > > +# @header.
> > >  #
> > > -# @size: Size of the virtual disk in bytes
> > > +# @size: Size of the virtual disk in bytes.  Mandatory
> > > +# unless a detached header file is specified using
> > > +# @header.
> > > +#
> > > +# @header: optional reference to the location of a blockdev
> > > +# storing a detached LUKS heaer. The @file option is
> > > +# is optional when this is given, unless it is desired
> > > +# to perform pre-allocation
> > >  #
> > >  # @preallocation: Preallocation mode for the new image (since: 4.2)
> > >  # (default: off; allowed values: off, metadata, falloc, full)
> > > @@ -4952,8 +4965,9 @@
> > >  ##
> > >  { 'struct': 'BlockdevCreateOptionsLUKS',
> > >'base': 'QCryptoBlockCreateOptionsLUKS',
> > > -  'data': { 'file': 'BlockdevRef',
> > > -'size': 'size',
> > > +  'data': { '*file':'BlockdevRef',
> > > +'*size':'size',
> > > +'*header':  'BlockdevRef'
> > >  '*preallocation':   'PreallocMode' } }
> > >
> > >  ##
> > >
> > > It ends up giving basicallly the same workflow as you outline,
> > > without needing the new block driver
> > >
> >
> > How about the design and usage, could it be simpler? Any advice? :)
> >
> >
> > As you can see below, the Gluks format block layer driver's design is
> > quite simple.
> >
> >  virtio-blk/vhost-user-blk...(front-end device)
> >   ^
> >   |
> >  Gluks   (format-like disk node)
> >   / \
> >file   header (blockdev reference)
> > / \
> >  filefile (protocol node)
> >|   |
> >disk data   Luks data
>
> What I suggested above ends up with the exact same block driver
> graph, unless I'm missing something.
>

I could overlook something or fail to adequately convey the goal of the
patchset. :(

Indeed, utilizing the same block driver might be effective if our only goal
is to divide the header volume, giving us an additional way to use Luks.

While supporting encryption for any disk format that QEMU is capable of
supporting is another feature of this patchset. This implies that we might
link the Luks header to other blockdev references, which might alter how
the Luks are used and make them incompatible with it. It's not
user-friendly in my opinion, and I'm not aware of a more elegant solution.



> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

-- 
Best regards


Re: [PATCH v2] system/memory: use ldn_he_p/stn_he_p

2023-12-04 Thread Patrick Venture
On Mon, Dec 4, 2023 at 3:24 AM Philippe Mathieu-Daudé 
wrote:

> Hi Patrick,
>
> On 3/12/23 16:42, Patrick Venture wrote:
>
> > Friendly ping? Is this going to be applied or do I need to add another
> > CC or?  I do think it should go into stable.
>
> I'll send a PR with this patch included.
>

Thanks!

>
> Regards,
>
> Phil.
>


Re: [PATCH] Revert "test/qga: use G_TEST_DIR to locate os-release test file"

2023-12-04 Thread Andrey Drobyshev
On 12/4/23 19:09, Marc-André Lureau wrote:
> Hi
> 
> On Mon, Dec 4, 2023 at 9:01 PM Andrey Drobyshev
>  wrote:
>>
>> On 12/4/23 18:51, Marc-André Lureau wrote:
>>> Hi
>>>
>>> On Mon, Dec 4, 2023 at 8:33 PM Andrey Drobyshev
>>>  wrote:

 Since the commit a85d09269b QGA_OS_RELEASE variable points to the path
 relative to the build dir.  Then on qemu-ga startup this path can't be
 found as qemu-ga cwd is somewhere else, which leads to the test failure:

   # ./tests/unit/test-qga -p /qga/guest-get-osinfo
   # random seed: R02S3a90c22d77ff1070fbd844f4959cf4a4
   # Start of qga tests
   **
   ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 'str' 
 should not be NULL
   Bail out! ERROR:../tests/unit/test-qga.c:906:test_qga_guest_get_osinfo: 
 'str' should not be NULL

 Let's obtain the absolute path again.
>>>
>>> Can you detail how the build and the test is done?
>>>
>>
>> Simple as:
>>
>>> ./configure --cc=gcc --target-list=x86_64-softmmu --enable-guest-agent && 
>>> make -j16
>>> cd build; tests/unit/test-qga -p /qga/guest-get-osinfo
>>
>>
>>> If I recall correctly, this change was done in order to move qga to a
>>> subproject(), but isn't strictly required at this point. Although I
>>> believe it is more correct to lookup test data relative to
>>> G_TEST_DIST.
>>>
>>
>> Then we'd have to change cwd of qemu-ga at startup to ensure relative
>> paths work.  Right now (with the initial change) it appears broken.
> 
> By reverting the patch, it is _still_ broken if you run the test
> manually from a different directory (say from tests/unit for example)
>
> With G_TEST_DIST, and proper testing environment, it works from any directory.
> 

No, it seems to be failing as well, only earlier.  Before the revert:
> cd build/tests/unit; ./test-qga 
> # random seed: R02S450ef942c699b5af6dff48f9c5b73b33
> **
> ERROR:../tests/unit/test-qga.c:79:fixture_setup: assertion failed (error == 
> NULL): Failed to execute child process “$SRC/build/tests/unit/qga/qemu-ga” 
> (No such file or directory) (g-exec-error-quark, 8)
> Bail out! ERROR:../tests/unit/test-qga.c:79:fixture_setup: assertion failed 
> (error == NULL): Failed to execute child process 
> “$SRC/build/tests/unit/qga/qemu-ga” (No such file or directory) 
> (g-exec-error-quark, 8)

But maybe my testing environment isn't proper?

> Tests are not meant to be run manually, you should run them through
> the test runner: meson test -v test-qga
> 

That's a good point, but I just found it suspicious that this is
literally the *only* case of the *only* unit test which fails (when run
directly from ./build).  Could we fix the direct execution as well then?

Btw test runner also cannot be run from just any directory, otherwise it
complains:
> meson test -v test-qga 
> 
> ERROR: No such build data file as 
> '$SRC/build/tests/unit/meson-private/build.dat'.


>>

 This reverts commit a85d09269bb1a7071d3ce0f2957e3ca9dba7c047.

 Signed-off-by: Andrey Drobyshev 
 ---
  tests/unit/test-qga.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

 diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c
 index 671e83cb86..47cf5e30ec 100644
 --- a/tests/unit/test-qga.c
 +++ b/tests/unit/test-qga.c
 @@ -1034,10 +1034,12 @@ static void 
 test_qga_guest_get_osinfo(gconstpointer data)
  g_autoptr(QDict) ret = NULL;
  char *env[2];
  QDict *val;
 +g_autofree gchar *cwd = NULL;

 +cwd = g_get_current_dir();
  env[0] = g_strdup_printf(
 -"QGA_OS_RELEASE=%s%c..%cdata%ctest-qga-os-release",
 -g_test_get_dir(G_TEST_DIST), G_DIR_SEPARATOR, G_DIR_SEPARATOR, 
 G_DIR_SEPARATOR);
 +"QGA_OS_RELEASE=%s%ctests%cdata%ctest-qga-os-release",
 +cwd, G_DIR_SEPARATOR, G_DIR_SEPARATOR, G_DIR_SEPARATOR);
  env[1] = NULL;
  fixture_setup(&fixture, NULL, env);

 --
 2.39.3


>>>
>>>
>>
> 
> 




Re: [RFC 0/8] Support generic Luks encryption

2023-12-04 Thread Daniel P . Berrangé
On Tue, Dec 05, 2023 at 01:32:51AM +0800, Yong Huang wrote:
> On Tue, Dec 5, 2023 at 12:51 AM Daniel P. Berrangé 
> wrote:
> 
> > On Tue, Dec 05, 2023 at 12:41:16AM +0800, Yong Huang wrote:
> > > On Tue, Dec 5, 2023 at 12:24 AM Daniel P. Berrangé 
> > > wrote:
> > >
> > > > On Tue, Dec 05, 2023 at 12:06:17AM +0800, Hyman Huang wrote:
> > > > > This functionality was motivated by the following to-do list seen
> > > > > in crypto documents:
> > > > > https://wiki.qemu.org/Features/Block/Crypto
> > > > >
> > > > > The last chapter says we should "separate header volume":
> > > > >
> > > > > The LUKS format has ability to store the header in a separate volume
> > > > > from the payload. We should extend the LUKS driver in QEMU to support
> > > > > this use case.
> > > > >
> > > > > As a proof-of-concept, I've created this patchset, which I've named
> > > > > the Gluks: generic luks. As their name suggests, they offer
> > encryption
> > > > > for any format that QEMU theoretically supports.
> > > >
> > > > I don't see the point in creating a new driver.
> > > >
> > > > I would expect detached header support to be implemented via an
> > > > optional new 'header' field in the existing driver. ie
> > > >
> > > > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > > > index ca390c5700..48d1f2a974 100644
> > > > --- a/qapi/block-core.json
> > > > +++ b/qapi/block-core.json
> > > > @@ -3352,11 +3352,15 @@
> > > >  # decryption key (since 2.6). Mandatory except when doing a
> > > >  # metadata-only probe of the image.
> > > >  #
> > > > +# @header: optional reference to the location of a blockdev
> > > > +# storing a detached LUKS heaer
> > > > +#
> > > >  # Since: 2.9
> > > >  ##
> > > >  { 'struct': 'BlockdevOptionsLUKS',
> > > >'base': 'BlockdevOptionsGenericFormat',
> > > > -  'data': { '*key-secret': 'str' } }
> > > > +  'data': { '*key-secret': 'str',
> > > > +"*header-file': 'BlockdevRef'} }
> > > >
> > > >  ##
> > > >  # @BlockdevOptionsGenericCOWFormat:
> > > > @@ -4941,9 +4945,18 @@
> > > >  #
> > > >  # Driver specific image creation options for LUKS.
> > > >  #
> > > > -# @file: Node to create the image format on
> > > > +# @file: Node to create the image format on. Mandatory
> > > > +# unless a detached header file is specified using
> > > > +# @header.
> > > >  #
> > > > -# @size: Size of the virtual disk in bytes
> > > > +# @size: Size of the virtual disk in bytes.  Mandatory
> > > > +# unless a detached header file is specified using
> > > > +# @header.
> > > > +#
> > > > +# @header: optional reference to the location of a blockdev
> > > > +# storing a detached LUKS heaer. The @file option is
> > > > +# is optional when this is given, unless it is desired
> > > > +# to perform pre-allocation
> > > >  #
> > > >  # @preallocation: Preallocation mode for the new image (since: 4.2)
> > > >  # (default: off; allowed values: off, metadata, falloc, full)
> > > > @@ -4952,8 +4965,9 @@
> > > >  ##
> > > >  { 'struct': 'BlockdevCreateOptionsLUKS',
> > > >'base': 'QCryptoBlockCreateOptionsLUKS',
> > > > -  'data': { 'file': 'BlockdevRef',
> > > > -'size': 'size',
> > > > +  'data': { '*file':'BlockdevRef',
> > > > +'*size':'size',
> > > > +'*header':  'BlockdevRef'
> > > >  '*preallocation':   'PreallocMode' } }
> > > >
> > > >  ##
> > > >
> > > > It ends up giving basicallly the same workflow as you outline,
> > > > without needing the new block driver
> > > >
> > >
> > > How about the design and usage, could it be simpler? Any advice? :)
> > >
> > >
> > > As you can see below, the Gluks format block layer driver's design is
> > > quite simple.
> > >
> > >  virtio-blk/vhost-user-blk...(front-end device)
> > >   ^
> > >   |
> > >  Gluks   (format-like disk node)
> > >   / \
> > >file   header (blockdev reference)
> > > / \
> > >  filefile (protocol node)
> > >|   |
> > >disk data   Luks data
> >
> > What I suggested above ends up with the exact same block driver
> > graph, unless I'm missing something.
> >
> 
> I could overlook something or fail to adequately convey the goal of the
> patchset. :(
> 
> Indeed, utilizing the same block driver might be effective if our only goal
> is to divide the header volume, giving us an additional way to use Luks.
> 
> While supporting encryption for any disk format that QEMU is capable of
> supporting is another feature of this patchset. This implies that we might
> link the Luks header to other blockdev references, which might alter how
> the Luks are used and make them incompatible with it. It's not
> user-friendly in my opinion, and I'm not aware of a more elegant solution.

That existing LUKS driver can already be used in combination with
any QEMU block driver, and 

qemu ppc64 crash when adding CPU

2023-12-04 Thread Michal Suchánek
Hello,

When running a VM with libvirt I get:

/usr/bin/qemu-system-ppc64 --version
QEMU emulator version 8.1.3 (Virtualization / 15.5)
Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers

/usr/bin/qemu-system-ppc64 -name
guest=sles12sp5-ppc64le,debug-threads=on -S -object
{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-11-sles12sp5-ppc64le/master-key.aes"}
-machine
pseries-7.1,usb=off,dump-guest-core=off,memory-backend=ppc_spapr.ram
-accel tcg -cpu POWER9 -m 4096 -object
{"qom-type":"memory-backend-ram","id":"ppc_spapr.ram","size":4294967296}
-overcommit mem-lock=off -smp 16,sockets=1,dies=1,cores=2,threads=8
-uuid a6ad6a7d-125b-4525-b452-241ce2000eda -display none -no-user-config
-nodefaults -chardev socket,id=charmonitor,fd=29,server=on,wait=off -mon
chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown
-boot strict=on -device
{"driver":"qemu-xhci","p2":15,"p3":15,"id":"usb","bus":"pci.0","addr":"0x3"}
-device
{"driver":"virtio-scsi-pci","id":"scsi0","bus":"pci.0","addr":"0x4"}
-device
{"driver":"virtio-serial-pci","id":"virtio-serial0","bus":"pci.0","addr":"0x2"}
-blockdev
{"driver":"file","filename":"/home/hramrach/Downloads/SLE-12-SP5-Server-MINI-ISO-ppc64le-GM-DVD.iso","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"}
-blockdev
{"node-name":"libvirt-2-format","read-only":true,"driver":"raw","file":"libvirt-2-storage"}
-device
{"driver":"scsi-cd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":0,"device_id":"drive-scsi0-0-0-0","drive":"libvirt-2-format","id":"scsi0-0-0-0","bootindex":2}
-blockdev
{"driver":"file","filename":"/var/lib/libvirt/images/sles12sp5-ppc64le.qcow2","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}
-blockdev
{"node-name":"libvirt-1-format","read-only":false,"discard":"unmap","driver":"qcow2","file":"libvirt-1-storage","backing":null}
-device
{"driver":"scsi-hd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":1,"device_id":"drive-scsi0-0-0-1","drive":"libvirt-1-format","id":"scsi0-0-0-1","bootindex":1}
-netdev {"type":"tap","fd":"30","id":"hostnet0"} -device
{"driver":"e1000","netdev":"hostnet0","id":"net0","mac":"52:54:00:3b:d5:a5","bus":"pci.0","addr":"0x1"}
-chardev pty,id=charserial0 -device
{"driver":"spapr-vty","chardev":"charserial0","id":"serial0","reg":805306368}
-audiodev {"id":"audio1","driver":"none"} -device
{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x5"}
-object
{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}
-device
{"driver":"virtio-rng-pci","rng":"objrng0","id":"rng0","bus":"pci.0","addr":"0x6"}
-sandbox
on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
-msg timestamp=on

virsh qemu-monitor-command sles12sp5-ppc64le query-hotpluggable-cpus | jq . | 
cat
{
  "return": [
{
  "props": {
"core-id": 8,
"node-id": 0
  },
  "vcpus-count": 8,
  "qom-path": "/machine/unattached/device[2]",
  "type": "power9_v2.2-spapr-cpu-core"
},
{
  "props": {
"core-id": 0,
"node-id": 0
  },
  "vcpus-count": 8,
  "qom-path": "/machine/unattached/device[1]",
  "type": "power9_v2.2-spapr-cpu-core"
}
  ],
  "id": "libvirt-155"
}

virsh qemu-monitor-command sles12sp5-ppc64le device_del 
'"id":"/machine/unattached/device[2]"' | jq . 
{
  "return": {},
  "id": "libvirt-218"
}

virsh qemu-monitor-command sles12sp5-ppc64le query-hotpluggable-cpus | jq . | 
cat
{
  "return": [
{
  "props": {
"core-id": 8,
"node-id": 0
  },
  "vcpus-count": 8,
  "type": "power9_v2.2-spapr-cpu-core"
},
{
  "props": {
"core-id": 0,
"node-id": 0
  },
  "vcpus-count": 8,
  "qom-path": "/machine/unattached/device[1]",
  "type": "power9_v2.2-spapr-cpu-core"
}
  ],
  "id": "libvirt-235"
}

virsh qemu-monitor-command sles12sp5-ppc64le device_add '"id":"cpu-666"' 
'"driver":"power9_v2.2-spapr-cpu-core"' '"core-id":8' '"node-id":0'  | jq .

__GI_raise (sig=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51  }
(gdb) up
#1  0x7f7839c553e5 in __GI_abort () at abort.c:79
79raise (SIGABRT);
(gdb) up
#2  0x7f783c54a125 in g_assertion_message (domain=domain@entry=0x0, 
file=file@entry=0x556b3baf9242 "../tcg/tcg.c", line=line@entry=784, 
func=func@entry=0x556b3bb55720 <__func__.55816> "tcg_register_thread", 
message=message@entry=0x7f76a46e8f40 "assertion failed: (n < 
tcg_max_ctxs)") at ../glib/gtestutils.c:3223
3223g_abort ();

This ends the usable part of stacktrace, going upp the call stack gdb
locks up.

Looking at tcg.c line 784 is here:

ster_thread(void)
{
TCGContext *s = g_malloc(sizeof(*s));
unsigned int i, n;

*s = tcg_init_ctx;

/* Relink mem_base.  */
for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
if (tcg_init_ctx.temps[i].mem_base) {
ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - t

  1   2   >