date:20221212

Re: [PATCH v1 05/24] vfio-user: add device IO ops vector

2022-12-12 Thread Cédric Le Goater


On 11/9/22 00:13, John Johnson wrote:

Used for communication with VFIO driver
(prep work for vfio-user, which will communicate over a socket)

Signed-off-by: John G Johnson 



I would keep the short rw handling for a subsequent patch to make
sure the changes do not introduce any functional change.


---
  hw/vfio/ap.c  |   1 +
  hw/vfio/ccw.c |   1 +
  hw/vfio/common.c  | 107 +++-
  hw/vfio/pci.c | 140 ++
  hw/vfio/platform.c|   1 +
  include/hw/vfio/vfio-common.h |  27 
  6 files changed, 209 insertions(+), 68 deletions(-)

diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index e0dd561..7ef42f1 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -102,6 +102,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
  mdevid = basename(vapdev->vdev.sysfsdev);
  vapdev->vdev.name = g_strdup_printf("%s", mdevid);
  vapdev->vdev.dev = dev;
+vapdev->vdev.io_ops = &vfio_dev_io_ioctl;
  
  /*

   * vfio-ap devices operate in a way compatible with discarding of
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 06b588c..cbd1c25 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -614,6 +614,7 @@ static void vfio_ccw_get_device(VFIOGroup *group, 
VFIOCCWDevice *vcdev,
  vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
  vcdev->vdev.name = name;
  vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
+vcdev->vdev.io_ops = &vfio_dev_io_ioctl;
  
  return;
  
diff --git a/hw/vfio/common.c b/hw/vfio/common.c

index dd9104f..c7bf0aa 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -71,7 +71,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
  .count = 0,
  };
  
-ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);

+VDEV_SET_IRQS(vbasedev, &irq_set);
  }
  
  void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)

@@ -84,7 +84,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int 
index)
  .count = 1,
  };
  
-ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);

+VDEV_SET_IRQS(vbasedev, &irq_set);
  }
  
  void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)

@@ -97,7 +97,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int 
index)
  .count = 1,
  };
  
-ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);

+VDEV_SET_IRQS(vbasedev, &irq_set);
  }
  
  static inline const char *action_to_str(int action)

@@ -178,9 +178,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, 
int subindex,
  pfd = (int32_t *)&irq_set->data;
  *pfd = fd;
  
-if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {

-ret = -errno;
-}
+ret = VDEV_SET_IRQS(vbasedev, irq_set);
  g_free(irq_set);
  
  if (!ret) {

@@ -215,6 +213,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
  uint32_t dword;
  uint64_t qword;
  } buf;
+int ret;
  
  switch (size) {

  case 1:
@@ -234,13 +233,15 @@ void vfio_region_write(void *opaque, hwaddr addr,
  break;
  }
  
-if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {

+ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf);
+if (ret != size) {
+const char *err = ret < 0 ? strerror(-ret) : "short write";
+
  error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
- ",%d) failed: %m",
+ ",%d) failed: %s",
   __func__, vbasedev->name, region->nr,
- addr, data, size);
+ addr, data, size, err);
  }
-
  trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);
  
  /*

@@ -266,13 +267,18 @@ uint64_t vfio_region_read(void *opaque,
  uint64_t qword;
  } buf;
  uint64_t data = 0;
+int ret;
+
+ret = VDEV_REGION_READ(vbasedev, region->nr, addr, size, &buf);
+if (ret != size) {
+const char *err = ret < 0 ? strerror(-ret) : "short read";
  
-if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {

-error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
+error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s",
   __func__, vbasedev->name, region->nr,
- addr, size);
+ addr, size, err);
  return (uint64_t)-1;
  }
+
  switch (size) {
  case 1:
  data = buf.byte;
@@ -2450,6 +2456,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
   struct vfio_region_info **info)
  {
  size_t argsz = sizeof(struct vfio_region_info);
+int ret;
  
  /* create region cache */

  if (vbasedev->regions == NULL) {
@@ -2468,10 +2475,11 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
index,
  retry:
  (*info)->argsz = argsz;
  
-if (ioctl(v

Re: [PATCH-for-8.0 1/7] hw/mips/Kconfig: Introduce CONFIG_GT64120 to select gt64xxx_pci.c

2022-12-12 Thread Philippe Mathieu-Daudé


On 12/12/22 01:13, Bernhard Beschow wrote:



Am 9. Dezember 2022 15:15:27 UTC schrieb "Philippe Mathieu-Daudé" 
:

From: Philippe Mathieu-Daudé 

Signed-off-by: Philippe Mathieu-Daudé 
---
hw/mips/Kconfig | 6 ++
hw/mips/meson.build | 3 ++-
2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig
index 725525358d..d6bbbe7069 100644
--- a/hw/mips/Kconfig
+++ b/hw/mips/Kconfig
@@ -1,5 +1,6 @@
config MALTA
 bool
+select GT64120
 select ISA_SUPERIO

config MIPSSIM
@@ -59,3 +60,8 @@ config MIPS_BOSTON

config FW_CFG_MIPS
 bool
+
+config GT64120
+bool
+select PCI
+select I8259


s/select I8259/depends on I8259/ since the model needs but doesn't provide 
I8259? Then just take my mail regarding the last patch as a reminder.


I try to remember the 'depends on' directive as "depends on BUS".
If there is no BUS, then the option is pointless. Here "select PCI"
means 'provide/expose a PCI bus on the machine'.

I8259 must be available for GT64120 to be working. If you need a
GT64120, its 'select' directive will select the minimum options
required, so it will implicitly select a I8259.

See docs/devel/kconfig.rst:

**dependencies**: ``depends on ``

  This defines a dependency for this configurable element. Dependencies
  evaluate an expression and force the value of the variable to false
  if the expression is false.

**reverse dependencies**: ``select  [if ]``

  While ``depends on`` can force a symbol to false, reverse dependencies
  can be used to force another symbol to true.

**devices**

  Example::

config MEGASAS_SCSI_PCI
  bool
  default y if PCI_DEVICES
  depends on PCI
  select SCSI

  Devices are the most complex of the five.  They can have a variety
  of directives that cooperate so that a default configuration includes
  all the devices that can be accessed from QEMU.

  Devices *depend on* the bus that they lie on, for example a PCI
  device would specify ``depends on PCI``.  An MMIO device will likely
  have no ``depends on`` directive.  Devices also *select* the buses
  that the device provides, for example a SCSI adapter would specify
  ``select SCSI``.  Finally, devices are usually ``default y`` if and
  only if they have at least one ``depends on``; the default could be
  conditional on a device group.


Otherwise:
Reviewed-by: Bernhard Beschow 


Thanks!

Re: [PATCH v3] hw/rtc/mc146818rtc: Make this rtc device target independent

2022-12-12 Thread Philippe Mathieu-Daudé


On 12/12/22 08:56, Thomas Huth wrote:

The only reason for this code being target dependent is the apic-related
code in rtc_policy_slew_deliver_irq(). Since these apic functions are rather
simple, we can easily move them into a new, separate file (apic_irqcount.c)
which will always be compiled and linked if either APIC or the mc146818 device
are required. This way we can get rid of the #ifdef TARGET_I386 switches in
mc146818rtc.c and declare it in the softmmu_ss instead of specific_ss, so
that the code only gets compiled once for all targets.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Mark Cave-Ayland 
Signed-off-by: Thomas Huth 
---
  v3: Move TYPE_APIC_COMMON from apic_internal.h to apic.h and use it


Ah, clever.


  include/hw/i386/apic.h  |  2 ++
  include/hw/i386/apic_internal.h |  2 --
  include/hw/rtc/mc146818rtc.h|  1 +
  hw/intc/apic_common.c   | 27 -
  hw/intc/apic_irqcount.c | 53 +
  hw/rtc/mc146818rtc.c| 25 +---
  hw/intc/meson.build |  6 +++-
  hw/rtc/meson.build  |  3 +-
  8 files changed, 69 insertions(+), 50 deletions(-)
  create mode 100644 hw/intc/apic_irqcount.c

Re: [PATCH 06/30] meson: tweak hardening options for Windows

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:36 PM Paolo Bonzini  wrote:
>
> -Wl,--dynamicbase has been enabled for DLLs upstream for roughly 2
> years (https://sourceware.org/bugzilla/show_bug.cgi?id=19011), and
> also by some distros including Debian for 6 years even
> (https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=836365), so
> just enable it unconditionally.
>
> Also add -Wl,--high-entropy-va.
>
> Signed-off-by: Paolo Bonzini 
> ---
>  meson.build | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index 5c6b5a1c757f..d61c7a82f112 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -193,10 +193,7 @@ qemu_ldflags += 
> cc.get_supported_link_arguments('-Wl,-z,relro', '-Wl,-z,now')
>
>  if targetos == 'windows'
>qemu_ldflags += cc.get_supported_link_arguments('-Wl,--no-seh', 
> '-Wl,--nxcompat')
> -  # Disable ASLR for debug builds to allow debugging with gdb
> -  if get_option('optimization') == '0'
> -qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase')
> -  endif
> +  qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', 
> '-Wl,--high-entropy-va')

What about the comment for disabling ASLR on debug builds?

I wonder if we really have to add those flags ourself. Imho, we can
leave them to the compiler default or distrib.. I bet most of the deps
don't use those flags explicitly either.

-- 
Marc-André Lureau

Re: [PATCH 07/30] meson: support meson 0.64 -Doptimization=plain

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:31 PM Paolo Bonzini  wrote:
>
> In Meson 0.64, the optimization built-in option now accepts the "plain" value,
> which will not set any optimization flags.  While QEMU does not check the
> contents of the option and therefore does not suffer any ill effect
> from the new value, it uses get_option to print the optimization flags
> in the summary.  Clean the code up to remove duplication, and check for
> -Doptimization=plain at the same time.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  meson.build | 16 +++-
>  1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index d61c7a82f112..dbd0b5563446 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -3752,18 +3752,16 @@ endif
>  if targetos == 'darwin'
>summary_info += {'Objective-C compiler': ' 
> '.join(meson.get_compiler('objc').cmd_array())}
>  endif
> -summary_info += {'CFLAGS':' '.join(get_option('c_args')
> -   + ['-O' + 
> get_option('optimization')]
> -   + (get_option('debug') ? 
> ['-g'] : []))}
> +option_cflags = (get_option('debug') ? ['-g'] : [])
> +if get_option('optimization') != 'plain'
> +  option_cflags += ['-O' + get_option('optimization')]
> +endif
> +summary_info += {'CFLAGS':' '.join(get_option('c_args') + 
> option_cflags)}
>  if link_language == 'cpp'
> -  summary_info += {'CXXFLAGS':' '.join(get_option('cpp_args')
> -   + ['-O' + 
> get_option('optimization')]
> -   + (get_option('debug') ? 
> ['-g'] : []))}
> +  summary_info += {'CXXFLAGS':' '.join(get_option('cpp_args') + 
> option_cflags)}
>  endif
>  if targetos == 'darwin'
> -  summary_info += {'OBJCFLAGS':   ' '.join(get_option('objc_args')
> -   + ['-O' + 
> get_option('optimization')]
> -   + (get_option('debug') ? 
> ['-g'] : []))}
> +  summary_info += {'OBJCFLAGS':   ' '.join(get_option('objc_args') + 
> option_cflags)}
>  endif
>  link_args = get_option(link_language + '_link_args')
>  if link_args.length() > 0
> --
> 2.38.1
>
>


-- 
Marc-André Lureau

Re: [PATCH 09/30] meson: use prefer_static option

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:42 PM Paolo Bonzini  wrote:
>
> The option is new in Meson 0.63 and removes the need to pass "static:
> true" to all dependency and find_library invocation.  Actually cleaning
> up the invocations is left for a separate patch.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 


> ---
>  configure   |  4 +---
>  docs/devel/build-system.rst |  3 +--
>  meson.build | 11 ---
>  qga/meson.build |  2 +-
>  4 files changed, 7 insertions(+), 13 deletions(-)
>
> diff --git a/configure b/configure
> index 411dfe977958..6efc2055ce09 100755
> --- a/configure
> +++ b/configure
> @@ -2315,9 +2315,6 @@ fi
>  if test "$solaris" = "yes" ; then
>echo "CONFIG_SOLARIS=y" >> $config_host_mak
>  fi
> -if test "$static" = "yes" ; then
> -  echo "CONFIG_STATIC=y" >> $config_host_mak
> -fi
>  echo "SRC_PATH=$source_path" >> $config_host_mak
>  echo "TARGET_DIRS=$target_list" >> $config_host_mak
>  if test "$modules" = "yes"; then
> @@ -2540,6 +2537,7 @@ if test "$skip_meson" = no; then
># Built-in options
>test "$bindir" != "bin" && meson_option_add "-Dbindir=$bindir"
>test "$default_feature" = no && meson_option_add -Dauto_features=disabled
> +  test "$static" = yes && meson_option_add -Dprefer_static=true
>test "$pie" = no && meson_option_add -Db_pie=false
>test "$werror" = yes && meson_option_add -Dwerror=true
>
> diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
> index 189472174340..9db18aff159e 100644
> --- a/docs/devel/build-system.rst
> +++ b/docs/devel/build-system.rst
> @@ -311,8 +311,7 @@ dependency will be used::
>sdl_image = not_found
>if not get_option('sdl_image').auto() or have_system
>  sdl_image = dependency('SDL2_image', required: get_option('sdl_image'),
> -   method: 'pkg-config',
> -   static: enable_static)
> +   method: 'pkg-config')
>endif
>
>  This avoids warnings on static builds of user-mode emulators, for example.
> diff --git a/meson.build b/meson.build
> index 19b023985325..dced840bfbee 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -18,10 +18,7 @@ sh = find_program('sh')
>  cc = meson.get_compiler('c')
>  config_host = keyval.load(meson.current_build_dir() / 'config-host.mak')
>  enable_modules = 'CONFIG_MODULES' in config_host
> -enable_static = 'CONFIG_STATIC' in config_host
> -
> -# Allow both shared and static libraries unless --enable-static
> -static_kwargs = enable_static ? {'static': true} : {}
> +static_kwargs = {}
>
>  # Temporary directory used for files created while
>  # configure runs. Since it is in the build directory
> @@ -183,7 +180,7 @@ qemu_cflags = config_host['QEMU_CFLAGS'].split()
>  qemu_objcflags = config_host['QEMU_OBJCFLAGS'].split()
>  qemu_ldflags = config_host['QEMU_LDFLAGS'].split()
>
> -if enable_static
> +if get_option('prefer_static')
>qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static'
>  endif
>
> @@ -830,7 +827,7 @@ if targetos == 'linux' and have_tools and 
> get_option('mpath').allowed()
>  kwargs: static_kwargs)
>if libmpathpersist.found()
>  mpathlibs += libmpathpersist
> -if enable_static
> +if get_option('prefer_static')
>mpathlibs += cc.find_library('devmapper',
>   required: get_option('mpath'),
>   kwargs: static_kwargs)
> @@ -1214,7 +1211,7 @@ if not gnutls_crypto.found()
>  # Debian has removed -lgpg-error from libgcrypt-config
>  # as it "spreads unnecessary dependencies" which in
>  # turn breaks static builds...
> -if gcrypt.found() and enable_static
> +if gcrypt.found() and get_option('prefer_static')
>gcrypt = declare_dependency(dependencies: [
>  gcrypt,
>  cc.find_library('gpg-error', required: true, kwargs: static_kwargs)])
> diff --git a/qga/meson.build b/qga/meson.build
> index 3cfb9166e5d8..ec67326b25f3 100644
> --- a/qga/meson.build
> +++ b/qga/meson.build
> @@ -22,7 +22,7 @@ have_qga_vss = get_option('qga_vss') \
>  Then run configure with: --extra-cxxflags="-isystem 
> /path/to/vss/inc/win2003"''') \
>.require(midl.found() or widl.found(),
> error_message: 'VSS support requires midl or widl') \
> -  .require(not enable_static,
> +  .require(not get_option('prefer_static'),
> error_message: 'VSS support requires dynamic linking with GLib') \
>.allowed()
>
> --
> 2.38.1
>
>


-- 
Marc-André Lureau

Re: [PATCH 10/30] meson: remove static_kwargs

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:31 PM Paolo Bonzini  wrote:
>
> After static_kwargs has been changed to an empty dictionary, it has
> no functional effect and can be removed.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 


> ---
>  meson.build | 212 +++-
>  tcg/meson.build |   2 +-
>  2 files changed, 84 insertions(+), 130 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index dced840bfbee..9ccbe0f6e4ee 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -18,7 +18,6 @@ sh = find_program('sh')
>  cc = meson.get_compiler('c')
>  config_host = keyval.load(meson.current_build_dir() / 'config-host.mak')
>  enable_modules = 'CONFIG_MODULES' in config_host
> -static_kwargs = {}
>
>  # Temporary directory used for files created while
>  # configure runs. Since it is in the build directory
> @@ -510,7 +509,7 @@ gdbus_codegen = not_found
>  gdbus_codegen_error = '@0@ requires gdbus-codegen, please install libgio'
>  if not get_option('gio').auto() or have_system
>gio = dependency('gio-2.0', required: get_option('gio'),
> -   method: 'pkg-config', kwargs: static_kwargs)
> +   method: 'pkg-config')
>if gio.found() and not cc.links('''
>  #include 
>  int main(void)
> @@ -527,7 +526,7 @@ if not get_option('gio').auto() or have_system
>  gdbus_codegen = find_program(gio.get_variable('gdbus_codegen'),
>   required: get_option('gio'))
>  gio_unix = dependency('gio-unix-2.0', required: get_option('gio'),
> -  method: 'pkg-config', kwargs: static_kwargs)
> +  method: 'pkg-config')
>  gio = declare_dependency(dependencies: [gio, gio_unix],
>   version: gio.version())
>endif
> @@ -540,20 +539,19 @@ endif
>  lttng = not_found
>  if 'ust' in get_option('trace_backends')
>lttng = dependency('lttng-ust', required: true, version: '>= 2.1',
> - method: 'pkg-config', kwargs: static_kwargs)
> + method: 'pkg-config')
>  endif
>  pixman = not_found
>  if have_system or have_tools
>pixman = dependency('pixman-1', required: have_system, version:'>=0.21.8',
> -  method: 'pkg-config', kwargs: static_kwargs)
> +  method: 'pkg-config')
>  endif
> -zlib = dependency('zlib', required: true, kwargs: static_kwargs)
> +zlib = dependency('zlib', required: true)
>
>  libaio = not_found
>  if not get_option('linux_aio').auto() or have_block
>libaio = cc.find_library('aio', has_headers: ['libaio.h'],
> -   required: get_option('linux_aio'),
> -   kwargs: static_kwargs)
> +   required: get_option('linux_aio'))
>  endif
>
>  linux_io_uring_test = '''
> @@ -566,7 +564,7 @@ linux_io_uring = not_found
>  if not get_option('linux_io_uring').auto() or have_block
>linux_io_uring = dependency('liburing', version: '>=0.3',
>required: get_option('linux_io_uring'),
> -  method: 'pkg-config', kwargs: static_kwargs)
> +  method: 'pkg-config')
>if not cc.links(linux_io_uring_test)
>  linux_io_uring = not_found
>endif
> @@ -576,7 +574,7 @@ libnfs = not_found
>  if not get_option('libnfs').auto() or have_block
>libnfs = dependency('libnfs', version: '>=1.9.3',
>required: get_option('libnfs'),
> -  method: 'pkg-config', kwargs: static_kwargs)
> +  method: 'pkg-config')
>  endif
>
>  libattr_test = '''
> @@ -596,8 +594,7 @@ if get_option('attr').allowed()
>  libattr = declare_dependency()
>else
>  libattr = cc.find_library('attr', has_headers: ['attr/xattr.h'],
> -  required: get_option('attr'),
> -  kwargs: static_kwargs)
> +  required: get_option('attr'))
>  if libattr.found() and not \
>cc.links(libattr_test, dependencies: libattr, args: '-DCONFIG_LIBATTR')
>libattr = not_found
> @@ -632,7 +629,7 @@ seccomp_has_sysrawrc = false
>  if not get_option('seccomp').auto() or have_system or have_tools
>seccomp = dependency('libseccomp', version: '>=2.3.0',
> required: get_option('seccomp'),
> -   method: 'pkg-config', kwargs: static_kwargs)
> +   method: 'pkg-config')
>if seccomp.found()
>  seccomp_has_sysrawrc = cc.has_header_symbol('seccomp.h',
>  'SCMP_FLTATR_API_SYSRAWRC',
> @@ -643,8 +640,7 @@ endif
>  libcap_ng = not_found
>  if not get_option('cap_ng').auto() or have_system or have_tools
>libcap_ng = cc.find_library('cap-ng', has_headers: ['cap-ng.h'],
> -  required: get_option('cap_ng'),
> -  kwargs: st

Re: [PATCH 0/3] s390x/pci: rpcit fixes and enhancements

2022-12-12 Thread Thomas Huth


On 28/10/2022 21.47, Matthew Rosato wrote:

The following series fixes an issue with guest RPCIT processing discovered
during development of [1] as well as proposes a few additional optimizations
to the current RPCIT codepath.

[1] 
https://lore.kernel.org/linux-s390/20221019144435.369902-1-schne...@linux.ibm.com/

Matthew Rosato (3):
   s390x/pci: RPCIT second pass when mappings exhausted
   s390x/pci: coalesce unmap operations
   s390x/pci: shrink DMA aperture to be bound by vfio DMA limit


Thanks, I've queued patch 2 and 3 now to my s390x-next branch, too:

 https://gitlab.com/thuth/qemu/-/commits/s390x-next/

 Thomas

Re: [PATCH 11/30] meson: cleanup dummy-cpus.c rules

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:48 PM Paolo Bonzini  wrote:
>
> Now that qtest is available on all targets including Windows, dummy-cpus.c
> is included unconditionally in the build.
>
> Signed-off-by: Paolo Bonzini 

(and doesn't have to be target specific)

Reviewed-by: Marc-André Lureau 


> ---
>  accel/meson.build | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/accel/meson.build b/accel/meson.build
> index 259c35c4c882..3a480cc2efef 100644
> --- a/accel/meson.build
> +++ b/accel/meson.build
> @@ -11,10 +11,5 @@ if have_system
>subdir('stubs')
>  endif
>
> -dummy_ss = ss.source_set()
> -dummy_ss.add(files(
> -  'dummy-cpus.c',
> -))
> -
> -specific_ss.add_all(when: ['CONFIG_SOFTMMU'], if_true: dummy_ss)
> -specific_ss.add_all(when: ['CONFIG_XEN'], if_true: dummy_ss)
> +# qtest
> +softmmu_ss.add(files('dummy-cpus.c'))
> --
> 2.38.1
>
>


-- 
Marc-André Lureau

Re: [PATCH 12/30] modinfo: lookup compile_commands.json by object

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:43 PM Paolo Bonzini  wrote:
>
> With Meson 0.63 having fixed various issues with extract_objects, the
> compile_commands.json lookups can be simplified.  If the lookup uses
> the object file as key, there is no need to use the command line to
> distinguish among all entries for a given source.
>
> Cc: Gerd Hoffmann 
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 


> ---
>  meson.build| 14 --
>  scripts/modinfo-collect.py | 23 +++
>  2 files changed, 15 insertions(+), 22 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index 9ccbe0f6e4ee..8a9ed5628317 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -3123,16 +3123,11 @@ foreach d, list : modules
>  softmmu_mods += sl
>endif
>if module_ss.sources() != []
> -# FIXME: Should use sl.extract_all_objects(recursive: true) as
> -# input. Sources can be used multiple times but objects are
> -# unique when it comes to lookup in compile_commands.json.
> -# Depnds on a mesion version with
> -# https://github.com/mesonbuild/meson/pull/8900
>  modinfo_files += custom_target(d + '-' + m + '.modinfo',
> output: d + '-' + m + '.modinfo',
> -   input: module_ss.sources() + genh,
> +   input: 
> sl.extract_all_objects(recursive: true),
> capture: true,
> -   command: [modinfo_collect, 
> module_ss.sources()])
> +   command: [modinfo_collect, '@INPUT@'])
>endif
>  else
>if d == 'block'
> @@ -3165,12 +3160,11 @@ foreach d, list : target_modules
>  c_args: c_args,
>  pic: true)
>  softmmu_mods += sl
> -# FIXME: Should use sl.extract_all_objects(recursive: true) too.
>  modinfo_files += custom_target(module_name + '.modinfo',
> output: module_name + '.modinfo',
> -   input: target_module_ss.sources() 
> + genh,
> +   input: 
> sl.extract_all_objects(recursive: true),
> capture: true,
> -   command: [modinfo_collect, 
> '--target', target, target_module_ss.sources()])
> +   command: [modinfo_collect, 
> '--target', target, '@INPUT@'])
>endif
>  endif
>endforeach
> diff --git a/scripts/modinfo-collect.py b/scripts/modinfo-collect.py
> index 4e7584df6676..48bd92bd6180 100755
> --- a/scripts/modinfo-collect.py
> +++ b/scripts/modinfo-collect.py
> @@ -7,15 +7,6 @@
>  import shlex
>  import subprocess
>
> -def find_command(src, target, compile_commands):
> -for command in compile_commands:
> -if command['file'] != src:
> -continue
> -if target != '' and command['command'].find(target) == -1:
> -continue
> -return command['command']
> -return 'false'
> -
>  def process_command(src, command):
>  skip = False
>  out = []
> @@ -43,14 +34,22 @@ def main(args):
>  print("MODINFO_DEBUG target %s" % target)
>  arch = target[:-8] # cut '-softmmu'
>  print("MODINFO_START arch \"%s\" MODINFO_END" % arch)
> +
>  with open('compile_commands.json') as f:
> -compile_commands = json.load(f)
> -for src in args:
> +compile_commands_json = json.load(f)
> +compile_commands = { x['output']: x for x in compile_commands_json }
> +
> +for obj in args:
> +entry = compile_commands.get(obj, None)
> +if not entry:
> +sys.stderr.print('modinfo: Could not find object file', obj)
> +sys.exit(1)
> +src = entry['file']
>  if not src.endswith('.c'):
>  print("MODINFO_DEBUG skip %s" % src)
>  continue
> +command = entry['command']
>  print("MODINFO_DEBUG src %s" % src)
> -command = find_command(src, target, compile_commands)
>  cmdline = process_command(src, command)
>  print("MODINFO_DEBUG cmd", cmdline)
>  result = subprocess.run(cmdline, stdout = subprocess.PIPE,
> --
> 2.38.1
>
>


-- 
Marc-André Lureau

Re: [PATCH 06/30] meson: tweak hardening options for Windows

2022-12-12 Thread Paolo Bonzini


On 12/12/22 09:18, Marc-André Lureau wrote:

-  # Disable ASLR for debug builds to allow debugging with gdb
-  if get_option('optimization') == '0'
-qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase')
-  endif
+  qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', 
'-Wl,--high-entropy-va')


What about the comment for disabling ASLR on debug builds?


The problem with that line is that it _enables_ ASLR for debug builds, 
and nobody has complained about gdb since last April...  And nobody has 
complained to Debian or other distros that have enabled the flag for 
years now.


I'll clarify the commit message.


I wonder if we really have to add those flags ourself. Imho, we can
leave them to the compiler default or distrib.. I bet most of the deps
don't use those flags explicitly either.


I think so, at least Firefox does.  In general QEMU tries to do more 
build-time hardening than the average pacakge.


Paolo

Re: [PATCH v13 0/7] s390x: CPU Topology

2022-12-12 Thread Pierre Morel





On 12/9/22 14:32, Thomas Huth wrote:

On 08/12/2022 10.44, Pierre Morel wrote:

Hi,

Implementation discussions
==

CPU models
--

Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model
for old QEMU we could not activate it as usual from KVM but needed
a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
Checking and enabling this capability enables
S390_FEAT_CONFIGURATION_TOPOLOGY.

Migration
-

Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source
host the STFL(11) is provided to the guest.
Since the feature is already in the CPU model of older QEMU,
a migration from a new QEMU enabling the topology to an old QEMU
will keep STFL(11) enabled making the guest get an exception for
illegal operation as soon as it uses the PTF instruction.


I now thought that it is not possible to enable "ctop" on older QEMUs 
since the don't enable the KVM capability? ... or is it still somehow 
possible? What did I miss?


  Thomas


Enabling ctop with ctop=on on old QEMU is not possible, this is right.
But, if STFL(11) is enable in the source KVM by a new QEMU, I can see 
that even with -ctop=off the STFL(11) is migrated to the destination.


It is highly possible that I missed something in the cpu model.

A solution proposed by Cedric was to add a new machine but we did not 
want this because we decided that we do not want to wait for a new machine.


Another solution could be to have a we can have a new CPU feature 
overruling ctop like S390_FEAT_CPU_TOPOLOGY in the last series version 12.

I am not sure it must be linked with the creation of a new machine.

The solution here in this series is to add a VMState which will block 
the migration with older QEMU if the topology is activated with ctop on 
a new QEMU.


Regards,
Pierre





A VMState keeping track of the S390_FEAT_CONFIGURATION_TOPOLOGY
allows to forbid the migration in such a case.

Note that the VMState will be used to hold information on the
topology once we implement topology change for a running guest.




--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH v13 1/7] s390x/cpu topology: Creating CPU topology device

2022-12-12 Thread Pierre Morel





On 12/9/22 14:50, Thomas Huth wrote:

On 08/12/2022 10.44, Pierre Morel wrote:

We will need a Topology device to transfer the topology
during migration and to implement machine reset.

The device creation is fenced by s390_has_topology().

Signed-off-by: Pierre Morel 
---

[...]

diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 00..b3e59873f6
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,149 @@
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Pierre Morel 
+
+ * This work is licensed under the terms of the GNU GPL, version 2 or 
(at

+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "qemu/typedefs.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/s390x/cpu-topology.h"
+
+/**
+ * s390_has_topology
+ *
+ * Return false until the commit activating the topology is
+ * commited.
+ */
+bool s390_has_topology(void)
+{
+    return false;
+}
+
+/**
+ * s390_get_topology
+ *
+ * Returns a pointer to the topology.
+ *
+ * This function is called when we know the topology exist.
+ * Testing if the topology exist is done with s390_has_topology()
+ */
+S390Topology *s390_get_topology(void)
+{
+    static S390Topology *s390Topology;
+
+    if (!s390Topology) {
+    s390Topology = S390_CPU_TOPOLOGY(
+    object_resolve_path(TYPE_S390_CPU_TOPOLOGY, NULL));
+    }
+
+    assert(s390Topology);


I think you can move the assert() into the body of the if-statement.


Yes, clearly.




+    return s390Topology;
+}
+
+/**
+ * s390_init_topology
+ * @machine: The Machine state, used to retrieve the SMP parameters
+ * @errp: the error pointer in case of problem
+ *
+ * This function creates and initialize the S390Topology with
+ * the QEMU -smp parameters we will use during adding cores to the
+ * topology.
+ */
+void s390_init_topology(MachineState *machine, Error **errp)
+{
+    DeviceState *dev;
+
+    if (machine->smp.threads > 1) {
+    error_setg(errp, "CPU Topology do not support multithreading");


s/CPU Toplogy do/CPU topology does/


Yes, thanks.

Regards,
Pierre

--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH 13/30] configure: remove backwards-compatibility code

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:28 PM Paolo Bonzini  wrote:
>
> The cmd_line.txt mangling is only needed when rebuilding from very old
> trees and is kept mostly as an example of how to extend it.  However,
> Meson 0.63 introduces a deprecation mechanism for meson_options.txt
> that can be used instead, so get rid of our home-grown hack.

For the curious
https://mesonbuild.com/Release-notes-for-0-63-0.html#deprecate-an-option-and-replace-it-with-a-new-one

>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  configure | 10 --
>  1 file changed, 10 deletions(-)
>
> diff --git a/configure b/configure
> index 6efc2055ce09..113db838a16f 100755
> --- a/configure
> +++ b/configure
> @@ -2556,16 +2556,6 @@ if test "$skip_meson" = no; then
>if test "$?" -ne 0 ; then
>error_exit "meson setup failed"
>fi
> -else
> -  if test -f meson-private/cmd_line.txt; then
> -# Adjust old command line options whose type was changed
> -# Avoids having to use "setup --wipe" when Meson is upgraded
> -perl -i -ne '
> -  s/^gettext = true$/gettext = auto/;
> -  s/^gettext = false$/gettext = disabled/;
> -  /^b_staticpic/ && next;
> -  print;' meson-private/cmd_line.txt
> -  fi
>  fi

It's unlikely that users will jump directly from those old versions to
this, so it's fine to me if don't handle adjustments in that case.

>
>  # Save the configure command line for later reuse.
> --
> 2.38.1
>
>


-- 
Marc-André Lureau

Re: [PATCH v1 06/24] vfio-user: Define type vfio_user_pci_dev_info

2022-12-12 Thread Cédric Le Goater


On 11/9/22 00:13, John Johnson wrote:

New class for vfio-user with its class and instance
constructors and destructors, and its pci ops.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
---
  hw/vfio/Kconfig  | 10 +++
  hw/vfio/common.c |  5 
  hw/vfio/pci.c| 89 
  hw/vfio/pci.h|  8 +
  4 files changed, 112 insertions(+)

diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig
index 7cdba05..301894e 100644
--- a/hw/vfio/Kconfig
+++ b/hw/vfio/Kconfig
@@ -2,6 +2,10 @@ config VFIO
  bool
  depends on LINUX
  
+config VFIO_USER

+bool
+depends on VFIO
+
  config VFIO_PCI
  bool
  default y
@@ -9,6 +13,12 @@ config VFIO_PCI
  select EDID
  depends on LINUX && PCI
  
+config VFIO_USER_PCI

+bool
+default y
+select VFIO_USER
+depends on VFIO_PCI
+
  config VFIO_CCW
  bool
  default y
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index c7bf0aa..c589bd9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1774,6 +1774,11 @@ void vfio_reset_handler(void *opaque)
  QLIST_FOREACH(group, &vfio_group_list, next) {
  QLIST_FOREACH(vbasedev, &group->device_list, next) {
  if (vbasedev->dev->realized && vbasedev->needs_reset) {
+if (vbasedev->ops->vfio_hot_reset_multi == NULL) {
+error_printf("%s: No hot reset handler specified\n",
+ vbasedev->name);
+continue;
+}


Since needs_reset is false, which is the case for VFIO User,
vfio_hot_reset_multi won't be called. I don't think we care
much about adding this error message. In another patch, may be.


  vbasedev->ops->vfio_hot_reset_multi(vbasedev);
  }
  }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 80b03a2..dc19869 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -19,6 +19,7 @@
   */
  
  #include "qemu/osdep.h"

+#include CONFIG_DEVICES
  #include 
  #include 
  
@@ -3421,3 +3422,91 @@ static void register_vfio_pci_dev_type(void)

  }
  
  type_init(register_vfio_pci_dev_type)

+
+
+#ifdef CONFIG_VFIO_USER_PCI


Why not introduce a new file hw/vfio/user.c file ? It would be
cleaner.


+
+/*
+ * vfio-user routines.
+ */
+
+/*
+ * Emulated devices don't use host hot reset
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+vbasedev->needs_reset = false;
+}
+
+static VFIODeviceOps vfio_user_pci_ops = {
+.vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+.vfio_eoi = vfio_intx_eoi,
+.vfio_get_object = vfio_pci_get_object,
+.vfio_save_config = vfio_pci_save_config,
+.vfio_load_config = vfio_pci_load_config,
+};
+
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+ERRP_GUARD();
+VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+VFIODevice *vbasedev = &vdev->vbasedev;
+
+/*
+ * TODO: make option parser understand SocketAddress
+ * and use that instead of having scalar options
+ * for each socket type.
+ */
+if (!udev->sock_name) {
+error_setg(errp, "No socket specified");
+error_append_hint(errp, "Use -device vfio-user-pci,socket=\n");
+return;
+}
+
+vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
+vbasedev->ops = &vfio_user_pci_ops;
+vbasedev->type = VFIO_DEVICE_TYPE_PCI;
+vbasedev->dev = DEVICE(vdev);
+
+}
+
+static void vfio_user_instance_finalize(Object *obj)
+{
+VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+
+vfio_put_device(vdev);
+}
+
+static Property vfio_user_pci_dev_properties[] = {
+DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),


This looks like a good candidate for using a chardev. It could only
support AF_UNIX to start with if fd passing is the required feature.
But at least, the model would be using a well known backend. I think
vhost has the same kind of constraints.

Thanks,

C.




+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vfio_user_pci_dev_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+device_class_set_props(dc, vfio_user_pci_dev_properties);
+dc->desc = "VFIO over socket PCI device assignment";
+pdc->realize = vfio_user_pci_realize;
+}
+
+static const TypeInfo vfio_user_pci_dev_info = {
+.name = TYPE_VFIO_USER_PCI,
+.parent = TYPE_VFIO_PCI_BASE,
+.instance_size = sizeof(VFIOUserPCIDevice),
+.class_init = vfio_user_pci_dev_class_init,
+.instance_init = vfio_instance_init,
+.instance_finalize = vfio_user_instance_finalize,
+};
+
+static void register_vfio_user_dev_type(void)
+{
+type_register_static(&vfio_user_pci_dev_info);
+}
+
+type_init(register_vfio_user_dev_type)
+
+#endif /* VFIO_USER_PCI */
diff --git a/hw/vfio/pci.h b/hw/vfio/

Re: [PATCH 15/30] meson: cleanup compiler detection

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:29 PM Paolo Bonzini  wrote:
>
> Detect all compilers at the beginning of meson.build, and store
> the available languages in an array.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  meson.build | 62 ++---
>  1 file changed, 35 insertions(+), 27 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index 8a9ed5628317..c4fa82ae8ba4 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -15,9 +15,21 @@ ss = import('sourceset')
>  fs = import('fs')
>
>  sh = find_program('sh')
> -cc = meson.get_compiler('c')
>  config_host = keyval.load(meson.current_build_dir() / 'config-host.mak')
>  enable_modules = 'CONFIG_MODULES' in config_host
> +targetos = host_machine.system()
> +
> +cc = meson.get_compiler('c')
> +all_languages = ['c']
> +if add_languages('cpp', required: false, native: false)
> +  all_languages += ['cpp']
> +  cxx = meson.get_compiler('cpp')
> +endif
> +if targetos == 'darwin' and \
> +   add_languages('objc', required: get_option('cocoa'), native: false)
> +  all_languages += ['objc']
> +  objc = meson.get_compiler('objc')
> +endif
>
>  # Temporary directory used for files created while
>  # configure runs. Since it is in the build directory
> @@ -54,8 +66,6 @@ if cpu in ['riscv32', 'riscv64']
>cpu = 'riscv'
>  endif
>
> -targetos = host_machine.system()
> -
>  target_dirs = config_host['TARGET_DIRS'].split()
>  have_linux_user = false
>  have_bsd_user = false
> @@ -161,7 +171,7 @@ if 'dtrace' in get_option('trace_backends')
>  # semaphores are linked into the main binary and not the module's shared
>  # object.
>  add_global_arguments('-DSTAP_SDT_V2',
> - native: false, language: ['c', 'cpp', 'objc'])
> + native: false, language: all_languages)
>endif
>  endif
>
> @@ -203,7 +213,7 @@ endif
>  if get_option('fuzzing')
>add_project_link_arguments(['-Wl,-T,',
>(meson.current_source_dir() / 
> 'tests/qtest/fuzz/fork_fuzz.ld')],
> - native: false, language: ['c', 'cpp', 'objc'])
> + native: false, language: all_languages)
>
># Specify a filter to only instrument code that is directly related to
># virtual-devices.
> @@ -216,7 +226,7 @@ if get_option('fuzzing')
>   args: ['-fsanitize-coverage-allowlist=/dev/null',
>  '-fsanitize-coverage=trace-pc'] )
>  
> add_global_arguments('-fsanitize-coverage-allowlist=instrumentation-filter',
> - native: false, language: ['c', 'cpp', 'objc'])
> + native: false, language: all_languages)
>endif
>
>if get_option('fuzzing_engine') == ''
> @@ -225,9 +235,9 @@ if get_option('fuzzing')
>  # everything with fsanitize=fuzzer-no-link. Otherwise, the linker will be
>  # unable to bind the fuzzer-related callbacks added by instrumentation.
>  add_global_arguments('-fsanitize=fuzzer-no-link',
> - native: false, language: ['c', 'cpp', 'objc'])
> + native: false, language: all_languages)
>  add_global_link_arguments('-fsanitize=fuzzer-no-link',
> -  native: false, language: ['c', 'cpp', 'objc'])
> +  native: false, language: all_languages)
>  # For the actual fuzzer binaries, we need to link against the libfuzzer
>  # library. They need to be configurable, to support OSS-Fuzz
>  fuzz_exe_ldflags = ['-fsanitize=fuzzer']
> @@ -238,15 +248,11 @@ if get_option('fuzzing')
>endif
>  endif
>
> -add_global_arguments(qemu_cflags, native: false, language: ['c'])
> -add_global_arguments(qemu_objcflags, native: false, language: ['objc'])
> -
>  # Check that the C++ compiler exists and works with the C compiler.
>  link_language = 'c'
>  linker = cc
>  qemu_cxxflags = []
> -if add_languages('cpp', required: false, native: false)
> -  cxx = meson.get_compiler('cpp')
> +if 'cpp' in all_languages
>add_global_arguments(['-D__STDC_LIMIT_MACROS', '-D__STDC_CONSTANT_MACROS', 
> '-D__STDC_FORMAT_MACROS'],
> native: false, language: 'cpp')
>foreach k: qemu_cflags
> @@ -255,7 +261,6 @@ if add_languages('cpp', required: false, native: false)
>qemu_cxxflags += [k]
>  endif
>endforeach
> -  add_global_arguments(qemu_cxxflags, native: false, language: 'cpp')
>
>if cxx.links(files('scripts/main.c'), args: qemu_cflags)
>  link_language = 'cpp'
> @@ -271,22 +276,21 @@ if targetos != 'sunos' and not 
> config_host.has_key('CONFIG_TSAN')
>qemu_ldflags += linker.get_supported_link_arguments('-Wl,--warn-common')
>  endif
>
> -add_global_link_arguments(qemu_ldflags, native: false, language: ['c', 
> 'cpp', 'objc'])
> +add_global_link_arguments(qemu_ldflags, native: false, language: 
> all_languages)
>
> +add_global_arguments(qemu_cflags, native: false, lan

Re: [PATCH v13 0/7] s390x: CPU Topology

2022-12-12 Thread Thomas Huth


On 12/12/2022 09.51, Pierre Morel wrote:



On 12/9/22 14:32, Thomas Huth wrote:

On 08/12/2022 10.44, Pierre Morel wrote:

Hi,

Implementation discussions
==

CPU models
--

Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model
for old QEMU we could not activate it as usual from KVM but needed
a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
Checking and enabling this capability enables
S390_FEAT_CONFIGURATION_TOPOLOGY.

Migration
-

Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source
host the STFL(11) is provided to the guest.
Since the feature is already in the CPU model of older QEMU,
a migration from a new QEMU enabling the topology to an old QEMU
will keep STFL(11) enabled making the guest get an exception for
illegal operation as soon as it uses the PTF instruction.


I now thought that it is not possible to enable "ctop" on older QEMUs 
since the don't enable the KVM capability? ... or is it still somehow 
possible? What did I miss?


  Thomas


Enabling ctop with ctop=on on old QEMU is not possible, this is right.
But, if STFL(11) is enable in the source KVM by a new QEMU, I can see that 
even with -ctop=off the STFL(11) is migrated to the destination.


Is this with the "host" CPU model or another one? And did you explicitly 
specify "ctop=off" at the command line, or are you just using the default 
setting by not specifying it?


 Thomas

Re: [PATCH 18/30] configure, meson: move --enable-modules to Meson

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:44 PM Paolo Bonzini  wrote:
>
> Signed-off-by: Paolo Bonzini 

On Fri, Dec 9, 2022 at 3:39 PM Paolo Bonzini  wrote:
>
> All uses of pkg-config have been moved to Meson.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  configure | 21 +
>  meson.build   |  7 ++-
>  meson_options.txt |  2 ++
>  scripts/meson-buildoptions.sh |  3 +++
>  4 files changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/configure b/configure
> index 9c336203d8d9..26d10aeffd82 100755
> --- a/configure
> +++ b/configure
> @@ -273,7 +273,6 @@ sanitizers="no"
>  tsan="no"
>  fortify_source="yes"
>  EXESUF=""
> -modules="no"
>  prefix="/usr/local"
>  qemu_suffix="qemu"
>  softmmu="yes"
> @@ -705,12 +704,6 @@ for opt do
>;;
>--disable-debug-info) meson_option_add -Ddebug=false
>;;
> -  --enable-modules)
> -  modules="yes"
> -  ;;
> -  --disable-modules)
> -  modules="no"
> -  ;;
>--cpu=*)
>;;
>--target-list=*) target_list="$optarg"
> @@ -1001,7 +994,6 @@ cat << EOF
>linux-user  all linux usermode emulation targets
>bsd-userall BSD usermode emulation targets
>pie Position Independent Executables
> -  modules modules support (non-Windows)
>debug-tcg   TCG debugging (default is disabled)
>debug-info  debugging information
>safe-stack  SafeStack Stack Smash Protection. Depends on
> @@ -1260,16 +1252,8 @@ else
>QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces"
>  fi
>
> -# Our module code doesn't support Windows
> -if test "$modules" = "yes" && test "$mingw32" = "yes" ; then
> -  error_exit "Modules are not available for Windows"
> -fi
> -
> -# Static linking is not possible with plugins, modules or PIE
> +# Resolve default for --enable-plugins
>  if test "$static" = "yes" ; then
> -  if test "$modules" = "yes" ; then
> -error_exit "static and modules are mutually incompatible"
> -  fi
>if test "$plugins" = "yes"; then
>  error_exit "static and plugins are mutually incompatible"
>else
> @@ -2229,9 +2213,6 @@ if test "$solaris" = "yes" ; then
>  fi
>  echo "SRC_PATH=$source_path" >> $config_host_mak
>  echo "TARGET_DIRS=$target_list" >> $config_host_mak
> -if test "$modules" = "yes"; then
> -  echo "CONFIG_MODULES=y" >> $config_host_mak
> -fi
>
>  # XXX: suppress that
>  if [ "$bsd" = "yes" ] ; then
> diff --git a/meson.build b/meson.build
> index f63ab7f83bed..99c1bde4d154 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -16,7 +16,6 @@ fs = import('fs')
>
>  sh = find_program('sh')
>  config_host = keyval.load(meson.current_build_dir() / 'config-host.mak')
> -enable_modules = 'CONFIG_MODULES' in config_host
>  targetos = host_machine.system()
>
>  cc = meson.get_compiler('c')
> @@ -84,6 +83,12 @@ have_ga = get_option('guest_agent') \
>.require(targetos in ['sunos', 'linux', 'windows', 'freebsd'],
> error_message: 'unsupported OS for QEMU guest agent') \
>.allowed()
> +enable_modules = get_option('modules') \
> +  .require(targetos != 'windows',
> +   error_message: 'Modules are not available for Windows') \
> +  .require(not get_option('prefer_static'),
> +   error_message: 'Modules are incompatible with static linking') \
> +  .allowed()
>  have_block = have_system or have_tools
>
>  python = import('python').find_installation()
> diff --git a/meson_options.txt b/meson_options.txt
> index 4b749ca54900..e492aaa73fbc 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -44,6 +44,8 @@ option('fuzzing', type : 'boolean', value: false,
> description: 'build fuzzing targets')
>  option('gettext', type : 'feature', value : 'auto',
> description: 'Localization of the GTK+ user interface')
> +option('modules', type : 'feature', value : 'disabled',
> +   description: 'modules support (non Windows)')
>  option('module_upgrades', type : 'boolean', value : false,
> description: 'try to load modules from alternate paths for upgrades')
>  option('install_blobs', type : 'boolean', value : true,
> diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
> index aa6e30ea911e..f91797741eef 100644
> --- a/scripts/meson-buildoptions.sh
> +++ b/scripts/meson-buildoptions.sh
> @@ -119,6 +119,7 @@ meson_options_help() {
>printf "%s\n" '  lzo lzo compression support'
>printf "%s\n" '  malloc-trim enable libc malloc_trim() for memory 
> optimization'
>printf "%s\n" '  membarrier  membarrier system call (for Linux 4.14+ 
> or Windows'
> +  printf "%s\n" '  modules modules support (non Windows)'
>printf "%s\n" '  mpath   Multipath persistent reservation 
> passthrough'
>printf "%s\n" '  multiprocessOut of process device emulation support'
>printf "%s\n" '  netmap  netmap network backend support'
> @@ -338,6 +339,8 @@ _meson_option_parse() {
>  --disable-memb

Re: [PATCH 17/30] configure: remove pkg-config functions

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:39 PM Paolo Bonzini  wrote:
>
> All uses of pkg-config have been moved to Meson.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  configure   | 19 +++
>  docs/devel/build-system.rst |  4 
>  2 files changed, 3 insertions(+), 20 deletions(-)
>
> diff --git a/configure b/configure
> index fb28dd3963bd..9c336203d8d9 100755
> --- a/configure
> +++ b/configure
> @@ -365,11 +365,7 @@ smbd="$SMBD"
>  strip="${STRIP-${cross_prefix}strip}"
>  widl="${WIDL-${cross_prefix}widl}"
>  windres="${WINDRES-${cross_prefix}windres}"
> -pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}"
> -query_pkg_config() {
> -"${pkg_config_exe}" ${QEMU_PKG_CONFIG_FLAGS} "$@"
> -}
> -pkg_config=query_pkg_config
> +pkg_config="${PKG_CONFIG-${cross_prefix}pkg-config}"
>  sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}"
>
>  # default flags for all hosts
> @@ -745,9 +741,7 @@ for opt do
>;;
>--without-default-features) # processed above
>;;
> -  --static)
> -static="yes"
> -QEMU_PKG_CONFIG_FLAGS="--static $QEMU_PKG_CONFIG_FLAGS"
> +  --static) static="yes"
>;;
>--bindir=*) bindir="$optarg"
>;;
> @@ -1419,13 +1413,6 @@ EOF
>fi
>  fi
>
> -##
> -# pkg-config probe
> -
> -if ! has "$pkg_config_exe"; then
> -  error_exit "pkg-config binary '$pkg_config_exe' not found"
> -fi
> -
>  ##
>  # fdt probe
>
> @@ -2423,7 +2410,7 @@ if test "$skip_meson" = no; then
>test -n "$objcc" && echo "objc = [$(meson_quote $objcc $CPU_CFLAGS)]" >> 
> $cross
>echo "ar = [$(meson_quote $ar)]" >> $cross
>echo "nm = [$(meson_quote $nm)]" >> $cross
> -  echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross
> +  echo "pkgconfig = [$(meson_quote $pkg_config)]" >> $cross
>echo "ranlib = [$(meson_quote $ranlib)]" >> $cross
>if has $sdl2_config; then
>  echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross
> diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
> index 9db18aff159e..66cfe7b8bdc8 100644
> --- a/docs/devel/build-system.rst
> +++ b/docs/devel/build-system.rst
> @@ -103,10 +103,6 @@ developers in checking for system features:
> Print $MESSAGE to stderr, followed by $MORE... and then exit from the
> configure script with non-zero status
>
> -``query_pkg_config $ARGS...``
> -   Run pkg-config passing it $ARGS. If QEMU is doing a static build,
> -   then --static will be automatically added to $ARGS
> -
>
>  Stage 2: Meson
>  ==
> --
> 2.38.1
>
>


-- 
Marc-André Lureau

Re: [PATCH v2 14/15] block: Don't poll in bdrv_replace_child_noperm()

2022-12-12 Thread Paolo Bonzini


On 12/9/22 17:53, Paolo Bonzini wrote:

On 11/18/22 18:41, Kevin Wolf wrote:

In order to make sure that bdrv_replace_child_noperm() doesn't have to
poll any more, get rid of the bdrv_parent_drained_begin_single() call.

This is possible now because we can require that the parent is already
drained through the child in question when the function is called and we
don't call the parent drain callbacks more than once.

The additional drain calls needed in callers cause the test case to run
its code in the drain handler too early (bdrv_attach_child() drains
now), so modify it to only enable the code after the test setup has
completed.

Signed-off-by: Kevin Wolf


I hate to bear bad news, but this breaks the Windows builds on github 
(msys-32bit, msys-64bit) with an obscure but 100% reproducible


51/88 qemu:unit / test-bdrv-drain ERROR   1.30s   (exit status 
3221225477 or signal 3221225349 SIGinvalid)


The exit status is 0xC005 aka a Windows SIGSEGV.  With some luck it 
could be reproducible with Wine (but no gdb).


I am testing dropping this patch and squashing

diff --git a/block.c b/block.c
index 1a2a8d9de907..bb9c85222168 100644
--- a/block.c
+++ b/block.c
@@ -2870,7 +2870,9 @@ static void bdrv_replace_child_noperm(BdrvChild 
*child,

  */
 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
 if (new_bs_quiesce_counter && !child->quiesced_parent) {
-bdrv_parent_drained_begin_single(child, true);
+bdrv_parent_drained_begin_single(child);
+AIO_WAIT_WHILE(bdrv_child_get_parent_aio_context(child),
+   bdrv_parent_drained_poll_single(c));
 }

 if (old_bs) {

in patch 15/15, which should at least allow me to keep a lot of the 
cleanups I had on top of this series.


Paolo

Re: [PATCH v13 4/7] s390x/cpu_topology: CPU topology migration

2022-12-12 Thread Pierre Morel





On 12/9/22 15:56, Cédric Le Goater wrote:

On 12/8/22 10:44, Pierre Morel wrote:

The migration can only take place if both source and destination
of the migration both use or both do not use the CPU topology
facility.

We indicate a change in topology during migration postload for the
case the topology changed between source and destination.

Signed-off-by: Pierre Morel 




+
+const VMStateDescription vmstate_cpu_topology = {
+    .name = "cpu_topology",
+    .version_id = 1,
+    .post_load = cpu_topology_postload,


Please use 'cpu_topology_post_load', it is easier to catch with a grep.



OK

Regards,
Pierre


--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH 03/11] RISC-V: Adding T-Head SYNC instructions

2022-12-12 Thread LIU Zhiwei




On 2022/9/8 15:29, Richard Henderson wrote:

On 9/6/22 13:22, Christoph Muellner wrote:

+NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
+NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)


These should not be nops: th_sfence_vmas requires a tlb flush; 
th.sync{,.i} needs to end the current translation block; 
th.sync.{s,is} needs multiprocessor sync, which involves a call to 
async_safe_run_on_cpu.


Hi Richard,

I have fixed the description of T-Head custom synchronization 
instructions according to the implementation of hardware. Sorry for the 
misleading.


https://github.com/T-head-Semi/thead-extension-spec/tree/master/xtheadsync


The fix is as below:

1）th.sync.s has the same function with th.sync.

2) th.sync. has the same function with th.sync.i

3) th.sync has the function of memory barrier, but it is stricter than 
RISC-V fence instruction as it order all the instructions instead of 
load/store instructions.


4) th.sync.i has the function to flush the pipeline besides the function 
of th.sync.



On QEMU,  I think they should be emulated them as below:

1) th.sync should be emulated as " 'tcg_gen_mb()'  and  'end the current 
translation block'" on QEMU.


2) th.sync should be emulated as " 'tcg_gen_mb()'  and  'end the current 
translation block'" on QEMU because we don't have the cache model for 
guest on QEMU. Thus we don't have to synchronize between the icache and 
dcache for guest.



'tcg_gen_mb' is for the function of memory barrier,  and  'end the 
current translation block' is to reflect the influence of other 
instructions, such as to take interrupts which only at the end of 
translation block.
Maybe we can also just implement these instructions as 'tcg_gen_mb' 
because currently all CSR instructions which may influence the 
interrupts taking have ended the TB on QEMU.


Is it right?

Thanks,
Zhiwei




r~

Re: [RFC PATCH v2 01/22] include: import xen public headers

2022-12-12 Thread Paul Durrant


On 09/12/2022 09:55, David Woodhouse wrote:

From: Joao Martins 

Signed-off-by: Joao Martins 
[dwmw2: Update to Xen public headers from 4.16.2 release]
Signed-off-by: David Woodhouse 
---
  include/standard-headers/xen/arch-x86/cpuid.h |  118 ++
  .../xen/arch-x86/xen-x86_32.h |  194 +++
  .../xen/arch-x86/xen-x86_64.h |  241 
  include/standard-headers/xen/arch-x86/xen.h   |  398 +++
  include/standard-headers/xen/event_channel.h  |  388 ++
  include/standard-headers/xen/features.h   |  143 +++
  include/standard-headers/xen/grant_table.h|  686 +++
  include/standard-headers/xen/hvm/hvm_op.h |  395 +++
  include/standard-headers/xen/hvm/params.h |  318 +
  include/standard-headers/xen/memory.h |  754 
  include/standard-headers/xen/physdev.h|  383 ++
  include/standard-headers/xen/sched.h  |  202 
  include/standard-headers/xen/trace.h  |  341 ++
  include/standard-headers/xen/vcpu.h   |  248 
  include/standard-headers/xen/version.h|  113 ++
  include/standard-headers/xen/xen-compat.h |   46 +
  include/standard-headers/xen/xen.h| 1049 +
  17 files changed, 6017 insertions(+)
  create mode 100644 include/standard-headers/xen/arch-x86/cpuid.h
  create mode 100644 include/standard-headers/xen/arch-x86/xen-x86_32.h
  create mode 100644 include/standard-headers/xen/arch-x86/xen-x86_64.h
  create mode 100644 include/standard-headers/xen/arch-x86/xen.h
  create mode 100644 include/standard-headers/xen/event_channel.h
  create mode 100644 include/standard-headers/xen/features.h
  create mode 100644 include/standard-headers/xen/grant_table.h
  create mode 100644 include/standard-headers/xen/hvm/hvm_op.h
  create mode 100644 include/standard-headers/xen/hvm/params.h
  create mode 100644 include/standard-headers/xen/memory.h
  create mode 100644 include/standard-headers/xen/physdev.h
  create mode 100644 include/standard-headers/xen/sched.h
  create mode 100644 include/standard-headers/xen/trace.h
  create mode 100644 include/standard-headers/xen/vcpu.h
  create mode 100644 include/standard-headers/xen/version.h
  create mode 100644 include/standard-headers/xen/xen-compat.h
  create mode 100644 include/standard-headers/xen/xen.h



Reviewed-by: Paul Durrant

Re: [RFC PATCH v2 02/22] xen: add CONFIG_XENFV_MACHINE and CONFIG_XEN_EMU options for Xen emulation

2022-12-12 Thread Paul Durrant


On 09/12/2022 09:55, David Woodhouse wrote:

From: David Woodhouse 

The XEN_EMU option will cover core Xen support in target/, which exists
only for x86 with KVM today but could theoretically also be implemented
on Arm/Aarch64 and with TCG or other accelerators. It will also cover
the support for architecture-independent grant table and event channel
support which will be added in hw/xen/.

The XENFV_MACHINE option is for the xenfv platform support, which will
now be used both by XEN_EMU and by real Xen.

The XEN option remains dependent on the Xen runtime libraries, and covers
support for real Xen. Some code which currently resides under CONFIG_XEN
will be moving to CONFIG_XENFV_MACHINE over time.

Signed-off-by: David Woodhouse 
---
  accel/Kconfig  | 1 +
  hw/Kconfig | 1 +
  hw/xen/Kconfig | 3 +++
  meson.build| 1 +
  target/Kconfig | 4 
  5 files changed, 10 insertions(+)
  create mode 100644 hw/xen/Kconfig



Reviewed-by: Paul Durrant

Re: [PATCH v13 2/7] s390x/cpu topology: reporting the CPU topology to the guest

2022-12-12 Thread Pierre Morel





On 12/9/22 16:43, Cédric Le Goater wrote:

On 12/8/22 10:44, Pierre Morel wrote:

The guest uses the STSI instruction to get information on the
CPU topology.

Let us implement the STSI instruction for the basis CPU topology
level, level 2.

Signed-off-by: Pierre Morel 
---


...


+#define S390_TOPOLOGY_MAX_MNEST 2
+
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
+{
+    union {
+    char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
+    SysIB_151x sysib;
+    } buffer QEMU_ALIGNED(8);
+    int len;
+
+    if (s390_is_pv() || !s390_has_topology() ||


Isn't the test s390_is_pv() redundant since patch 6 deactivates
S390_FEAT_CONFIGURATION_TOPOLOGY for PV guests ?


Yes you are right it is.
I remove it.

Regards,
Pierre



--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH 03/11] RISC-V: Adding T-Head SYNC instructions

2022-12-12 Thread LIU Zhiwei




On 2022/9/8 15:29, Richard Henderson wrote:

On 9/6/22 13:22, Christoph Muellner wrote:

+NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
+NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)


These should not be nops: th_sfence_vmas requires a tlb flush; 
th.sync{,.i} needs to end the current translation block; 
th.sync.{s,is} needs multiprocessor sync, which involves a call to 
async_safe_run_on_cpu.

Hi Richard,

I have fixed the description of T-Head custom synchronization 
instructions according to the implementation of hardware. Sorry for the 
misleading.


https://github.com/T-head-Semi/thead-extension-spec/tree/master/xtheadsync


The fix is as below:

1）th.sync.s has the same function with th.sync.

2) th.sync.is has the same function with th.sync.i

3) th.sync has the function of memory barrier, but it is stricter than 
RISC-V fence instruction as it order all the instructions instead of 
load/store instructions.


4) th.sync.i has the function to flush the pipeline besides the function 
of th.sync.



On QEMU,  I think they should be emulated them as below:

1) th.sync should be emulated as " 'tcg_gen_mb()'  and  'end the current 
translation block'" on QEMU.


2) th.sync.i should be emulated as " 'tcg_gen_mb()'  and  'end the 
current translation block'" on QEMU because we don't have the cache 
model for guest on QEMU. Thus we don't have to synchronize between the 
icache and dcache for guest.



'tcg_gen_mb' is for the function of memory barrier. And  'end the 
current translation block' is to reflect the influence of other 
instructions, such as taking interrupts which happens only at the end of 
a translation block.
Maybe we can also just implement these instructions as 'tcg_gen_mb' 
because currently all CSR instructions which may influence the 
interrupts taking have ended the TB on QEMU.



Is it right?

Thanks,
Zhiwei



r~

Re: [PATCH v1 03/24] vfio-user: add container IO ops vector

2022-12-12 Thread Philippe Mathieu-Daudé


On 9/12/22 17:10, Cédric Le Goater wrote:

Hello John,

On 11/9/22 00:13, John Johnson wrote:

Used for communication with VFIO driver
(prep work for vfio-user, which will communicate over a socket)

Signed-off-by: John G Johnson 
---
  hw/vfio/common.c  | 126 
--

  include/hw/vfio/vfio-common.h |  33 +++
  2 files changed, 117 insertions(+), 42 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ace9562..83d69b9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer 
*container,

  goto unmap_exit;
  }
-    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+    ret = CONT_DMA_UNMAP(container, unmap, bitmap);


I am not sure these macros are very useful, compared to :

     container->ops->dma_unmap(container, unmap, bitmap);


I was going to report the same.



+/*
+ * The next 2 ops vectors are how Devices and Containers
+ * communicate with the server.  The default option is
+ * through ioctl() to the kernel VFIO driver, but vfio-user
+ * can use a socket to a remote process.
+ */
+
+struct VFIOContIO {


VFIOContainerOps seems more adequate with the current VFIO terminology
in QEMU.


Yes please, abbreviated "Cont" is not helpful.

Re: [PATCH v1 00/24] vfio-user client

2022-12-12 Thread Philippe Mathieu-Daudé


Cc'ing Mark & Edgar.

On 9/11/22 00:13, John Johnson wrote:

Hello,

This is the 6th revision of the vfio-user client implementation.
It is the first patch series (the previous revisions were RFCs)

First of all, thank you for your time reviewing the RFC versions.

The vfio-user framework consists of 3 parts:
  1) The VFIO user protocol specification.
  2) A client - the VFIO device in QEMU that encapsulates VFIO messages
 and sends them to the server.
  3) A server - a remote process that emulates a device.

This patchset implements parts 1 and 2.

The libvfio-user project (https://github.com/nutanix/libvfio-user)
can be used by a remote process to handle the protocol to implement the third 
part.
We also have upstreamed a patch series that implement a server using QEMU.


Contributors:

John G Johnson 
John Levon 
Thanos Makatos 
Elena Ufimtseva 
Jagannathan Raman 

John Johnson (23):
   vfio-user: add VFIO base abstract class
   vfio-user: add container IO ops vector
   vfio-user: add region cache
   vfio-user: add device IO ops vector
   vfio-user: Define type vfio_user_pci_dev_info
   vfio-user: connect vfio proxy to remote server
   vfio-user: define socket receive functions
   vfio-user: define socket send functions
   vfio-user: get device info
   vfio-user: get region info
   vfio-user: region read/write
   vfio-user: pci_user_realize PCI setup
   vfio-user: get and set IRQs
   vfio-user: forward msix BAR accesses to server
   vfio-user: proxy container connect/disconnect
   vfio-user: dma map/unmap operations
   vfio-user: add dma_unmap_all
   vfio-user: secure DMA support
   vfio-user: dma read/write operations
   vfio-user: pci reset
   vfio-user: add 'x-msg-timeout' option that specifies msg wait times
   vfio-user: add coalesced posted writes
   vfio-user: add trace points

[Bug 1523246] Re: Virtio-blk does not support TRIM

2022-12-12 Thread mohammadreza nasrabadi

On which version of qemu the discard option is supported?

I have emulated a VM with below qemu options related to disk type:

-drive file=disk.img,if=none,id=disk0,l2-cache-
size=8M,format=qcow2,discard=on,detect-zeroes=unmap,aio=io_uring -device
virtio-blk-pci,drive=disk0,scsi=off,bootindex=2

the disk file on the host side is located on xfs mountpoint on RAID
level 10 array.

After I downloaded a file with 1GB size on the guest OS, I saw the size
of the disk file on the host has been increased as well.

But when I delete the downloaded file and issue fstrim --all -v command,
the disk file on host has not been decreased.

The version of qemu I'm using is 5.2.0

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1523246

Title:
  Virtio-blk does not support TRIM

Status in QEMU:
  Fix Released

Bug description:
  When model=virtio is used, TRIM is not supported.

  # mount -o discard /dev/vda4 /mnt
  # mount | tail -1
  /dev/vda4 on /mnt type fuseblk 
(rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other,blksize=4096)
  # fstrim /mnt/
  fstrim: /mnt/: the discard operation is not supported

  Booting without model=virtio allows using TRIM (in Windows as well).

  Full QEMU line:

  qemu-system-x86_64 -enable-kvm -cpu host -bios
  /usr/share/ovmf/ovmf_x64.bin -smp 2 -m 7G -vga qxl -usbdevice tablet
  -net nic,model=virtio -net user -drive discard=unmap,detect-
  zeroes=unmap,cache=none,file=vms/win10.hd.img.vmdk,format=vmdk,if=virtio

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1523246/+subscriptions

Re: [PATCH v3 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-12-12 Thread Klaus Jensen

On Dec  8 12:39, Guenter Roeck wrote:
> On Thu, Dec 08, 2022 at 12:13:55PM -0800, Guenter Roeck wrote:
> > On Thu, Dec 08, 2022 at 10:47:42AM -0800, Guenter Roeck wrote:
> > > > 
> > > > A cq head doorbell mmio is skipped... And it is not the fault of the
> > > > kernel. The kernel is in it's good right to skip the mmio since the cq
> > > > eventidx is not properly updated.
> > > > 
> > > > Adding that and it boots properly on riscv. But I'm perplexed as to why
> > > > this didnt show up on our regularly tested platforms.
> > > > 
> > > > Gonna try to get this in for 7.2!
> > > 
> > > I see another problem with sparc64.
> > > 
> > > [5.261508] could not locate request for tag 0x0
> > > [5.261711] nvme nvme0: invalid id 0 completed on queue 1
> > > 
> > > That is seen repeatedly until the request times out. I'll test with
> > > your patch to see if it resolves this problem as well, and will bisect
> > > otherwise.
> > > 
> > The second problem is unrelated to the doorbell problem.
> > It is first seen in qemu v7.1. I'll try to bisect.
> > 
> 
> Unfortunately, the problem observed with sparc64 also bisects to this
> patch. Making things worse, "hw/nvme: fix missing cq eventidx update"
> does not fix it (which is why I initially thought it was unrelated).
> 
> I used the following qemu command line.
> 
> qemu-system-sparc64 -M sun4v -cpu "TI UltraSparc IIi" -m 512 -snapshot \
> -device nvme,serial=foo,drive=d0,bus=pciB \
> -drive file=rootfs.ext2,if=none,format=raw,id=d0 \
> -kernel arch/sparc/boot/image -no-reboot \
> -append "root=/dev/nvme0n1 console=ttyS0" \
> -nographic -monitor none
> 

Hi Guenter,

Thank you very much for the detailed reports and I apologize for the
fallout of this.

I think this is related to missing endian conversions when handling the
shadow doorbells. I'm not sure if there is a kernel issue here as well,
because as far as I can tell, the shadow doorbells are updated like so:

  old_value = *dbbuf_db;
  *dbbuf_db = value;

(where `value` is the new head/tail value depending on if this is an sq
or cq).

Keith, is the kernel doing something magically here I am not aware of,
or isn't this missing some cpu_to_le32() / le32_to_cpu() calls as well?


signature.asc
Description: PGP signature

Re: [PATCH v2 11/14] ui: Move HMP commands from monitor to new ui/ui-hmp-cmds.c

2022-12-12 Thread Markus Armbruster

Markus Armbruster  writes:

> This moves these commands from MAINTAINERS section "Human
> Monitor (HMP)" to "Graphics".
>
> Signed-off-by: Markus Armbruster 
> Reviewed-by: Daniel P. Berrangé 
> Reviewed-by: Philippe Mathieu-Daudé 

[...]

> diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c
> new file mode 100644
> index 00..c26a1f00d0
> --- /dev/null
> +++ b/ui/ui-hmp-cmds.c
> @@ -0,0 +1,361 @@
> +/*
> + * Human Monitor Interface commands

Make this

* HMP commands related to UI

> + *
> + * Copyright IBM, Corp. 2011
> + *
> + * Authors:
> + *  Anthony Liguori   
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Contributions after 2012-01-13 are licensed under the terms of the
> + * GNU GPL, version 2 or (at your option) any later version.
> + */

[...]

Re: [PATCH v13 0/7] s390x: CPU Topology

2022-12-12 Thread Pierre Morel





On 12/12/22 10:07, Thomas Huth wrote:

On 12/12/2022 09.51, Pierre Morel wrote:



On 12/9/22 14:32, Thomas Huth wrote:

On 08/12/2022 10.44, Pierre Morel wrote:

Hi,

Implementation discussions
==

CPU models
--

Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model
for old QEMU we could not activate it as usual from KVM but needed
a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
Checking and enabling this capability enables
S390_FEAT_CONFIGURATION_TOPOLOGY.

Migration
-

Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source
host the STFL(11) is provided to the guest.
Since the feature is already in the CPU model of older QEMU,
a migration from a new QEMU enabling the topology to an old QEMU
will keep STFL(11) enabled making the guest get an exception for
illegal operation as soon as it uses the PTF instruction.


I now thought that it is not possible to enable "ctop" on older QEMUs 
since the don't enable the KVM capability? ... or is it still somehow 
possible? What did I miss?


  Thomas


Enabling ctop with ctop=on on old QEMU is not possible, this is right.
But, if STFL(11) is enable in the source KVM by a new QEMU, I can see 
that even with -ctop=off the STFL(11) is migrated to the destination.


Is this with the "host" CPU model or another one? And did you explicitly 
specify "ctop=off" at the command line, or are you just using the 
default setting by not specifying it?


With explicit cpumodel and using ctop=off like in

sudo /usr/local/bin/qemu-system-s390x_master \
 -m 512M \
 -enable-kvm -smp 4,sockets=4,cores=2,maxcpus=8 \
 -cpu z14,ctop=off \
 -machine s390-ccw-virtio-7.2,accel=kvm \
 ...


regards,
Pierre

--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH 1/1] qemu-iotests/stream-under-throttle: do not shutdown QEMU

2022-12-12 Thread Christian Borntraeger





Am 07.12.22 um 14:14 schrieb Christian Borntraeger:

Without a kernel or boot disk a QEMU on s390 will exit (usually with a
disabled wait state). This breaks the stream-under-throttle test case.
Do not exit qemu if on s390.

Signed-off-by: Christian Borntraeger 
---
  tests/qemu-iotests/tests/stream-under-throttle | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/tests/stream-under-throttle 
b/tests/qemu-iotests/tests/stream-under-throttle
index 8d2d9e16840d..c24dfbcaa2f2 100755
--- a/tests/qemu-iotests/tests/stream-under-throttle
+++ b/tests/qemu-iotests/tests/stream-under-throttle
@@ -88,6 +88,8 @@ class TestStreamWithThrottle(iotests.QMPTestCase):
 'x-iops-total=1,x-bps-total=104857600')
  self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev))
  self.vm.add_device('virtio-blk,iothread=iothr0,drive=throttled-node')
+if iotests.qemu_default_machine == 's390-ccw-virtio':
+self.vm.add_args('-no-shutdown')
  self.vm.launch()
  
  def tearDown(self) -> None:



ping. I guess, this will come after the release?

Re: [PATCH v13 0/7] s390x: CPU Topology

2022-12-12 Thread Pierre Morel





On 12/9/22 15:45, Cédric Le Goater wrote:

On 12/8/22 10:44, Pierre Morel wrote:

Hi,

Implementation discussions
==

CPU models
--

Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model
for old QEMU we could not activate it as usual from KVM but needed
a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
Checking and enabling this capability enables
S390_FEAT_CONFIGURATION_TOPOLOGY.

Migration
-

Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source
host the STFL(11) is provided to the guest.
Since the feature is already in the CPU model of older QEMU,
a migration from a new QEMU enabling the topology to an old QEMU
will keep STFL(11) enabled making the guest get an exception for
illegal operation as soon as it uses the PTF instruction.

A VMState keeping track of the S390_FEAT_CONFIGURATION_TOPOLOGY
allows to forbid the migration in such a case.

Note that the VMState will be used to hold information on the
topology once we implement topology change for a running guest.

Topology


Until we introduce bookss and drawers, polarization and dedication
the topology is kept very simple and is specified uniquely by
the core_id of the vCPU which is also the vCPU address.

Testing
===

To use the QEMU patches, you will need Linux V6-rc1 or newer,
or use the following Linux mainline patches:

f5ecfee94493 2022-07-20 KVM: s390: resetting the Topology-Change-Report
24fe0195bc19 2022-07-20 KVM: s390: guest support for topology function
0130337ec45b 2022-07-20 KVM: s390: Cleanup ipte lock access and SIIF 
fac..


Currently this code is for KVM only, I have no idea if it is interesting
to provide a TCG patch. If ever it will be done in another series.

Documentation
=

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch of this series.

The admin will want to match the host and the guest topology, taking
into account that the guest does not recognize multithreading.
Consequently, two vCPU assigned to threads of the same real CPU should
preferably be assigned to the same socket of the guest machine.

Future developments
===

Two series are actively prepared:
- Adding drawers, book, polarization and dedication to the vCPU.
- changing the topology with a running guest


Since we have time before the next QEMU 8.0 release, could you please
send the whole patchset ? Having the full picture would help in taking
decisions for downstream also.

I am still uncertain about the usefulness of S390Topology object because,
as for now, the state can be computed on the fly, the reset can be
handled at the machine level directly under s390_machine_reset() and so
could migration if the machine had a vmstate (not the case today but
quite easy to add). So before merging anything that could be difficult
to maintain and/or backport, I would prefer to see it all !



The goal of dedicating an object for topology was to ease the 
maintenance and portability by using the QEMU object framework.


If on contrary it is a problem for maintenance or backport we surely 
better not use it.


Any other opinion?

Regards,
Pierre

--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH v13 0/7] s390x: CPU Topology

2022-12-12 Thread Thomas Huth


On 12/12/2022 11.10, Pierre Morel wrote:



On 12/12/22 10:07, Thomas Huth wrote:

On 12/12/2022 09.51, Pierre Morel wrote:



On 12/9/22 14:32, Thomas Huth wrote:

On 08/12/2022 10.44, Pierre Morel wrote:

Hi,

Implementation discussions
==

CPU models
--

Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model
for old QEMU we could not activate it as usual from KVM but needed
a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
Checking and enabling this capability enables
S390_FEAT_CONFIGURATION_TOPOLOGY.

Migration
-

Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source
host the STFL(11) is provided to the guest.
Since the feature is already in the CPU model of older QEMU,
a migration from a new QEMU enabling the topology to an old QEMU
will keep STFL(11) enabled making the guest get an exception for
illegal operation as soon as it uses the PTF instruction.


I now thought that it is not possible to enable "ctop" on older QEMUs 
since the don't enable the KVM capability? ... or is it still somehow 
possible? What did I miss?


  Thomas


Enabling ctop with ctop=on on old QEMU is not possible, this is right.
But, if STFL(11) is enable in the source KVM by a new QEMU, I can see 
that even with -ctop=off the STFL(11) is migrated to the destination.


Is this with the "host" CPU model or another one? And did you explicitly 
specify "ctop=off" at the command line, or are you just using the default 
setting by not specifying it?


With explicit cpumodel and using ctop=off like in

sudo /usr/local/bin/qemu-system-s390x_master \
  -m 512M \
  -enable-kvm -smp 4,sockets=4,cores=2,maxcpus=8 \
  -cpu z14,ctop=off \
  -machine s390-ccw-virtio-7.2,accel=kvm \
  ...


Ok ... that sounds like a bug somewhere in your patches or in the kernel 
code ... the guest should never see STFL bit 11 if ctop=off, should it?


 Thomas

[PATCH v4] riscv: Allow user to set the satp mode

2022-12-12 Thread Alexandre Ghiti

RISC-V specifies multiple sizes for addressable memory and Linux probes for
the machine's support at startup via the satp CSR register (done in
csr.c:validate_vm).

As per the specification, sv64 must support sv57, which in turn must
support sv48...etc. So we can restrict machine support by simply setting the
"highest" supported mode and the bare mode is always supported.

You can set the satp mode using the new properties "mbare", "sv32",
"sv39", "sv48", "sv57" and "sv64" as follows:
-cpu rv64,sv57=on # Linux will boot using sv57 scheme
-cpu rv64,sv39=on # Linux will boot using sv39 scheme

We take the highest level set by the user:
-cpu rv64,sv48=on,sv57=on # Linux will boot using sv57 scheme

We make sure that invalid configurations are rejected:
-cpu rv64,sv32=on # Can't enable 32-bit satp mode in 64-bit
-cpu rv64,sv39=off,sv48=on # sv39 must be supported if higher modes are
   # enabled

We accept "redundant" configurations:
-cpu rv64,sv48=on,sv57=off # sv39 must be supported if higher modes are

In addition, we now correctly set the device-tree entry 'mmu-type' using
those new properties.

Co-Developed-by: Ludovic Henry 
Signed-off-by: Ludovic Henry 
Signed-off-by: Alexandre Ghiti 
---
v4:
- Use custom boolean properties instead of OnOffAuto properties, based
  on ARMVQMap, as suggested by Andrew

v3:
- Free sv_name as pointed by Bin
- Replace satp-mode with boolean properties as suggested by Andrew
- Removed RB from Atish as the patch considerably changed

v2:
- Use error_setg + return as suggested by Alistair
- Add RB from Atish
- Fixed checkpatch issues missed in v1
- Replaced Ludovic email address with the rivos one

 hw/riscv/virt.c|  20 +++--
 target/riscv/cpu.c | 217 +++--
 target/riscv/cpu.h |  25 ++
 target/riscv/csr.c |  13 ++-
 4 files changed, 256 insertions(+), 19 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index a5bc7353b4..9bb5ba7366 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -228,7 +228,8 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 int cpu;
 uint32_t cpu_phandle;
 MachineState *mc = MACHINE(s);
-char *name, *cpu_name, *core_name, *intc_name;
+uint8_t satp_mode_max;
+char *name, *cpu_name, *core_name, *intc_name, *sv_name;
 
 for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
 cpu_phandle = (*phandle)++;
@@ -236,14 +237,15 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 cpu_name = g_strdup_printf("/cpus/cpu@%d",
 s->soc[socket].hartid_base + cpu);
 qemu_fdt_add_subnode(mc->fdt, cpu_name);
-if (riscv_feature(&s->soc[socket].harts[cpu].env,
-  RISCV_FEATURE_MMU)) {
-qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type",
-(is_32_bit) ? "riscv,sv32" : "riscv,sv48");
-} else {
-qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type",
-"riscv,none");
-}
+
+satp_mode_max = satp_mode_max_from_map(
+s->soc[socket].harts[cpu].cfg.satp_mode.map,
+is_32_bit);
+sv_name = g_strdup_printf("riscv,%s",
+  satp_mode_str(satp_mode_max, is_32_bit));
+qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", sv_name);
+g_free(sv_name);
+
 name = riscv_isa_string(&s->soc[socket].harts[cpu]);
 qemu_fdt_setprop_string(mc->fdt, cpu_name, "riscv,isa", name);
 g_free(name);
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index d14e95c9dc..639231ce2e 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -27,6 +27,7 @@
 #include "time_helper.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
+#include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
@@ -199,7 +200,7 @@ static const char * const riscv_intr_names[] = {
 "reserved"
 };
 
-static void register_cpu_props(DeviceState *dev);
+static void register_cpu_props(Object *obj);
 
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
 {
@@ -237,7 +238,7 @@ static void riscv_any_cpu_init(Object *obj)
 set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU);
 #endif
 set_priv_version(env, PRIV_VERSION_1_12_0);
-register_cpu_props(DEVICE(obj));
+register_cpu_props(obj);
 }
 
 #if defined(TARGET_RISCV64)
@@ -246,7 +247,7 @@ static void rv64_base_cpu_init(Object *obj)
 CPURISCVState *env = &RISCV_CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV64, 0);
-register_cpu_props(DEVICE(obj));
+register_cpu_props(obj);
 /* Set latest version of privileged specification */
 set_priv_version(env, PRIV_VERSION_1_12_0);
 }
@@ -279,7 +280,7 @@ static void rv128_base_cpu_init(Object *obj)
 CPURISCVStat

Re: [PATCH] include: Don't include qemu/osdep.h

2022-12-12 Thread Bin Meng

On Mon, Dec 12, 2022 at 3:05 PM Markus Armbruster  wrote:
>
> docs/devel/style.rst mandates:
>
> The "qemu/osdep.h" header contains preprocessor macros that affect
> the behavior of core system headers like .  It must be
> the first include so that core system headers included by external
> libraries get the preprocessor macros that QEMU depends on.
>
> Do not include "qemu/osdep.h" from header files since the .c file
> will have already included it.
>
> A few violations have crept in.  Fix them.
>
> Signed-off-by: Markus Armbruster 
> ---
>  bsd-user/qemu.h | 1 -
>  crypto/block-luks-priv.h| 1 -
>  include/hw/cxl/cxl_host.h   | 1 -
>  include/hw/input/pl050.h| 1 -
>  include/hw/tricore/triboard.h   | 1 -
>  include/qemu/userfaultfd.h  | 1 -
>  net/vmnet_int.h | 1 -
>  qga/cutils.h| 1 -
>  target/hexagon/hex_arch_types.h | 1 -
>  target/hexagon/mmvec/macros.h   | 1 -
>  target/riscv/pmu.h  | 1 -
>  qga/cutils.c| 3 ++-
>  12 files changed, 2 insertions(+), 12 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH 2/5] include/hw/cxl: Include hw/cxl/*.h where needed

2022-12-12 Thread Jonathan Cameron via

On Fri,  9 Dec 2022 14:47:59 +0100
Markus Armbruster  wrote:

> hw/cxl/cxl_component.h needs CDATObject from hw/cxl/cxl_cdat.h.
> 
> hw/cxl/cxl_device.h needs CXLComponentState from
> hw/cxl/cxl_component.h.
> 
> Signed-off-by: Markus Armbruster 

Acked-by: Jonathan Cameron 

Thanks,

> ---
>  include/hw/cxl/cxl_component.h | 1 +
>  include/hw/cxl/cxl_device.h| 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> index 34075cfb72..5dca21e95b 100644
> --- a/include/hw/cxl/cxl_component.h
> +++ b/include/hw/cxl/cxl_component.h
> @@ -18,6 +18,7 @@
>  #include "qemu/compiler.h"
>  #include "qemu/range.h"
>  #include "qemu/typedefs.h"
> +#include "hw/cxl/cxl_cdat.h"
>  #include "hw/register.h"
>  #include "qapi/error.h"
>  
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index fd475b947b..3f91969db0 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -10,6 +10,7 @@
>  #ifndef CXL_DEVICE_H
>  #define CXL_DEVICE_H
>  
> +#include "hw/cxl/cxl_component.h"
>  #include "hw/pci/pci.h"
>  #include "hw/register.h"
>

Re: [PATCH 3/5] hw/acpi/Kconfig: Add missing dependencies to ACPI_ICH9

2022-12-12 Thread Philippe Mathieu-Daudé


On 8/12/22 00:12, Bernhard Beschow wrote:

ich9_lpc_realize() uses apm_init() and ich9_smbus_realize() uses
pm_smbus_init(), so both APM and ACPI_SMBUS are provided by the device
models managed by ACPI_ICH9.

Signed-off-by: Bernhard Beschow 
---
  hw/acpi/Kconfig | 2 ++
  hw/i386/Kconfig | 1 -
  hw/isa/Kconfig  | 1 -
  3 files changed, 2 insertions(+), 2 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v2 1/5] qemu-img.c: Move IO_BUF_SIZE to the top of the file

2022-12-12 Thread Hanna Reitz


On 28.11.22 15:15, Nir Soffer wrote:

This macro is used by various commands (compare, convert, rebase) but it
is defined somewhere in the middle of the file. I'm going to use it in
the new checksum command so lets clean up a bit before that.
---
  qemu-img.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)


Looks good, but is missing your S-o-b – with it added:

Reviewed-by: Hanna Reitz

[Bug 1523246] Re: Virtio-blk does not support TRIM

2022-12-12 Thread Thomas Huth

$ git tag --contains 37b06f8d46fe602e630e4 | grep ^v | sort | head -n1
v4.0.0

How did you check the size of the file? It might appear bigger than it
is due to sparse blocks.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1523246

Title:
  Virtio-blk does not support TRIM

Status in QEMU:
  Fix Released

Bug description:
  When model=virtio is used, TRIM is not supported.

  # mount -o discard /dev/vda4 /mnt
  # mount | tail -1
  /dev/vda4 on /mnt type fuseblk 
(rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other,blksize=4096)
  # fstrim /mnt/
  fstrim: /mnt/: the discard operation is not supported

  Booting without model=virtio allows using TRIM (in Windows as well).

  Full QEMU line:

  qemu-system-x86_64 -enable-kvm -cpu host -bios
  /usr/share/ovmf/ovmf_x64.bin -smp 2 -m 7G -vga qxl -usbdevice tablet
  -net nic,model=virtio -net user -drive discard=unmap,detect-
  zeroes=unmap,cache=none,file=vms/win10.hd.img.vmdk,format=vmdk,if=virtio

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1523246/+subscriptions

Re: [PATCH 4/5] hw/isa/Kconfig: Add missing dependency to VT82C686

2022-12-12 Thread Philippe Mathieu-Daudé


On 8/12/22 00:12, Bernhard Beschow wrote:

Both ACPI_PIIX4 (directly) and ACPI_ICH9 (indirectly) require ACPI to be
selected. Require it for VT82C686's ACPI controller too for consistency.

Signed-off-by: Bernhard Beschow 
---
  hw/isa/Kconfig | 1 +
  1 file changed, 1 insertion(+)

diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig
index 0a6a04947c..bc2e3ecf02 100644
--- a/hw/isa/Kconfig
+++ b/hw/isa/Kconfig
@@ -63,6 +63,7 @@ config VT82C686
  select IDE_VIA
  select MC146818RTC
  select PARALLEL
+depends on ACPI


The VT82C686 *provides* the ACPI interface, so here we want to "select"
ACPI (if we need a VT82C686, then ACPI will be available).

Re: [PATCH 5/5] hw/ppc/Kconfig: Remove unused dependencies from PEGASOS2

2022-12-12 Thread Philippe Mathieu-Daudé


On 8/12/22 00:12, Bernhard Beschow wrote:

Removes the following dependencies from ppc-softmmu:
- CONFIG_ACPI_CPU_HOTPLUG
- CONFIG_ACPI_CXL
- CONFIG_ACPI_HMAT
- CONFIG_ACPI_MEMORY_HOTPLUG
- CONFIG_ACPI_NVDIMM
- CONFIG_ACPI_PCIHP
- CONFIG_ACPI_PIIX4
- CONFIG_ACPI_X86
- CONFIG_MEM_DEVICE

Signed-off-by: Bernhard Beschow 
---
  hw/ppc/Kconfig | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index b8d2522f45..0ab77177a8 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -77,7 +77,7 @@ config PEGASOS2
  select SMBUS_EEPROM
  select VOF
  # This should come with VT82C686
-select ACPI_X86
+select ACPI


With the previous patch (fixed) you can remove both the comment and
the "select" line.

Re: [PATCH v2 2/5] Support format or cache specific out file

2022-12-12 Thread Hanna Reitz


On 28.11.22 15:15, Nir Soffer wrote:

Extend the test finder to find tests with format (*.out.qcow2) or cache
specific (*.out.nocache) out file. This worked before only for the
numbered tests.
---
  tests/qemu-iotests/findtests.py | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)


This patch lacks an S-o-b, too.


diff --git a/tests/qemu-iotests/findtests.py b/tests/qemu-iotests/findtests.py
index dd77b453b8..f4344ce78c 100644
--- a/tests/qemu-iotests/findtests.py
+++ b/tests/qemu-iotests/findtests.py
@@ -38,31 +38,37 @@ def chdir(path: Optional[str] = None) -> Iterator[None]:
  os.chdir(saved_dir)
  
  
  class TestFinder:

  def __init__(self, test_dir: Optional[str] = None) -> None:
  self.groups = defaultdict(set)
  
  with chdir(test_dir):

  self.all_tests = glob.glob('[0-9][0-9][0-9]')
  self.all_tests += [f for f in glob.iglob('tests/*')
-   if not f.endswith('.out') and
-   os.path.isfile(f + '.out')]
+   if self.is_test(f)]


So previously a file was only considered a test file if there was a 
corresponding reference output file (`f + '.out'`), so files without 
such a reference output aren’t considered test files...



  for t in self.all_tests:
  with open(t, encoding="utf-8") as f:
  for line in f:
  if line.startswith('# group: '):
  for g in line.split()[2:]:
  self.groups[g].add(t)
  break
  
+def is_test(self, fname: str) -> bool:

+"""
+The tests directory contains tests (no extension) and out files
+(*.out, *.out.{format}, *.out.{option}).
+"""
+return re.search(r'.+\.out(\.\w+)?$', fname) is None


...but this new function doesn’t check that.  I think we should check it 
(just whether there’s any variant of `/{fname}\.out(\.\w+)?/` to go with 
`fname`) so that behavior isn’t changed.


Hanna

Re: [PATCH 1/5] include/hw/pci: Clean up superfluous inclusion of pci/.h cxl/*.h

2022-12-12 Thread Jonathan Cameron via

On Fri,  9 Dec 2022 14:47:58 +0100
Markus Armbruster  wrote:

Hi Markus,

One comment on the CXL ones.  Others CXL related changes
all looks fine to me.

Thanks for cleaning these up.

Jonathan

> diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
> index 38e0e271d5..5129557bee 100644
> --- a/include/hw/cxl/cxl.h
> +++ b/include/hw/cxl/cxl.h
> @@ -13,7 +13,6 @@
>  
>  #include "qapi/qapi-types-machine.h"
>  #include "qapi/qapi-visit-machine.h"
> -#include "hw/pci/pci_bridge.h"

If we drop this, we probably want a forwards def of
struct PXBDev

I should probably be using the typedef in here as well rather
than struct PXBDev * in CXLFixed Window so we'd need
to deal with making that visible too.

>  #include "hw/pci/pci_host.h"
>  #include "cxl_pci.h"
>  #include "cxl_component.h"

>  #define CXL_VENDOR_ID 0x1e98

Re: [PATCH v2 4/5] iotests: Test qemu-img checksum

2022-12-12 Thread Hanna Reitz


On 28.11.22 15:15, Nir Soffer wrote:

Add simple tests computing a checksum for image with all kinds of
extents in raw and qcow2 formats.

The test can be extended later for other formats, format options (e..g
compressed qcow2), protocols (e.g. nbd), and image with a backing chain,
but I'm not sure this is really needed.

To help debugging in case of failures, the output includes a json map of
the test image.

Signed-off-by: Nir Soffer 
---
  tests/qemu-iotests/tests/qemu-img-checksum| 63 +++
  .../tests/qemu-img-checksum.out.qcow2 | 11 
  .../tests/qemu-img-checksum.out.raw   | 10 +++
  3 files changed, 84 insertions(+)
  create mode 100755 tests/qemu-iotests/tests/qemu-img-checksum
  create mode 100644 tests/qemu-iotests/tests/qemu-img-checksum.out.qcow2
  create mode 100644 tests/qemu-iotests/tests/qemu-img-checksum.out.raw


Thanks!

Reviewed-by: Hanna Reitz

Re: [PATCH v2 3/5] qemu-img: Add checksum command

2022-12-12 Thread Hanna Reitz


On 28.11.22 15:15, Nir Soffer wrote:

The checksum command compute a checksum for disk image content using the
blkhash library[1]. The blkhash library is not packaged yet, but it is
available via copr[2].

Example run:

 $ ./qemu-img checksum -p fedora-35.qcow2
 6e5c00c995056319d52395f8d91c7f84725ae3da69ffcba4de4c7d22cff713a5  
fedora-35.qcow2

The block checksum is constructed by splitting the image to fixed sized
blocks and computing a digest of every block. The image checksum is the
digest of the all block digests.

The checksum uses internally the "sha256" algorithm but it cannot be
compared with checksums created by other tools such as `sha256sum`.

The blkhash library supports sparse images, zero detection, and
optimizes zero block hashing (they are practically free). The library
uses multiple threads to speed up the computation.

Comparing to `sha256sum`, `qemu-img checksum` is 3.5-4800[3] times
faster, depending on the amount of data in the image:

 $ ./qemu-img info /scratch/50p.raw
 file format: raw
 virtual size: 6 GiB (6442450944 bytes)
 disk size: 2.91 GiB

 $ hyperfine -w2 -r5 -p "sleep 1" "./qemu-img checksum /scratch/50p.raw" \
  "sha256sum /scratch/50p.raw"
 Benchmark 1: ./qemu-img checksum /scratch/50p.raw
   Time (mean ± σ):  1.849 s ±  0.037 s[User: 7.764 s, System: 
0.962 s]
   Range (min … max):1.813 s …  1.908 s5 runs

 Benchmark 2: sha256sum /scratch/50p.raw
   Time (mean ± σ): 14.585 s ±  0.072 s[User: 13.537 s, System: 
1.003 s]
   Range (min … max):   14.501 s … 14.697 s5 runs

 Summary
   './qemu-img checksum /scratch/50p.raw' ran
 7.89 ± 0.16 times faster than 'sha256sum /scratch/50p.raw'

The new command is available only when `blkhash` is available during
build. To test the new command please install the `blkhash-devel`
package:

 $ dnf copr enable nsoffer/blkhash
 $ sudo dnf install blkhash-devel

[1] https://gitlab.com/nirs/blkhash
[2] https://copr.fedorainfracloud.org/coprs/nsoffer/blkhash/
[3] Computing checksum for 8T empty image: qemu-img checksum: 3.7s,
 sha256sum (estimate): 17,749s

Signed-off-by: Nir Soffer 
---
  docs/tools/qemu-img.rst |  24 ++
  meson.build |  10 ++-
  meson_options.txt   |   2 +
  qemu-img-cmds.hx|   8 ++
  qemu-img.c  | 183 
  5 files changed, 226 insertions(+), 1 deletion(-)


[...]


diff --git a/qemu-img.c b/qemu-img.c
index c03d6b4b31..4b4ca7add3 100644
--- a/qemu-img.c
+++ b/qemu-img.c


[...]


@@ -1613,20 +1617,199 @@ out:
  qemu_vfree(buf1);
  qemu_vfree(buf2);
  blk_unref(blk2);
  out2:
  blk_unref(blk1);
  out3:
  qemu_progress_end();
  return ret;
  }
  
+#ifdef CONFIG_BLKHASH

+/*
+ * Compute image checksum.
+ */
+static int img_checksum(int argc, char **argv)
+{
+const char *digest_name = "sha256";
+const size_t block_size = 64 * KiB;
+
+_Static_assert(QEMU_IS_ALIGNED(IO_BUF_SIZE, block_size),
+   "IO_BUF_SIZE should be alligned to block_size");


(s/alligned/aligned/)

A suggestion: We have a `QEMU_BUILD_BUG_MSG()` macro in 
include/qemu/compiler.h.  Nowadays it just unconditionally resolves to 
_Static_assert, I think before C11 was adopted it used a custom 
implementation.  Still, it is what seems to be used throughout the 
actual qemu code (disregarding roms/ and pc-bios/), so I think it would 
be more fitting to use.


But that’s just a suggestion.  It always resolves to _Static_assert 
anyway, so using _Static_assert seems by no means wrong.


So with the spelling fixed:

Reviewed-by: Hanna Reitz

Re: [PATCH v2 5/5] qemu-img: Speed up checksum

2022-12-12 Thread Hanna Reitz


On 28.11.22 15:15, Nir Soffer wrote:

Add coroutine based loop inspired by `qemu-img convert` design.

Changes compared to `qemu-img convert`:

- State for the entire image is kept in ImgChecksumState

- State for single worker coroutine is kept in ImgChecksumworker.

- "Writes" are always in-order, ensured using a queue.

- Calling block status once per image extent, when the current extent is
   consumed by the workers.

- Using 1m buffer size - testings shows that this gives best read
   performance both with buffered and direct I/O.

- Number of coroutines is not configurable. Testing does not show
   improvement when using more than 8 coroutines.

- Progress include entire image, not only the allocated state.

Comparing to the simple read loop shows that this version is up to 4.67
times faster when computing a checksum for an image full of zeroes. For
real images it is 1.59 times faster with direct I/O, and with buffered
I/O there is no difference.

Test results on Dell PowerEdge R640 in a CentOS Stream 9 container:

| image| size | i/o   | before | after  | change |
|--|--|---||||
| zero [1] |   6g | buffered  | 1.600s ±0.014s | 0.342s ±0.016s |  x4.67 |
| zero |   6g | direct| 4.684s ±0.093s | 2.211s ±0.009s |  x2.12 |
| real [2] |   6g | buffered  | 1.841s ±0.075s | 1.806s ±0.036s |  x1.02 |
| real |   6g | direct| 3.094s ±0.079s | 1.947s ±0.017s |  x1.59 |
| nbd  [3] |   6g | buffered  | 2.455s ±0.183s | 1.808s ±0.016s |  x1.36 |
| nbd  |   6g | direct| 3.540s ±0.020s | 1.749s ±0.018s |  x2.02 |

[1] raw image full of zeroes
[2] raw fedora 35 image with additional random data, 50% full
[3] image [2] exported by qemu-nbd via unix socket

Signed-off-by: Nir Soffer 
---
  qemu-img.c | 350 ++---
  1 file changed, 277 insertions(+), 73 deletions(-)


Reviewed-by: Hanna Reitz

[PATCH-for-8.0] hw/acpi: Rename tco.c -> ich9_tco.c

2022-12-12 Thread Philippe Mathieu-Daudé

tco.c contains the ICH9 implementation of its "total cost
of ownership". Rename it accordingly to emphasis this is
a part of the ICH9 model.

Signed-off-by: Philippe Mathieu-Daudé 
---
Based-on: <20221207231205.1106381-1-shen...@gmail.com>
  "Clean up dependencies of ACPI controllers"
---
 MAINTAINERS   | 4 ++--
 hw/acpi/ich9.c| 2 +-
 hw/acpi/{tco.c => ich9_tco.c} | 2 +-
 hw/acpi/meson.build   | 2 +-
 include/hw/acpi/ich9.h| 2 +-
 include/hw/acpi/{tco.h => ich9_tco.h} | 2 +-
 tests/qtest/tco-test.c| 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
 rename hw/acpi/{tco.c => ich9_tco.c} (99%)
 rename include/hw/acpi/{tco.h => ich9_tco.h} (97%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6966490c94..35bde4a97e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1644,8 +1644,8 @@ F: hw/isa/piix3.c
 F: hw/isa/lpc_ich9.c
 F: hw/i2c/smbus_ich9.c
 F: hw/acpi/piix4.c
-F: hw/acpi/ich9.c
-F: include/hw/acpi/ich9.h
+F: hw/acpi/ich9*.c
+F: include/hw/acpi/ich9*.h
 F: include/hw/southbridge/piix.h
 F: hw/misc/sga.c
 F: hw/isa/apm.c
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index bd9bbade70..b10afa372a 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -34,7 +34,7 @@
 #include "sysemu/reset.h"
 #include "sysemu/runstate.h"
 #include "hw/acpi/acpi.h"
-#include "hw/acpi/tco.h"
+#include "hw/acpi/ich9_tco.h"
 
 #include "hw/i386/ich9.h"
 #include "hw/mem/pc-dimm.h"
diff --git a/hw/acpi/tco.c b/hw/acpi/ich9_tco.c
similarity index 99%
rename from hw/acpi/tco.c
rename to hw/acpi/ich9_tco.c
index 4783721e4e..b68348707b 100644
--- a/hw/acpi/tco.c
+++ b/hw/acpi/ich9_tco.c
@@ -12,7 +12,7 @@
 #include "hw/i386/ich9.h"
 #include "migration/vmstate.h"
 
-#include "hw/acpi/tco.h"
+#include "hw/acpi/ich9_tco.h"
 #include "trace.h"
 
 enum {
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index cfae2f58f6..30054a8cdc 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -22,7 +22,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_PIIX4', if_true: 
files('piix4.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_true: files('pcihp.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_false: 
files('acpi-pci-hotplug-stub.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_VIOT', if_true: files('viot.c'))
-acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'tco.c'))
+acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'ich9_tco.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_ERST', if_true: files('erst.c'))
 acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: 
files('ipmi-stub.c'))
 acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c'))
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index 7ca92843c6..d41866a229 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -27,7 +27,7 @@
 #include "hw/acpi/pcihp.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/acpi_dev_interface.h"
-#include "hw/acpi/tco.h"
+#include "hw/acpi/ich9_tco.h"
 
 #define ACPI_PCIHP_ADDR_ICH9 0x0cc0
 
diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/ich9_tco.h
similarity index 97%
rename from include/hw/acpi/tco.h
rename to include/hw/acpi/ich9_tco.h
index a1e0da8213..c4393caee0 100644
--- a/include/hw/acpi/tco.h
+++ b/include/hw/acpi/ich9_tco.h
@@ -1,5 +1,5 @@
 /*
- * QEMU ICH9 TCO emulation
+ * QEMU ICH9 TCO emulation (total cost of ownership)
  *
  * Copyright (c) 2015 Paulo Alcantara 
  *
diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c
index 254f735370..d7b61ccfa5 100644
--- a/tests/qtest/tco-test.c
+++ b/tests/qtest/tco-test.c
@@ -16,7 +16,7 @@
 #include "hw/pci/pci_regs.h"
 #include "hw/i386/ich9.h"
 #include "hw/acpi/ich9.h"
-#include "hw/acpi/tco.h"
+#include "hw/acpi/ich9_tco.h"
 
 #define RCBA_BASE_ADDR0xfed1c000
 #define PM_IO_BASE_ADDR   0xb000
-- 
2.38.1

Re: [PATCH v4 4/4] scripts: add script to compare compatible properties

2022-12-12 Thread Dr. David Alan Gilbert

* Maksim Davydov (davydov-...@yandex-team.ru) wrote:
> This script run QEMU to obtain compat_props of machines and default
> values of different types and produce appropriate table. This table
> can be used to compare machine types to choose the most suitable
> machine. Also this table in json or csv format should be used to check that
> new machine doesn't affect previous ones by comparing tables with and
> without new machine.
> Default values of properties are needed to fill "holes" in the table (one
> machine has these properties and another not. For instance, 2.12 mt has
> `{ "EPYC-" TYPE_X86_CPU, "xlevel", "0x800a" }`, but compat_pros of
> 3.1 mt doesn't have it. So, to compare these machines we need to fill
> unknown value of "EPYC-x86_64-cpu-xlevel" for 3.1 mt. This unknown value
> in the table I called "hole". To get values (default values) for these
> "holes" the script uses list of appropriate methods.)
> 
> Notes:
> * some init values from the devices can't be available like properties
>   from virtio-9p when configure has --disable-virtfs. This situations will
>   be seen in the table as "unavailable driver".
> * Default values can be obtained in an unobvious way, like x86 features.
>   If the script doesn't know how to get property default value to compare
>   one machine with another it fills "holes" with "unavailable method". This
>   is done because script uses whitelist model to get default values of
>   different types. It means that the method that can't be applied to a new
>   type that can crash this script. It is better to get an "unavailable
>   driver" when creating a new machine with new compatible properties than
>   to break this script. So it turns out a more stable and generic script.
> * If the default value can't be obtained because this property doesn't
>   exist or because this property can't have default value, appropriate
>   "hole" will be filled by "unknown property" or "no default value"
> * If the property is applied to the abstract class, the script collects
>   default values from all child classes (set of default values)

Nice;  just a suggestion - have you considered adding a flag to specify
the qemu binaries separately, so that you can compare the machine type
definitions of two qemu binaries for the same machine type?

Dave

> Example: ./scripts/compare_mt.py --mt pc-q35-3.1 pc-q35-4.0
> 
> ╒═══╤══╤╕
> │   │  pc-q35-3.1  │  
>pc-q35-4.0 │
> ╞═══╪══╪╡
> │ Cascadelake-Server-x86_64-cpu:mpx │ True │  
>  False│
> ├───┼──┼┤
> │ Cascadelake-Server-x86_64-cpu:stepping│  5   │  
>6  │
> ├───┼──┼┤
> │ Icelake-Client-x86_64-cpu:mpx │ True │ 
> unavailable driver │
> ├───┼──┼┤
> │ Icelake-Server-x86_64-cpu:mpx │ True │  
>  False│
> ├───┼──┼┤
> │ Opteron_G3-x86_64-cpu:rdtscp  │False │  
>   True│
> ├───┼──┼┤
> │ Opteron_G4-x86_64-cpu:rdtscp  │False │  
>   True│
> ├───┼──┼┤
> │ Opteron_G5-x86_64-cpu:rdtscp  │False │  
>   True│
> ├───┼──┼┤
> │ Skylake-Client-IBRS-x86_64-cpu:mpx│ True │  
>  False│
> ├───┼──┼┤
> │ Skylake-Client-x86_64-cpu:mpx │ True │  
>  False│
> ├───┼──┼┤
> │ Skylake-Server-IBRS-x86_64-cpu:mpx│ True │  
>  False│
> ├───┼──┼┤
> │ Skylake-Server-x86_64-cpu:mpx │ True │  
>  False│
> ├───┼──┼┤
> │ intel-iommu:dma-drain │False │  
>

Re: [PATCH 6/5] include/hw/cxl: Break inclusion loop

2022-12-12 Thread Jonathan Cameron via

On Sat, 10 Dec 2022 08:09:06 +0100
Markus Armbruster  wrote:

> Markus Armbruster  writes:
> 
> > hw/cxl/cxl_pci.h and hw/cxl/cxl_cdat.h include each other.  Neither
> > header actually needs the other one.  Drop both #include directives.
> >
> > Signed-off-by: Markus Armbruster 
> > ---
> >  include/hw/cxl/cxl_cdat.h | 1 -
> >  include/hw/cxl/cxl_pci.h  | 1 -
> >  2 files changed, 2 deletions(-)
> >
> > diff --git a/include/hw/cxl/cxl_cdat.h b/include/hw/cxl/cxl_cdat.h
> > index 7f67638685..e3fd737f9d 100644
> > --- a/include/hw/cxl/cxl_cdat.h
> > +++ b/include/hw/cxl/cxl_cdat.h
> > @@ -10,7 +10,6 @@
> >  #ifndef CXL_CDAT_H
> >  #define CXL_CDAT_H
> >  
> > -#include "hw/cxl/cxl_pci.h"
> >  #include "hw/pci/pcie_doe.h"

The include was to get to CXL_VENDOR_ID which is in hw/cxl/cxl_pci.h
Can move that elsewhere perhaps, though I don't think we need to
if we break the loop by dropping the other one.


> >  
> >  /*
> > diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h
> > index aca14845ab..01e15ed5b4 100644
> > --- a/include/hw/cxl/cxl_pci.h
> > +++ b/include/hw/cxl/cxl_pci.h
> > @@ -11,7 +11,6 @@
> >  #define CXL_PCI_H
> >  
> >  #include "qemu/compiler.h"
> > -#include "hw/cxl/cxl_cdat.h"
Guess that's a left over of some earlier refactoring. Good to get rid
of this one.

> >  
> >  #define CXL_VENDOR_ID 0x1e98  
> 
> Friday afternoon post with insufficient testing...  Everything still
> builds fine, but cxl_component.h is no longer self-contained.  I'll
> squash in the appended patch and revise the commit message.

By staring at the code rather than any automation I'm failing to spot
what it needs from cxl_pci.h.  Can you add that info to the commit message?

> 
> 
> diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> index 5dca21e95b..78f83ed742 100644
> --- a/include/hw/cxl/cxl_component.h
> +++ b/include/hw/cxl/cxl_component.h
> @@ -19,6 +19,7 @@
>  #include "qemu/range.h"
>  #include "qemu/typedefs.h"
>  #include "hw/cxl/cxl_cdat.h"
> +#include "hw/cxl/cxl_pci.h"
>  #include "hw/register.h"
>  #include "qapi/error.h"
>  
>

Re: [PATCH v1 06/24] vfio-user: Define type vfio_user_pci_dev_info

2022-12-12 Thread John Levon

On Mon, Dec 12, 2022 at 10:01:33AM +0100, Cédric Le Goater wrote:

> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 80b03a2..dc19869 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -19,6 +19,7 @@
> >*/
> >   #include "qemu/osdep.h"
> > +#include CONFIG_DEVICES
> >   #include 
> >   #include 
> > @@ -3421,3 +3422,91 @@ static void register_vfio_pci_dev_type(void)
> >   }
> >   type_init(register_vfio_pci_dev_type)
> > +
> > +
> > +#ifdef CONFIG_VFIO_USER_PCI
> 
> Why not introduce a new file hw/vfio/user.c file ? It would be
> cleaner.

user.c is in this series, and holds the vfio-user implementation - it's not a
PCI specific thing. So it would have to be hw/vfio/user_pci.c or something

regards
john

Re: [PATCH v2 2/5] target/riscv: Update VS timer whenever htimedelta changes

2022-12-12 Thread Anup Patel

On Mon, Dec 12, 2022 at 11:23 AM Alistair Francis  wrote:
>
> On Thu, Dec 8, 2022 at 6:41 PM Anup Patel  wrote:
> >
> > On Thu, Dec 8, 2022 at 9:00 AM Alistair Francis  
> > wrote:
> > >
> > > On Tue, Nov 8, 2022 at 11:07 PM Anup Patel  
> > > wrote:
> > > >
> > > > The htimedelta[h] CSR has impact on the VS timer comparison so we
> > > > should call riscv_timer_write_timecmp() whenever htimedelta changes.
> > > >
> > > > Fixes: 3ec0fe18a31f ("target/riscv: Add vstimecmp suppor")
> > > > Signed-off-by: Anup Patel 
> > > > Reviewed-by: Alistair Francis 
> > >
> > > This patch breaks my Xvisor test. When running OpenSBI and Xvisor like 
> > > this:
> > >
> > > qemu-system-riscv64 -machine virt \
> > > -m 1G -serial mon:stdio -serial null -nographic \
> > > -append 'vmm.console=uart@1000 vmm.bootcmd="vfs mount initrd
> > > /;vfs run /boot.xscript;vfs cat /system/banner.txt; guest kick guest0;
> > > vserial bind guest0/uart0"' \
> > > -smp 4 -d guest_errors \
> > > -bios none \
> > > -device loader,file=./images/qemuriscv64/vmm.bin,addr=0x8020 \
> > > -kernel ./images/qemuriscv64/fw_jump.elf \
> > > -initrd ./images/qemuriscv64/vmm-disk-linux.img -cpu rv64,h=true
> > >
> > > Running:
> > >
> > > Xvisor v0.3.0-129-gbc33f339 (Jan  1 1970 00:00:00)
> > >
> > > I see this failure:
> > >
> > > INIT: bootcmd:  guest kick guest0
> > >
> > > guest0: Kicked
> > >
> > > INIT: bootcmd:  vserial bind guest0/uart0
> > >
> > > [guest0/uart0] cpu_vcpu_stage2_map: guest_phys=0x3B9AC000
> > > size=0x4096 map failed
> > >
> > > do_error: CPU3: VCPU=guest0/vcpu0 page fault failed (error -1)
> > >
> > >zero=0x  ra=0x80001B4E
> > >
> > >  sp=0x8001CF80  gp=0x
> > >
> > >  tp=0x  s0=0x8001CFB0
> > >
> > >  s1=0x  a0=0x10001048
> > >
> > >  a1=0x  a2=0x00989680
> > >
> > >  a3=0x3B9ACA00  a4=0x0048
> > >
> > >  a5=0x  a6=0x00019000
> > >
> > >  a7=0x  s2=0x
> > >
> > >  s3=0x  s4=0x
> > >
> > >  s5=0x  s6=0x
> > >
> > >  s7=0x  s8=0x
> > >
> > >  s9=0x s10=0x
> > >
> > > s11=0x  t0=0x4000
> > >
> > >  t1=0x0100  t2=0x
> > >
> > >  t3=0x  t4=0x
> > >
> > >  t5=0x  t6=0x
> > >
> > >sepc=0x80001918 sstatus=0x00024120
> > >
> > > hstatus=0x0002002001C0 sp_exec=0x10A64000
> > >
> > >  scause=0x0017   stval=0x3B9ACAF8
> > >
> > >   htval=0x0EE6B2BE  htinst=0x00D03021
> > >
> > > I have tried updating to a newer Xvisor release, but with that I don't
> > > get any serial output.
> > >
> > > Can you help get the Xvisor tests back up and running?
> >
> > I tried the latest Xvisor-next (https://github.com/avpatel/xvisor-next)
> > with your QEMU riscv-to-apply.next branch and it works fine (both
> > with and without Sstc).
>
> Does it work with the latest release?

Yes, the latest Xvisor-next repo works for QEMU v7.2.0-rc4 and
your riscv-to-apply.next branch (commit 51bb9de2d188)

Regards,
Anup

>
> Alistair
>
> >
> > Here's the QEMU command which I use:
> >
> > qemu-system-riscv64 -M virt -m 512M -nographic \
> > -bios opensbi/build/platform/generic/firmware/fw_jump.bin \
> > -kernel ../xvisor-next/build/vmm.bin \
> > -initrd rbd_v64.img \
> > -append "vmm.bootcmd=\"vfs mount initrd /;vfs run /boot.xscript;vfs
> > cat /system/banner.txt\"" \
> > -smp 4
> >
> > Also, I will be releasing Xvisor-0.3.2 by the end of Dec 2022 so I
> > suggest using this upcoming release in your test.
> >
> > Regards,
> > Anup

[PATCH v3 2/4] hw/nvme: rename shadow doorbell related trace events

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Rename the trace events related to writing the event index and reading
the doorbell value to make it more clear that the event is associated
with an actual update (write or read respectively).

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 11 +++
 hw/nvme/trace-events |  8 
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6b70c1e39831..cfab21b3436e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1337,8 +1337,9 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
 pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
-sizeof(cq->head));
-trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
+ sizeof(cq->head));
+
+trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
 }
 
 static void nvme_post_cqes(void *opaque)
@@ -6147,16 +6148,18 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
 {
+trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
+
 pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail,
   sizeof(sq->tail));
-trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail);
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
 pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail,
  sizeof(sq->tail));
-trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail);
+
+trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
 }
 
 static void nvme_process_sq(void *opaque)
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index fccb79f48973..b16f2260b4fd 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -84,8 +84,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
 pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
 pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, 
uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" 
dw1 0x%"PRIx32" status 0x%"PRIx16""
-pci_nvme_eventidx_cq(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" 
new_eventidx %"PRIu16""
-pci_nvme_eventidx_sq(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" 
new_eventidx %"PRIu16""
+pci_nvme_update_cq_eventidx(uint16_t cqid, uint16_t new_eventidx) "cqid 
%"PRIu16" new_eventidx %"PRIu16""
+pci_nvme_update_sq_eventidx(uint16_t sqid, uint16_t new_eventidx) "sqid 
%"PRIu16" new_eventidx %"PRIu16""
 pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
 pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 
0x%"PRIx64" data 0x%"PRIx64" size %d"
 pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" 
new_head %"PRIu16""
@@ -102,8 +102,8 @@ pci_nvme_mmio_start_success(void) "setting controller 
enable bit succeeded"
 pci_nvme_mmio_stopped(void) "cleared controller enable bit"
 pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
-pci_nvme_shadow_doorbell_cq(uint16_t cqid, uint16_t new_shadow_doorbell) "cqid 
%"PRIu16" new_shadow_doorbell %"PRIu16""
-pci_nvme_shadow_doorbell_sq(uint16_t sqid, uint16_t new_shadow_doorbell) "sqid 
%"PRIu16" new_shadow_doorbell %"PRIu16""
+pci_nvme_update_cq_head(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" 
new_head %"PRIu16""
+pci_nvme_update_sq_tail(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" 
new_tail %"PRIu16""
 pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-- 
2.38.1

[PATCH v3 4/4] hw/nvme: fix missing cq eventidx update

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Prior to reading the shadow doorbell cq head, we have to update the
eventidx. Otherwise, we risk that the driver will skip an mmio doorbell
write. This happens on riscv64, as reported by Guenter.

Adding the missing update to the cq eventidx fixes the issue.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Cc: qemu-ri...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8af70f0216f0..3df29ea68b2f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1334,10 +1334,22 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 }
 }
 
+static void nvme_update_cq_eventidx(const NvmeCQueue *cq)
+{
+uint32_t v = cpu_to_le32(cq->head);
+
+trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head);
+
+pci_dma_write(PCI_DEVICE(cq->ctrl), cq->ei_addr, &v, sizeof(v));
+}
+
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
-pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
- sizeof(cq->head));
+uint32_t v;
+
+pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v));
+
+cq->head = le32_to_cpu(v);
 
 trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
 }
@@ -1355,6 +1367,7 @@ static void nvme_post_cqes(void *opaque)
 hwaddr addr;
 
 if (n->dbbuf_enabled) {
+nvme_update_cq_eventidx(cq);
 nvme_update_cq_head(cq);
 }
 
-- 
2.38.1

[PATCH v3 1/4] hw/nvme: use QOM accessors

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Replace various ->parent_obj use with the equivalent QOM accessors.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 89 +++---
 1 file changed, 48 insertions(+), 41 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e54276dc1dc7..6b70c1e39831 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -449,7 +449,7 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void 
*buf, int size)
 return 0;
 }
 
-return pci_dma_read(&n->parent_obj, addr, buf, size);
+return pci_dma_read(PCI_DEVICE(n), addr, buf, size);
 }
 
 static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size)
@@ -469,7 +469,7 @@ static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const 
void *buf, int size)
 return 0;
 }
 
-return pci_dma_write(&n->parent_obj, addr, buf, size);
+return pci_dma_write(PCI_DEVICE(n), addr, buf, size);
 }
 
 static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
@@ -514,24 +514,27 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
 
 static void nvme_irq_check(NvmeCtrl *n)
 {
+PCIDevice *pci = PCI_DEVICE(n);
 uint32_t intms = ldl_le_p(&n->bar.intms);
 
-if (msix_enabled(&(n->parent_obj))) {
+if (msix_enabled(pci)) {
 return;
 }
 if (~intms & n->irq_status) {
-pci_irq_assert(&n->parent_obj);
+pci_irq_assert(pci);
 } else {
-pci_irq_deassert(&n->parent_obj);
+pci_irq_deassert(pci);
 }
 }
 
 static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 {
+PCIDevice *pci = PCI_DEVICE(n);
+
 if (cq->irq_enabled) {
-if (msix_enabled(&(n->parent_obj))) {
+if (msix_enabled(pci)) {
 trace_pci_nvme_irq_msix(cq->vector);
-msix_notify(&(n->parent_obj), cq->vector);
+msix_notify(pci, cq->vector);
 } else {
 trace_pci_nvme_irq_pin();
 assert(cq->vector < 32);
@@ -546,7 +549,7 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 {
 if (cq->irq_enabled) {
-if (msix_enabled(&(n->parent_obj))) {
+if (msix_enabled(PCI_DEVICE(n))) {
 return;
 } else {
 assert(cq->vector < 32);
@@ -570,7 +573,7 @@ static void nvme_req_clear(NvmeRequest *req)
 static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
 {
 if (dma) {
-pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
+pci_dma_sglist_init(&sg->qsg, PCI_DEVICE(n), 0);
 sg->flags = NVME_SG_DMA;
 } else {
 qemu_iovec_init(&sg->iov, 0);
@@ -1333,7 +1336,7 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
-pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head,
+pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
 sizeof(cq->head));
 trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
 }
@@ -1363,7 +1366,7 @@ static void nvme_post_cqes(void *opaque)
 req->cqe.sq_id = cpu_to_le16(sq->sqid);
 req->cqe.sq_head = cpu_to_le16(sq->head);
 addr = cq->dma_addr + cq->tail * n->cqe_size;
-ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
+ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe,
 sizeof(req->cqe));
 if (ret) {
 trace_pci_nvme_err_addr_write(addr);
@@ -4615,6 +4618,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 {
+PCIDevice *pci = PCI_DEVICE(n);
 uint16_t offset = (cq->cqid << 3) + (1 << 2);
 
 n->cq[cq->cqid] = NULL;
@@ -4625,8 +4629,8 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 event_notifier_set_handler(&cq->notifier, NULL);
 event_notifier_cleanup(&cq->notifier);
 }
-if (msix_enabled(&n->parent_obj)) {
-msix_vector_unuse(&n->parent_obj, cq->vector);
+if (msix_enabled(pci)) {
+msix_vector_unuse(pci, cq->vector);
 }
 if (cq->cqid) {
 g_free(cq);
@@ -4664,8 +4668,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
uint64_t dma_addr,
  uint16_t cqid, uint16_t vector, uint16_t size,
  uint16_t irq_enabled)
 {
-if (msix_enabled(&n->parent_obj)) {
-msix_vector_use(&n->parent_obj, vector);
+PCIDevice *pci = PCI_DEVICE(n);
+
+if (msix_enabled(pci)) {
+msix_vector_use(pci, vector);
 }
 cq->ctrl = n;
 cq->cqid = cqid;
@@ -4716,7 +4722,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_err_invalid_create_cq_addr(prp1);
 return NVME_INVALID_PRP_OFFSET | NVME_DNR;
 }
-if (unlikely(!msix_enabled(&n->parent_obj) && vector)) {
+if (unlikely(!msix_enabled(PCI_DEVICE(n)) && vector

[PATCH v3 3/4] hw/nvme: fix missing endian conversions for doorbell buffers

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

The eventidx and doorbell value are not handling endianness correctly.
Fix this.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index cfab21b3436e..8af70f0216f0 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6148,16 +6148,20 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
 {
+uint32_t v = cpu_to_le32(sq->tail);
+
 trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
 
-pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail,
-  sizeof(sq->tail));
+pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &v, sizeof(v));
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
-pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail,
- sizeof(sq->tail));
+uint32_t v;
+
+pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &v, sizeof(v));
+
+sq->tail = le32_to_cpu(v);
 
 trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
 }
-- 
2.38.1

[PATCH v3 0/4] hw/nvme: fix broken shadow doorbells on some platforms

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Guenter reports that hw/nvme is broken on riscv64[1] and big endian
platforms[2].

This is a regression since 7.1, so this does not warrent an rc5 for 7.2.
I'm sure Guenter can carry this patch in his tree, and maybe we can get
this out in a stable release.

On riscv, the issue is a missing cq eventidx update. I really wonder why
this issue only shows up on riscv64. We have not observed this on other
platforms (yet).

Further, Guenter also reported problems on big-endian platforms. The
issue here is missing endian conversions which patch 3 addresses. This
also requires a fix for the Linux kernel that I am posting separately
(can't link to it, chicken and egg problem).

  [1]: https://lore.kernel.org/qemu-devel/20221207174918.ga1151...@roeck-us.net/
  [2]: https://lore.kernel.org/qemu-devel/20221209110022.ga3396...@roeck-us.net/

v3:
 - add patch to fix big-endian platforms

v2:
 - use QOM accessor (Philippe)
 - added some cleanup patches in front

Klaus Jensen (4):
  hw/nvme: use QOM accessors
  hw/nvme: rename shadow doorbell related trace events
  hw/nvme: fix missing endian conversions for doorbell buffers
  hw/nvme: fix missing cq eventidx update

 hw/nvme/ctrl.c   | 121 ++-
 hw/nvme/trace-events |   8 +--
 2 files changed, 78 insertions(+), 51 deletions(-)

-- 
2.38.1

Re: [PATCH 1/2] include/hw/virtio: Break inclusion loop

2022-12-12 Thread Stefano Garzarella


On Sat, Dec 10, 2022 at 02:39:14PM +0100, Markus Armbruster wrote:

hw/virtio/virtio.h and hw/virtio/vhost.h include each other.  The
former doesn't actually need the latter, so drop that inclusion to
break the loop.

Signed-off-by: Markus Armbruster 
---
include/hw/virtio/virtio.h | 1 -
hw/virtio/virtio.c | 1 +
2 files changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Stefano Garzarella 



diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index a973811cbf..8b68b69e00 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -22,7 +22,6 @@
#include "standard-headers/linux/virtio_config.h"
#include "standard-headers/linux/virtio_ring.h"
#include "qom/object.h"
-#include "hw/virtio/vhost.h"

/*
 * A guest should never accept this. It implies negotiation is broken
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index eb6347ab5d..faedf886da 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -26,6 +26,7 @@
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
--
2.37.3

Re: [PATCH v3 3/4] hw/nvme: fix missing endian conversions for doorbell buffers

2022-12-12 Thread Philippe Mathieu-Daudé


On 12/12/22 12:32, Klaus Jensen wrote:

From: Klaus Jensen 

The eventidx and doorbell value are not handling endianness correctly.
Fix this.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
  hw/nvme/ctrl.c | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH] s390x/pci: reset ISM passthrough devices on shutdown and system reset

2022-12-12 Thread Thomas Huth


On 09/12/2022 20.57, Matthew Rosato wrote:

ISM device firmware stores unique state information that can
can cause a wholesale unmap of the associated IOMMU (e.g. when
we get a termination signal for QEMU) to trigger firmware errors
because firmware believes we are attempting to invalidate entries
that are still in-use by the guest OS (when in fact that guest is
in the process of being terminated or rebooted).
To alleviate this, register both a shutdown notifier (for unexpected
termination cases e.g. virsh destroy) as well as a reset callback
(for cases like guest OS reboot).  For each of these scenarios, trigger
PCI device reset; this is enough to indicate to firmware that the IOMMU
is no longer in-use by the guest OS, making it safe to invalidate any
associated IOMMU entries.

Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X")
Signed-off-by: Matthew Rosato 
---
  hw/s390x/s390-pci-bus.c | 28 
  hw/s390x/s390-pci-vfio.c|  2 ++
  include/hw/s390x/s390-pci-bus.h |  5 +
  3 files changed, 35 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 977e7daa15..02751f3597 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -24,6 +24,8 @@
  #include "hw/pci/msi.h"
  #include "qemu/error-report.h"
  #include "qemu/module.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
  
  #ifndef DEBUG_S390PCI_BUS

  #define DEBUG_S390PCI_BUS  0
@@ -150,10 +152,30 @@ out:
  psccb->header.response_code = cpu_to_be16(rc);
  }
  
+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque)

+{
+S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice,
+   shutdown_notifier);
+
+pci_device_reset(pbdev->pdev);
+}
+
+static void s390_pci_reset_cb(void *opaque)
+{
+S390PCIBusDevice *pbdev = opaque;
+
+pci_device_reset(pbdev->pdev);
+}
+
  static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
  {
  HotplugHandler *hotplug_ctrl;
  
+if (pbdev->pft == ZPCI_PFT_ISM) {

+notifier_remove(&pbdev->shutdown_notifier);
+qemu_unregister_reset(s390_pci_reset_cb, pbdev);
+}
+
  /* Unplug the PCI device */
  if (pbdev->pdev) {
  DeviceState *pdev = DEVICE(pbdev->pdev);
@@ -,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
  pbdev->fh |= FH_SHM_VFIO;
  pbdev->forwarding_assist = false;
  }
+/* Register shutdown notifier and reset callback for ISM devices */
+if (pbdev->pft == ZPCI_PFT_ISM) {
+pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier;
+qemu_register_shutdown_notifier(&pbdev->shutdown_notifier);
+qemu_register_reset(s390_pci_reset_cb, pbdev);
+}
  } else {
  pbdev->fh |= FH_SHM_EMUL;
  /* Always intercept emulated devices */
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 5f0adb0b4a..419763f829 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -122,6 +122,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
  /* The following values remain 0 until we support other FMB formats */
  pbdev->zpci_fn.fmbl = 0;
  pbdev->zpci_fn.pft = 0;
+/* Store function type separately for type-specific behavior */
+pbdev->pft = cap->pft;
  }


Thanks, queued:

 https://gitlab.com/thuth/qemu/-/commits/s390x-next/

I had to adjust the hunk in s390_pci_read_base() due to a conflict with your 
earlier patch, please check whether it looks sane to you.


 Thomas

Re: [PATCH v1 04/24] vfio-user: add region cache

2022-12-12 Thread Philippe Mathieu-Daudé


On 9/11/22 00:13, John Johnson wrote:

cache VFIO_DEVICE_GET_REGION_INFO results to reduce
memory alloc/free cycles and as prep work for vfio-user

Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
---
  hw/vfio/ccw.c |  5 -
  hw/vfio/common.c  | 41 +++--
  hw/vfio/igd.c | 23 +--
  hw/vfio/migration.c   |  2 --
  hw/vfio/pci-quirks.c  | 19 +--
  hw/vfio/pci.c |  8 
  include/hw/vfio/vfio-common.h |  2 ++
  7 files changed, 51 insertions(+), 49 deletions(-)




  void vfio_put_base_device(VFIODevice *vbasedev)
  {
+if (vbasedev->regions != NULL) {
+int i;
+
+for (i = 0; i < vbasedev->num_regions; i++) {
+g_free(vbasedev->regions[i]);
+}
+g_free(vbasedev->regions);
+vbasedev->regions = NULL;
+}
+
  if (!vbasedev->group) {
  return;
  }
@@ -2432,6 +2451,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
  {
  size_t argsz = sizeof(struct vfio_region_info);
  
+/* create region cache */

+if (vbasedev->regions == NULL) {
+vbasedev->regions = g_new0(struct vfio_region_info *,
+   vbasedev->num_regions);
+}
+/* check cache */
+if (vbasedev->regions[index] != NULL) {
+*info = vbasedev->regions[index];
+return 0;
+}


What about simply allocating/releasing once regions[] in
vfio_instance_init / vfio_instance_finalize?

Re: [PATCH v1 06/24] vfio-user: Define type vfio_user_pci_dev_info

2022-12-12 Thread Philippe Mathieu-Daudé


On 12/12/22 12:03, John Levon wrote:

On Mon, Dec 12, 2022 at 10:01:33AM +0100, Cédric Le Goater wrote:


diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 80b03a2..dc19869 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -19,6 +19,7 @@
*/
   #include "qemu/osdep.h"
+#include CONFIG_DEVICES
   #include 
   #include 
@@ -3421,3 +3422,91 @@ static void register_vfio_pci_dev_type(void)
   }
   type_init(register_vfio_pci_dev_type)
+
+
+#ifdef CONFIG_VFIO_USER_PCI


Why not introduce a new file hw/vfio/user.c file ? It would be
cleaner.


user.c is in this series, and holds the vfio-user implementation - it's not a
PCI specific thing. So it would have to be hw/vfio/user_pci.c or something


Or hw/vfio/pci-user.c

Re: [PATCH v3 4/4] hw/nvme: fix missing cq eventidx update

2022-12-12 Thread Philippe Mathieu-Daudé


On 12/12/22 12:32, Klaus Jensen wrote:

From: Klaus Jensen 

Prior to reading the shadow doorbell cq head, we have to update the
eventidx. Otherwise, we risk that the driver will skip an mmio doorbell
write. This happens on riscv64, as reported by Guenter.

Adding the missing update to the cq eventidx fixes the issue.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Cc: qemu-ri...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
  hw/nvme/ctrl.c | 17 +++--
  1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8af70f0216f0..3df29ea68b2f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1334,10 +1334,22 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
  }
  }




  static void nvme_update_cq_head(NvmeCQueue *cq)
  {
-pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
- sizeof(cq->head));
+uint32_t v;
+
+pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v));
+
+cq->head = le32_to_cpu(v);
  


Isn't this be part of the previous patch?


  trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
  }

Re: [PATCH v3 4/4] hw/nvme: fix missing cq eventidx update

2022-12-12 Thread Klaus Jensen

On Dec 12 12:37, Philippe Mathieu-Daudé wrote:
> On 12/12/22 12:32, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Prior to reading the shadow doorbell cq head, we have to update the
> > eventidx. Otherwise, we risk that the driver will skip an mmio doorbell
> > write. This happens on riscv64, as reported by Guenter.
> > 
> > Adding the missing update to the cq eventidx fixes the issue.
> > 
> > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
> > Cc: qemu-sta...@nongnu.org
> > Cc: qemu-ri...@nongnu.org
> > Reported-by: Guenter Roeck 
> > Signed-off-by: Klaus Jensen 
> > ---
> >   hw/nvme/ctrl.c | 17 +++--
> >   1 file changed, 15 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index 8af70f0216f0..3df29ea68b2f 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -1334,10 +1334,22 @@ static inline void nvme_blk_write(BlockBackend 
> > *blk, int64_t offset,
> >   }
> >   }
> 
> 
> >   static void nvme_update_cq_head(NvmeCQueue *cq)
> >   {
> > -pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
> > - sizeof(cq->head));
> > +uint32_t v;
> > +
> > +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v));
> > +
> > +cq->head = le32_to_cpu(v);
> 
> Isn't this be part of the previous patch?
> 

Argh, I screwed it up. I'll fix it.


signature.asc
Description: PGP signature

[PATCH v4 0/4] hw/nvme: fix broken shadow doorbells on some platforms

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Guenter reports that hw/nvme is broken on riscv64[1] and big endian
platforms[2].

This is a regression since 7.1, so this does not warrent an rc5 for 7.2.
I'm sure Guenter can carry this patch in his tree, and maybe we can get
this out in a stable release.

On riscv, the issue is a missing cq eventidx update. I really wonder why
this issue only shows up on riscv64. We have not observed this on other
platforms (yet).

Further, Guenter also reported problems on big-endian platforms. The
issue here is missing endian conversions which patch 3 addresses. This
also requires a fix for the Linux kernel that I am posting separately
(can't link to it, chicken and egg problem).

  [1]: https://lore.kernel.org/qemu-devel/20221207174918.ga1151...@roeck-us.net/
  [2]: https://lore.kernel.org/qemu-devel/20221209110022.ga3396...@roeck-us.net/

v4:
 - screwed up the rebase (Philippe)

v3:
 - add patch to fix big-endian platforms

v2:
 - use QOM accessor (Philippe)
 - added some cleanup patches in front

Klaus Jensen (4):
  hw/nvme: use QOM accessors
  hw/nvme: rename shadow doorbell related trace events
  hw/nvme: fix missing endian conversions for doorbell buffers
  hw/nvme: fix missing cq eventidx update

 hw/nvme/ctrl.c   | 121 ++-
 hw/nvme/trace-events |   8 +--
 2 files changed, 78 insertions(+), 51 deletions(-)

-- 
2.38.1

[PATCH v4 4/4] hw/nvme: fix missing cq eventidx update

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Prior to reading the shadow doorbell cq head, we have to update the
eventidx. Otherwise, we risk that the driver will skip an mmio doorbell
write. This happens on riscv64, as reported by Guenter.

Adding the missing update to the cq eventidx fixes the issue.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Cc: qemu-ri...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index bb505131f5f9..3df29ea68b2f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1334,6 +1334,15 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 }
 }
 
+static void nvme_update_cq_eventidx(const NvmeCQueue *cq)
+{
+uint32_t v = cpu_to_le32(cq->head);
+
+trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head);
+
+pci_dma_write(PCI_DEVICE(cq->ctrl), cq->ei_addr, &v, sizeof(v));
+}
+
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
 uint32_t v;
@@ -1358,6 +1367,7 @@ static void nvme_post_cqes(void *opaque)
 hwaddr addr;
 
 if (n->dbbuf_enabled) {
+nvme_update_cq_eventidx(cq);
 nvme_update_cq_head(cq);
 }
 
-- 
2.38.1

[PATCH v4 3/4] hw/nvme: fix missing endian conversions for doorbell buffers

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

The eventidx and doorbell value are not handling endianness correctly.
Fix this.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index cfab21b3436e..bb505131f5f9 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1336,8 +1336,11 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
-pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
- sizeof(cq->head));
+uint32_t v;
+
+pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v));
+
+cq->head = le32_to_cpu(v);
 
 trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
 }
@@ -6148,16 +6151,20 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
 {
+uint32_t v = cpu_to_le32(sq->tail);
+
 trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
 
-pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail,
-  sizeof(sq->tail));
+pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &v, sizeof(v));
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
-pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail,
- sizeof(sq->tail));
+uint32_t v;
+
+pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &v, sizeof(v));
+
+sq->tail = le32_to_cpu(v);
 
 trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
 }
-- 
2.38.1

Re: [PATCH] tests/qtest/libqos/e1000e: Remove "other" interrupts

2022-12-12 Thread Thomas Huth


On 10/11/2022 12.40, Akihiko Odaki wrote:

The "other" kind of interrupts are not used in the tests.

Signed-off-by: Akihiko Odaki 
---
  tests/qtest/libqos/e1000e.h | 1 -
  tests/qtest/libqos/e1000e.c | 1 -
  2 files changed, 2 deletions(-)

diff --git a/tests/qtest/libqos/e1000e.h b/tests/qtest/libqos/e1000e.h
index a22f5fdbad..3bf285af42 100644
--- a/tests/qtest/libqos/e1000e.h
+++ b/tests/qtest/libqos/e1000e.h
@@ -24,7 +24,6 @@
  
  #define E1000E_RX0_MSG_ID   (0)

  #define E1000E_TX0_MSG_ID   (1)
-#define E1000E_OTHER_MSG_ID (2)
  
  #define E1000E_TDLEN(0x3808)

  #define E1000E_TDT  (0x3818)
diff --git a/tests/qtest/libqos/e1000e.c b/tests/qtest/libqos/e1000e.c
index 80b3e3db90..3b51bafcb7 100644
--- a/tests/qtest/libqos/e1000e.c
+++ b/tests/qtest/libqos/e1000e.c
@@ -32,7 +32,6 @@
  #define E1000E_IVAR_TEST_CFG \
  (((E1000E_RX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 
E1000_IVAR_RXQ0_SHIFT) | \
   ((E1000E_TX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 
E1000_IVAR_TXQ0_SHIFT) | \
- ((E1000E_OTHER_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 
E1000_IVAR_OTHER_SHIFT) | \
   E1000_IVAR_TX_INT_EVERY_WB)


Thanks, queued to my testing-next branch now:

 https://gitlab.com/thuth/qemu/-/commits/testing-next/

 Thomas

[PATCH v4 1/4] hw/nvme: use QOM accessors

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Replace various ->parent_obj use with the equivalent QOM accessors.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 89 +++---
 1 file changed, 48 insertions(+), 41 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e54276dc1dc7..6b70c1e39831 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -449,7 +449,7 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void 
*buf, int size)
 return 0;
 }
 
-return pci_dma_read(&n->parent_obj, addr, buf, size);
+return pci_dma_read(PCI_DEVICE(n), addr, buf, size);
 }
 
 static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size)
@@ -469,7 +469,7 @@ static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const 
void *buf, int size)
 return 0;
 }
 
-return pci_dma_write(&n->parent_obj, addr, buf, size);
+return pci_dma_write(PCI_DEVICE(n), addr, buf, size);
 }
 
 static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
@@ -514,24 +514,27 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
 
 static void nvme_irq_check(NvmeCtrl *n)
 {
+PCIDevice *pci = PCI_DEVICE(n);
 uint32_t intms = ldl_le_p(&n->bar.intms);
 
-if (msix_enabled(&(n->parent_obj))) {
+if (msix_enabled(pci)) {
 return;
 }
 if (~intms & n->irq_status) {
-pci_irq_assert(&n->parent_obj);
+pci_irq_assert(pci);
 } else {
-pci_irq_deassert(&n->parent_obj);
+pci_irq_deassert(pci);
 }
 }
 
 static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 {
+PCIDevice *pci = PCI_DEVICE(n);
+
 if (cq->irq_enabled) {
-if (msix_enabled(&(n->parent_obj))) {
+if (msix_enabled(pci)) {
 trace_pci_nvme_irq_msix(cq->vector);
-msix_notify(&(n->parent_obj), cq->vector);
+msix_notify(pci, cq->vector);
 } else {
 trace_pci_nvme_irq_pin();
 assert(cq->vector < 32);
@@ -546,7 +549,7 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 {
 if (cq->irq_enabled) {
-if (msix_enabled(&(n->parent_obj))) {
+if (msix_enabled(PCI_DEVICE(n))) {
 return;
 } else {
 assert(cq->vector < 32);
@@ -570,7 +573,7 @@ static void nvme_req_clear(NvmeRequest *req)
 static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
 {
 if (dma) {
-pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
+pci_dma_sglist_init(&sg->qsg, PCI_DEVICE(n), 0);
 sg->flags = NVME_SG_DMA;
 } else {
 qemu_iovec_init(&sg->iov, 0);
@@ -1333,7 +1336,7 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
-pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head,
+pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
 sizeof(cq->head));
 trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
 }
@@ -1363,7 +1366,7 @@ static void nvme_post_cqes(void *opaque)
 req->cqe.sq_id = cpu_to_le16(sq->sqid);
 req->cqe.sq_head = cpu_to_le16(sq->head);
 addr = cq->dma_addr + cq->tail * n->cqe_size;
-ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
+ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe,
 sizeof(req->cqe));
 if (ret) {
 trace_pci_nvme_err_addr_write(addr);
@@ -4615,6 +4618,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 {
+PCIDevice *pci = PCI_DEVICE(n);
 uint16_t offset = (cq->cqid << 3) + (1 << 2);
 
 n->cq[cq->cqid] = NULL;
@@ -4625,8 +4629,8 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 event_notifier_set_handler(&cq->notifier, NULL);
 event_notifier_cleanup(&cq->notifier);
 }
-if (msix_enabled(&n->parent_obj)) {
-msix_vector_unuse(&n->parent_obj, cq->vector);
+if (msix_enabled(pci)) {
+msix_vector_unuse(pci, cq->vector);
 }
 if (cq->cqid) {
 g_free(cq);
@@ -4664,8 +4668,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
uint64_t dma_addr,
  uint16_t cqid, uint16_t vector, uint16_t size,
  uint16_t irq_enabled)
 {
-if (msix_enabled(&n->parent_obj)) {
-msix_vector_use(&n->parent_obj, vector);
+PCIDevice *pci = PCI_DEVICE(n);
+
+if (msix_enabled(pci)) {
+msix_vector_use(pci, vector);
 }
 cq->ctrl = n;
 cq->cqid = cqid;
@@ -4716,7 +4722,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_err_invalid_create_cq_addr(prp1);
 return NVME_INVALID_PRP_OFFSET | NVME_DNR;
 }
-if (unlikely(!msix_enabled(&n->parent_obj) && vector)) {
+if (unlikely(!msix_enabled(PCI_DEVICE(n)) && vector

Re: [PATCH] tests/qtest/e1000e-test: De-duplicate constants

2022-12-12 Thread Thomas Huth


On 10/11/2022 12.44, Akihiko Odaki wrote:

De-duplicate constants found in e1000e_send_verify() to avoid mismatch
and improve readability.

Signed-off-by: Akihiko Odaki 
---
  tests/qtest/e1000e-test.c | 19 +--
  1 file changed, 9 insertions(+), 10 deletions(-)


Thanks, I added e1000e_receive_verify() to the commit description (since 
you've modified that, too) and added this to my testing-next branch:


 https://gitlab.com/thuth/qemu/-/commits/testing-next/

 Thomas

Re: [PATCH] tests/qtest/libqos/e1000e: Correctly group register accesses

2022-12-12 Thread Thomas Huth


On 10/11/2022 12.45, Akihiko Odaki wrote:

Add a newline after E1000_TCTL write and make it clear that E1000_TCTL
write is what enabling transmit.

Signed-off-by: Akihiko Odaki 
---
  tests/qtest/libqos/e1000e.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tests/qtest/libqos/e1000e.c b/tests/qtest/libqos/e1000e.c
index 80b3e3db90..0e5cceafe4 100644
--- a/tests/qtest/libqos/e1000e.c
+++ b/tests/qtest/libqos/e1000e.c
@@ -152,6 +152,7 @@ static void e1000e_pci_start_hw(QOSGraphObject *obj)
  
  /* Enable transmit */

  e1000e_macreg_write(&d->e1000e, E1000_TCTL, E1000_TCTL_EN);
+
  e1000e_macreg_write(&d->e1000e, E1000_RDBAL,
 (uint32_t)d->e1000e.rx_ring);
  e1000e_macreg_write(&d->e1000e, E1000_RDBAH,


Thanks, queued:

 https://gitlab.com/thuth/qemu/-/commits/testing-next/

 Thomas

[PATCH v4 2/4] hw/nvme: rename shadow doorbell related trace events

2022-12-12 Thread Klaus Jensen

From: Klaus Jensen 

Rename the trace events related to writing the event index and reading
the doorbell value to make it more clear that the event is associated
with an actual update (write or read respectively).

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 11 +++
 hw/nvme/trace-events |  8 
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6b70c1e39831..cfab21b3436e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1337,8 +1337,9 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
 pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
-sizeof(cq->head));
-trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
+ sizeof(cq->head));
+
+trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
 }
 
 static void nvme_post_cqes(void *opaque)
@@ -6147,16 +6148,18 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
 {
+trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
+
 pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail,
   sizeof(sq->tail));
-trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail);
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
 pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail,
  sizeof(sq->tail));
-trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail);
+
+trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
 }
 
 static void nvme_process_sq(void *opaque)
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index fccb79f48973..b16f2260b4fd 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -84,8 +84,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
 pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
 pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, 
uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" 
dw1 0x%"PRIx32" status 0x%"PRIx16""
-pci_nvme_eventidx_cq(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" 
new_eventidx %"PRIu16""
-pci_nvme_eventidx_sq(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" 
new_eventidx %"PRIu16""
+pci_nvme_update_cq_eventidx(uint16_t cqid, uint16_t new_eventidx) "cqid 
%"PRIu16" new_eventidx %"PRIu16""
+pci_nvme_update_sq_eventidx(uint16_t sqid, uint16_t new_eventidx) "sqid 
%"PRIu16" new_eventidx %"PRIu16""
 pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
 pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 
0x%"PRIx64" data 0x%"PRIx64" size %d"
 pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" 
new_head %"PRIu16""
@@ -102,8 +102,8 @@ pci_nvme_mmio_start_success(void) "setting controller 
enable bit succeeded"
 pci_nvme_mmio_stopped(void) "cleared controller enable bit"
 pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
-pci_nvme_shadow_doorbell_cq(uint16_t cqid, uint16_t new_shadow_doorbell) "cqid 
%"PRIu16" new_shadow_doorbell %"PRIu16""
-pci_nvme_shadow_doorbell_sq(uint16_t sqid, uint16_t new_shadow_doorbell) "sqid 
%"PRIu16" new_shadow_doorbell %"PRIu16""
+pci_nvme_update_cq_head(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" 
new_head %"PRIu16""
+pci_nvme_update_sq_tail(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" 
new_tail %"PRIu16""
 pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-- 
2.38.1

Re: [PATCH 20/30] meson: prepare move of QEMU_CFLAGS to meson

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:34 PM Paolo Bonzini  wrote:
>
> Clean up the handling of compiler flags in meson.build, splitting
> the general flags that should be included in subprojects as well,
> from warning flags that only apply to QEMU itself.  The two were
> mixed in both configure tests and meson tests.
>
> This split makes it easier to move the compiler tests piecewise
> from configure to Meson.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  meson.build | 53 +
>  1 file changed, 29 insertions(+), 24 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index 99c1bde4d154..dac343d14797 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -190,10 +190,23 @@ endif
>  # Compiler flags #
>  ##
>
> -qemu_cflags = config_host['QEMU_CFLAGS'].split()
> +qemu_common_flags = []
> +qemu_cflags = []
> +foreach arg : config_host['QEMU_CFLAGS'].split()
> +  if arg.startswith('-W')
> +qemu_cflags += arg
> +  else
> +qemu_common_flags += arg
> +  endif
> +endforeach
>  qemu_objcflags = config_host['QEMU_OBJCFLAGS'].split()
>  qemu_ldflags = config_host['QEMU_LDFLAGS'].split()
>
> +if get_option('gprof')
> +  qemu_common_flags += ['-p']
> +  qemu_ldflags += ['-p']
> +endif
> +
>  if get_option('prefer_static')
>qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static'
>  endif
> @@ -207,10 +220,9 @@ if targetos == 'windows'
>qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', 
> '-Wl,--high-entropy-va')
>  endif
>
> -if get_option('gprof')
> -  qemu_cflags += ['-p']
> -  qemu_objcflags += ['-p']
> -  qemu_ldflags += ['-p']
> +# Exclude --warn-common with TSan to suppress warnings from the TSan 
> libraries.
> +if targetos != 'sunos' and not config_host.has_key('CONFIG_TSAN')
> +  qemu_ldflags += cc.get_supported_link_arguments('-Wl,--warn-common')
>  endif
>
>  # Specify linker-script with add_project_link_arguments so that it is not 
> placed
> @@ -230,8 +242,7 @@ if get_option('fuzzing')
>name: '-fsanitize-coverage-allowlist=/dev/null',
>   args: ['-fsanitize-coverage-allowlist=/dev/null',
>  '-fsanitize-coverage=trace-pc'] )
> -
> add_global_arguments('-fsanitize-coverage-allowlist=instrumentation-filter',
> - native: false, language: all_languages)
> +qemu_common_flags += 
> ['-fsanitize-coverage-allowlist=instrumentation-filter']
>endif
>
>if get_option('fuzzing_engine') == ''
> @@ -239,10 +250,8 @@ if get_option('fuzzing')
>  # compiled code.  To build non-fuzzer binaries with --enable-fuzzing, 
> link
>  # everything with fsanitize=fuzzer-no-link. Otherwise, the linker will be
>  # unable to bind the fuzzer-related callbacks added by instrumentation.
> -add_global_arguments('-fsanitize=fuzzer-no-link',
> - native: false, language: all_languages)
> -add_global_link_arguments('-fsanitize=fuzzer-no-link',
> -  native: false, language: all_languages)
> +qemu_common_flags += ['-fsanitize=fuzzer-no-link']
> +qemu_ldflags += ['-fsanitize=fuzzer-no-link']
>  # For the actual fuzzer binaries, we need to link against the libfuzzer
>  # library. They need to be configurable, to support OSS-Fuzz
>  fuzz_exe_ldflags = ['-fsanitize=fuzzer']
> @@ -253,6 +262,9 @@ if get_option('fuzzing')
>endif
>  endif
>
> +add_global_arguments(qemu_common_flags, native: false, language: 
> all_languages)
> +add_global_link_arguments(qemu_ldflags, native: false, language: 
> all_languages)
> +
>  # Check that the C++ compiler exists and works with the C compiler.
>  link_language = 'c'
>  linker = cc
> @@ -276,16 +288,9 @@ if 'cpp' in all_languages
>endif
>  endif
>
> -# Exclude --warn-common with TSan to suppress warnings from the TSan 
> libraries.
> -if targetos != 'sunos' and not config_host.has_key('CONFIG_TSAN')
> -  qemu_ldflags += linker.get_supported_link_arguments('-Wl,--warn-common')
> -endif
> -
> -add_global_link_arguments(qemu_ldflags, native: false, language: 
> all_languages)
> -
> -add_global_arguments(qemu_cflags, native: false, language: 'c')
> -add_global_arguments(qemu_cxxflags, native: false, language: 'cpp')
> -add_global_arguments(qemu_objcflags, native: false, language: 'objc')
> +add_project_arguments(qemu_cflags, native: false, language: 'c')
> +add_project_arguments(qemu_cxxflags, native: false, language: 'cpp')
> +add_project_arguments(qemu_objcflags, native: false, language: 'objc')
>  if targetos == 'linux'
>add_project_arguments('-isystem', meson.current_source_dir() / 
> 'linux-headers',
>  '-isystem', 'linux-headers',
> @@ -3778,12 +3783,12 @@ link_args = get_option(link_language + '_link_args')
>  if link_args.length() > 0
>summary_info += {'LDFLAGS': ' '.join(link_args)}
>  endif
> -summary_info += {'QEMU_CFLAGS':   ' '.join(qemu_cf

Re: CVMSEG Emulation

2022-12-12 Thread Peter Maydell

On Fri, 9 Dec 2022 at 21:03, Jiaxun Yang  wrote:
>
>
>
> > 2022年12月9日 17:44，Christopher Wrogg  写道：
> >
> > I tried both.
> >
> > Option 1
> > What I did:
> > #undef TARGET_VIRT_ADDR_SPACE_BITS and #define 
> > TARGET_VIRT_ADDR_SPACE_BITS 64
> > The Result:
> > perror reports "Cannot allocate memory"
> > Option 2:
> > What I did:
> > TARGET_VIRT_ADDR_SPACE_BITS for me is 30 so I masked by 0x3FFF
> > The Result:
> > The segfault persists and gdb reports the memory as inaccessible.
>
> Hmm this looks wired for me, by no chance TARGET_VIRT_ADDR_SPACE_BITS for MIPS
> can be 30, on N64 ABI build it should be 48 and 32 for N32 or O32 build.
>
> It is defined in target/mips/cpu-param.h .

For 32-bit user-mode we currently set it to 31, on the theory that userspace
only gets access to the bottom half of the address space.

-- PMM

Re: [PATCH v2 4/4] tests/qtest: Enable qtest build on Windows

2022-12-12 Thread Thomas Huth


On 25/11/2022 12.41, Bin Meng wrote:

From: Bin Meng 

Now that we have fixed various test case issues as seen when running
on Windows, let's enable the qtest build on Windows.

Signed-off-by: Bin Meng 
Reviewed-by: Thomas Huth 
---

(no changes since v1)

  tests/qtest/meson.build | 6 --
  1 file changed, 6 deletions(-)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c07a5b1a5f..f0ebb5fac6 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -1,9 +1,3 @@
-# All QTests for now are POSIX-only, but the dependencies are
-# really in libqtest, not in the testcases themselves.
-if not config_host.has_key('CONFIG_POSIX')
-  subdir_done()
-endif
-
  slow_qtests = {
'ahci-test' : 60,
'bios-tables-test' : 120,


Thanks, all four patches queued to my testing-next branch now:

 https://gitlab.com/thuth/qemu/-/commits/testing-next/

 Thomas

Re: [PATCH 21/30] build: move sanitizer tests to meson

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:42 PM Paolo Bonzini  wrote:
>
> Signed-off-by: Paolo Bonzini 
> ---
>  configure  | 151 -
>  docs/devel/build-system.rst|   4 -
>  meson.build|  63 +-
>  meson_options.txt  |   4 +
>  scripts/meson-buildoptions.sh  |   6 ++
>  tests/qemu-iotests/meson.build |   2 +-
>  tests/unit/meson.build |   2 +-
>  7 files changed, 73 insertions(+), 159 deletions(-)
>
> diff --git a/configure b/configure
> index b0df6c3cf754..babcf5d28a85 100755
> --- a/configure
> +++ b/configure
> @@ -269,9 +269,6 @@ EXTRA_OBJCFLAGS=""
>  EXTRA_LDFLAGS=""
>
>  debug_tcg="no"
> -sanitizers="no"
> -tsan="no"
> -fortify_source="yes"
>  EXESUF=""
>  prefix="/usr/local"
>  qemu_suffix="qemu"
> @@ -392,14 +389,6 @@ EOF
>compile_object
>  }
>
> -check_include() {
> -cat > $TMPC < -#include <$1>
> -int main(void) { return 0; }
> -EOF
> -  compile_object
> -}
> -
>  write_c_skeleton() {
>  cat > $TMPC <  int main(void) { return 0; }
> @@ -755,15 +744,6 @@ for opt do
>debug_tcg="yes"
>meson_option_parse --enable-debug-mutex ""
>meson_option_add -Doptimization=0
> -  fortify_source="no"
> -  ;;
> -  --enable-sanitizers) sanitizers="yes"
> -  ;;
> -  --disable-sanitizers) sanitizers="no"
> -  ;;
> -  --enable-tsan) tsan="yes"
> -  ;;
> -  --disable-tsan) tsan="no"
>;;
>--disable-tcg) tcg="disabled"
>   plugins="no"
> @@ -971,8 +951,6 @@ Advanced options (experts only):
> desired devices in configs/devices/)
>--with-devices-ARCH=NAME override default configs/devices
>--enable-debug   enable common debug build options
> -  --enable-sanitizers  enable default sanitizers
> -  --enable-tsanenable thread sanitizer
>--disable-werror disable compilation abort on warning
>--disable-stack-protector disable compiler-provided stack protection
>--cpu=CPUBuild for host CPU [$cpu]
> @@ -1547,91 +1525,6 @@ if ! compile_object "-Werror"; then
>  ccache_cpp2=yes
>  fi
>
> -#
> -# clang does not support glibc + FORTIFY_SOURCE.
> -
> -if test "$fortify_source" != "no"; then
> -  if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then
> -fortify_source="no";
> -  elif test -n "$cxx" && has $cxx &&
> -   echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then
> -fortify_source="no";
> -  else
> -fortify_source="yes"
> -  fi
> -fi
> -
> -##
> -# checks for sanitizers
> -
> -have_asan=no
> -have_ubsan=no
> -have_asan_iface_h=no
> -have_asan_iface_fiber=no
> -
> -if test "$sanitizers" = "yes" ; then
> -  write_c_skeleton
> -  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" ""; then
> -  have_asan=yes
> -  fi
> -
> -  # we could use a simple skeleton for flags checks, but this also
> -  # detect the static linking issue of ubsan, see also:
> -  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84285
> -  cat > $TMPC << EOF
> -#include 
> -int main(void) {
> -void *tmp = malloc(10);
> -if (tmp != NULL) {
> -return *(int *)(tmp + 2);
> -}
> -return 1;
> -}
> -EOF
> -  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=undefined" ""; then
> -  have_ubsan=yes
> -  fi
> -
> -  if check_include "sanitizer/asan_interface.h" ; then
> -  have_asan_iface_h=yes
> -  fi
> -
> -  cat > $TMPC << EOF
> -#include 
> -int main(void) {
> -  __sanitizer_start_switch_fiber(0, 0, 0);
> -  return 0;
> -}
> -EOF
> -  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" "" ; then
> -  have_asan_iface_fiber=yes
> -  fi
> -fi
> -
> -# Thread sanitizer is, for now, much noisier than the other sanitizers;
> -# keep it separate until that is not the case.
> -if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then
> -  error_exit "TSAN is not supported with other sanitiziers."
> -fi
> -have_tsan=no
> -have_tsan_iface_fiber=no
> -if test "$tsan" = "yes" ; then
> -  write_c_skeleton
> -  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then
> -  have_tsan=yes
> -  fi
> -  cat > $TMPC << EOF
> -#include 
> -int main(void) {
> -  __tsan_create_fiber(0);
> -  return 0;
> -}
> -EOF
> -  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then
> -  have_tsan_iface_fiber=yes
> -  fi
> -fi
> -
>  ##
>  # functions to probe cross compilers
>
> @@ -2057,42 +1950,6 @@ case "$vfio_user_server" in
>  ;;
>  esac
>
> -##
> -# End of CC checks
> -# After here, no more $cc or $ld runs
> -
> -write_c_skeleton
> -
> -if test "$fortify_source" = "yes" ; then
> -  QEMU_CFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $QEMU_CFLAGS"
> -fi
> -
> -if test "$have_asan" = "yes"; then
> -  QEMU_CFLAGS="-fsanitize=address $QEMU_CFLAGS"
> -  QEMU_LDFLAGS="-fsanitize

Re: [PATCH 23/30] build: move coroutine backend selection to meson

2022-12-12 Thread Marc-André Lureau

On Fri, Dec 9, 2022 at 3:37 PM Paolo Bonzini  wrote:
>
> To simplify the code, rename coroutine-win32.c to match the option
> passed to configure.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  configure | 62 ---
>  meson.build   | 32 +-
>  meson_options.txt |  3 +
>  scripts/meson-buildoptions.py |  1 +
>  scripts/meson-buildoptions.sh |  3 +
>  ...{coroutine-win32.c => coroutine-windows.c} |  0
>  util/meson.build  |  2 +-
>  7 files changed, 38 insertions(+), 65 deletions(-)
>  rename util/{coroutine-win32.c => coroutine-windows.c} (100%)
>
> diff --git a/configure b/configure
> index fea9cbf3abd0..1f7c5bbba4b9 100755
> --- a/configure
> +++ b/configure
> @@ -275,7 +275,6 @@ softmmu="yes"
>  linux_user=""
>  bsd_user=""
>  pie=""
> -coroutine=""
>  plugins="$default_feature"
>  meson=""
>  ninja=""
> @@ -792,8 +791,6 @@ for opt do
>;;
>--enable-fdt=*) fdt="$optarg"
>;;
> -  --with-coroutine=*) coroutine="$optarg"
> -  ;;
>--with-git=*) git="$optarg"
>;;
>--with-git-submodules=*)
> @@ -949,8 +946,6 @@ Advanced options (experts only):
>--disable-werror disable compilation abort on warning
>--disable-stack-protector disable compiler-provided stack protection
>--cpu=CPUBuild for host CPU [$cpu]
> -  --with-coroutine=BACKEND coroutine backend. Supported options:
> -   ucontext, sigaltstack, windows
>--enable-plugins
> enable plugins via shared library loading
>--disable-containers don't use containers for cross-building
> @@ -1373,61 +1368,6 @@ case "$fdt" in
>  ;;
>  esac
>
> -##
> -# check and set a backend for coroutine
> -
> -# We prefer ucontext, but it's not always possible. The fallback
> -# is sigcontext. On Windows the only valid backend is the Windows
> -# specific one.
> -
> -ucontext_works=no
> -if test "$darwin" != "yes"; then
> -  cat > $TMPC << EOF
> -#include 
> -#ifdef __stub_makecontext
> -#error Ignoring glibc stub makecontext which will always fail
> -#endif
> -int main(void) { makecontext(0, 0, 0); return 0; }
> -EOF
> -  if compile_prog "" "" ; then
> -ucontext_works=yes
> -  fi
> -fi
> -
> -if test "$coroutine" = ""; then
> -  if test "$mingw32" = "yes"; then
> -coroutine=win32
> -  elif test "$ucontext_works" = "yes"; then
> -coroutine=ucontext
> -  else
> -coroutine=sigaltstack
> -  fi
> -else
> -  case $coroutine in
> -  windows)
> -if test "$mingw32" != "yes"; then
> -  error_exit "'windows' coroutine backend only valid for Windows"
> -fi
> -# Unfortunately the user visible backend name doesn't match the
> -# coroutine-*.c filename for this case, so we have to adjust it here.
> -coroutine=win32
> -;;
> -  ucontext)
> -if test "$ucontext_works" != "yes"; then
> -  error_exit "'ucontext' backend requested but makecontext not available"
> -fi
> -;;
> -  sigaltstack)
> -if test "$mingw32" = "yes"; then
> -  error_exit "only the 'windows' coroutine backend is valid for Windows"
> -fi
> -;;
> -  *)
> -error_exit "unknown coroutine backend $coroutine"
> -;;
> -  esac
> -fi
> -
>  
>  # check if ccache is interfering with
>  # semantic analysis of macros
> @@ -2002,8 +1942,6 @@ if [ "$bsd" = "yes" ] ; then
>echo "CONFIG_BSD=y" >> $config_host_mak
>  fi
>
> -echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
> -
>  if test "$plugins" = "yes" ; then
>  echo "CONFIG_PLUGIN=y" >> $config_host_mak
>  fi
> diff --git a/meson.build b/meson.build
> index 7ee9f081d0a1..b9df49667a19 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -211,6 +211,34 @@ if get_option('prefer_static')
>qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static'
>  endif
>
> +coroutine_backend = get_option('coroutine_backend')
> +ucontext_probe = '''
> +  #include 
> +  #ifdef __stub_makecontext
> +  #error Ignoring glibc stub makecontext which will always fail
> +  #endif
> +  int main(void) { makecontext(0, 0, 0); return 0; }'''
> +
> +# On Windows the only valid backend is the Windows specific one.
> +# For POSIX prefer ucontext, but it's not always possible. The fallback
> +# is sigcontext.
> +supported_backends = []
> +if targetos == 'windows'
> +  supported_backends += ['windows']
> +else
> +  if targetos != 'darwin' and cc.links(ucontext_probe)
> +supported_backends += ['ucontext']
> +  endif
> +  supported_backends += ['sigaltstack']
> +endif
> +
> +if coroutine_backend == 'auto'
> +  coroutine_backend = supported_backends[0]
> +elif coroutine_backend not in supported_backends
> +  error('"@0@" backend requested but not available.  Available backends: 
> @1@' \
> +.format(coroutin

Re: [PATCH v2 0/2] qga: improve "syslog" domain logging

2022-12-12 Thread Andrey Drobyshev

On 11/29/22 19:38, Andrey Drobyshev wrote:
> These patches extend QGA logging interface, primarily under Windows
> guests.  They enable QGA to write to Windows event log, much like
> syslog() on *nix.  In addition we get rid of hardcoded log level used by
> ga_log().
> 
> v2:
> * Close event_log handle when doing cleanup_agent()
> * Fix switch cases indentation as reported by scripts/checkpatch.pl
> 
> Andrey Drobyshev (2):
>   qga-win: add logging to Windows event log
>   qga: map GLib log levels to system levels
> 
>  configure |  3 +++
>  qga/installer/qemu-ga.wxs |  5 
>  qga/main.c| 50 +++
>  qga/meson.build   | 19 ++-
>  qga/messages-win32.mc |  9 +++
>  5 files changed, 81 insertions(+), 5 deletions(-)
>  create mode 100644 qga/messages-win32.mc
> 

Could you please clarify the status of these patches?

Re: [PATCH 24/30] build: move stack protector flag selection to meson

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:42 PM Paolo Bonzini  wrote:
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  configure | 44 ++-
>  meson.build   | 28 +-
>  meson_options.txt |  2 ++
>  scripts/meson-buildoptions.sh |  3 +++
>  4 files changed, 34 insertions(+), 43 deletions(-)
>
> diff --git a/configure b/configure
> index 1f7c5bbba4b9..5d31294f316f 100755
> --- a/configure
> +++ b/configure
> @@ -175,7 +175,7 @@ compile_prog() {
>local_cflags="$1"
>local_ldflags="$2"
>do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags 
> -o $TMPE $TMPC \
> -  $LDFLAGS $EXTRA_LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags
> +  $LDFLAGS $EXTRA_LDFLAGS $CONFIGURE_LDFLAGS $local_ldflags
>  }
>
>  # symbolically link $1 to $2.  Portable version of "ln -sf".
> @@ -221,7 +221,6 @@ static="no"
>  cross_compile="no"
>  cross_prefix=""
>  host_cc="cc"
> -stack_protector=""
>  use_containers="yes"
>  gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")
>
> @@ -370,8 +369,6 @@ sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}"
>  QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv"
>  QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
> $QEMU_CFLAGS"
>
> -QEMU_LDFLAGS=
> -
>  # Flags that are needed during configure but later taken care of by Meson
>  CONFIGURE_CFLAGS="-std=gnu11 -Wall"
>  CONFIGURE_LDFLAGS=
> @@ -773,10 +770,6 @@ for opt do
>;;
>--disable-werror) werror="no"
>;;
> -  --enable-stack-protector) stack_protector="yes"
> -  ;;
> -  --disable-stack-protector) stack_protector="no"
> -  ;;
>--enable-cfi)
>cfi="true";
>meson_option_add -Db_lto=true
> @@ -944,7 +937,6 @@ Advanced options (experts only):
>--with-devices-ARCH=NAME override default configs/devices
>--enable-debug   enable common debug build options
>--disable-werror disable compilation abort on warning
> -  --disable-stack-protector disable compiler-provided stack protection
>--cpu=CPUBuild for host CPU [$cpu]
>--enable-plugins
> enable plugins via shared library loading
> @@ -1157,7 +1149,7 @@ EOF
>  optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
>  do_objc -Werror $optflag \
>$OBJCFLAGS $EXTRA_OBJCFLAGS $CONFIGURE_OBJCFLAGS $QEMU_OBJCFLAGS \
> -  -o $TMPE $TMPM $QEMU_LDFLAGS
> +  -o $TMPE $TMPM
>  }
>
>  for flag in $gcc_flags; do
> @@ -1169,37 +1161,6 @@ for flag in $gcc_flags; do
>  fi
>  done
>
> -if test "$stack_protector" != "no"; then
> -  cat > $TMPC << EOF
> -int main(int argc, char *argv[])
> -{
> -char arr[64], *p = arr, *c = argv[argc - 1];
> -while (*c) {
> -*p++ = *c++;
> -}
> -return 0;
> -}
> -EOF
> -  gcc_flags="-fstack-protector-strong -fstack-protector-all"
> -  sp_on=0
> -  for flag in $gcc_flags; do
> -# We need to check both a compile and a link, since some compiler
> -# setups fail only on a .c->.o compile and some only at link time
> -if compile_object "-Werror $flag" &&
> -   compile_prog "-Werror $flag" ""; then
> -  QEMU_CFLAGS="$QEMU_CFLAGS $flag"
> -  QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
> -  sp_on=1
> -  break
> -fi
> -  done
> -  if test "$stack_protector" = yes; then
> -if test $sp_on = 0; then
> -  error_exit "Stack protector not supported"
> -fi
> -  fi
> -fi
> -
>  # Disable -Wmissing-braces on older compilers that warn even for
>  # the "universal" C zero initializer {0}.
>  cat > $TMPC << EOF
> @@ -1968,7 +1929,6 @@ echo "PKG_CONFIG=${pkg_config_exe}" >> $config_host_mak
>  echo "CC=$cc" >> $config_host_mak
>  echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
>  echo "QEMU_OBJCFLAGS=$QEMU_OBJCFLAGS" >> $config_host_mak
> -echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak
>  echo "EXESUF=$EXESUF" >> $config_host_mak
>
>  # use included Linux headers
> diff --git a/meson.build b/meson.build
> index b9df49667a19..c5a8dce9e1d6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -200,7 +200,7 @@ foreach arg : config_host['QEMU_CFLAGS'].split()
>endif
>  endforeach
>  qemu_objcflags = config_host['QEMU_OBJCFLAGS'].split()
> -qemu_ldflags = config_host['QEMU_LDFLAGS'].split()
> +qemu_ldflags = []
>
>  if get_option('gprof')
>qemu_common_flags += ['-p']
> @@ -211,6 +211,32 @@ if get_option('prefer_static')
>qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static'
>  endif
>
> +if not get_option('stack_protector').disabled()
> +  stack_protector_probe = '''
> +int main(int argc, char *argv[])
> +{
> +  char arr[64], *p = arr, *c = argv[argc - 1];
> +  while (*c) {
> +  *p++ = *c++;
> +  }
> +  return 0;
> +}'''
> +  have_stack_protector = false
> +  foreach arg : ['-fstack-protector-strong', '-fstack-protector-all']
> +# We need to check both a compile and a lin

Re: [PATCH] FreeBSD: Upgrade to 12.4 release

2022-12-12 Thread Thomas Huth


On 08/12/2022 07.52, Brad Smith wrote:

FreeBSD: Upgrade to 12.4 release

Signed-off-by: Brad Smith 
---
  .gitlab-ci.d/cirrus.yml | 2 +-
  tests/vm/freebsd| 4 ++--
  2 files changed, 3 insertions(+), 3 deletions(-)


Thanks, queued to my testing-next branch:

 https://gitlab.com/thuth/qemu/-/commits/testing-next/

 Thomas

Re: [PATCH v2 0/2] qga: improve "syslog" domain logging

2022-12-12 Thread Konstantin Kostiuk

Currently, there is a code freeze in QEMU for release 7.2.
I will merge this after it https://wiki.qemu.org/Planning/7.2

Best Regards,
Konstantin Kostiuk.


On Mon, Dec 12, 2022 at 2:17 PM Andrey Drobyshev <
andrey.drobys...@virtuozzo.com> wrote:

> On 11/29/22 19:38, Andrey Drobyshev wrote:
> > These patches extend QGA logging interface, primarily under Windows
> > guests.  They enable QGA to write to Windows event log, much like
> > syslog() on *nix.  In addition we get rid of hardcoded log level used by
> > ga_log().
> >
> > v2:
> > * Close event_log handle when doing cleanup_agent()
> > * Fix switch cases indentation as reported by scripts/checkpatch.pl
> >
> > Andrey Drobyshev (2):
> >   qga-win: add logging to Windows event log
> >   qga: map GLib log levels to system levels
> >
> >  configure |  3 +++
> >  qga/installer/qemu-ga.wxs |  5 
> >  qga/main.c| 50 +++
> >  qga/meson.build   | 19 ++-
> >  qga/messages-win32.mc |  9 +++
> >  5 files changed, 81 insertions(+), 5 deletions(-)
> >  create mode 100644 qga/messages-win32.mc
> >
>
> Could you please clarify the status of these patches?
>
>

Re: [PATCH] tests/qtest/vhost-user-blk-test: don't abort all qtests on missing envar

2022-12-12 Thread Thomas Huth


On 25/11/2022 16.58, Christian Schoenebeck wrote:

This test requires environment variable QTEST_QEMU_STORAGE_DAEMON_BINARY
to be defined for running. If not, it would immediately abort all qtests
and prevent other, unrelated tests from running.

To fix that, just skip vhost-user-blk-test instead and log a message
about missing environment variable.

Signed-off-by: Christian Schoenebeck 
---

  I also tried g_test_skip(errmsg) from the setup handlers instead, but it
  always caused the tests to abort with an error:
  
  ../tests/qtest/libqtest.c:179: kill_qemu() tried to terminate QEMU process

  but encountered exit status 1 (expected 0)
  
  I haven't further investigated.


  tests/qtest/vhost-user-blk-test.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index 07a4c2d500..dc37f5af4d 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -983,6 +983,12 @@ static void register_vhost_user_blk_test(void)
  .before = vhost_user_blk_test_setup,
  };
  
+if (!getenv("QTEST_QEMU_STORAGE_DAEMON_BINARY")) {

+g_test_message("QTEST_QEMU_STORAGE_DAEMON_BINARY not defined, "
+   "skipping vhost-user-blk-test");
+return;
+}
+
  /*
   * tests for vhost-user-blk and vhost-user-blk-pci
   * The tests are borrowed from tests/virtio-blk-test.c. But some tests


Thanks, queued to my testing-next branch now:

 https://gitlab.com/thuth/qemu/-/commits/testing-next/

 Thomas

Re: [RFC PATCH v2 03/22] i386/xen: Add xen-version machine property and init KVM Xen support

2022-12-12 Thread Paul Durrant


On 09/12/2022 09:55, David Woodhouse wrote:

From: David Woodhouse 

This is a machine property for two main reasons. One is that it allows
us to set it in default_machine_opts for the xenfv platform when not
running on actual Xen. The other is that theoretically we *could* do
this with TCG too; we'd just have to implement a bunch of the stuff that
KVM already does for us.

Signed-off-by: David Woodhouse 
---
  hw/i386/pc.c| 32 +++
  hw/i386/pc_piix.c   | 10 +++--
  include/hw/i386/pc.h|  3 +++
  target/i386/kvm/kvm.c   | 26 ++
  target/i386/meson.build |  1 +
  target/i386/xen.c   | 49 +
  target/i386/xen.h   | 19 
  7 files changed, 138 insertions(+), 2 deletions(-)
  create mode 100644 target/i386/xen.c
  create mode 100644 target/i386/xen.h

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 546b703cb4..9bada1a8ff 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1811,6 +1811,32 @@ static void pc_machine_set_max_fw_size(Object *obj, 
Visitor *v,
  pcms->max_fw_size = value;
  }
  
+static void pc_machine_get_xen_version(Object *obj, Visitor *v,

+   const char *name, void *opaque,
+   Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+uint32_t value = pcms->xen_version;
+
+visit_type_uint32(v, name, &value, errp);
+}
+
+static void pc_machine_set_xen_version(Object *obj, Visitor *v,
+   const char *name, void *opaque,
+   Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+Error *error = NULL;
+uint32_t value;
+
+visit_type_uint32(v, name, &value, &error);
+if (error) {
+error_propagate(errp, error);
+return;
+}
+
+pcms->xen_version = value;
+}
  
  static void pc_machine_initfn(Object *obj)

  {
@@ -1978,6 +2004,12 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
  NULL, NULL);
  object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP,
  "SMBIOS Entry Point type [32, 64]");
+
+object_class_property_add(oc, "xen-version", "uint32",
+pc_machine_get_xen_version, pc_machine_set_xen_version,
+NULL, NULL);
+object_class_property_set_description(oc, "xen-version",
+"Xen version to be emulated (in XENVER_version form e.g. 0x4000a for 
4.10)");
  }


Since this is e properly of the general pc machine class, could it be 
made to report the actual version if running on real Xen and be 
read-only? AFAICT I could specify "accel=xen,xen-version=" and the 
feels like it should be an error.


  
  static const TypeInfo pc_machine_info = {

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0ad0ed1603..13286d0739 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -876,7 +876,10 @@ static void xenfv_4_2_machine_options(MachineClass *m)
  pc_i440fx_4_2_machine_options(m);
  m->desc = "Xen Fully-virtualized PC";
  m->max_cpus = HVM_MAX_VCPUS;
-m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+if (xen_enabled())
+m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+else
+m->default_machine_opts = "accel=kvm,xen-version=0x40002";
  }
  
  DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init,

@@ -888,7 +891,10 @@ static void xenfv_3_1_machine_options(MachineClass *m)
  m->desc = "Xen Fully-virtualized PC";
  m->alias = "xenfv";
  m->max_cpus = HVM_MAX_VCPUS;
-m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+if (xen_enabled())
+m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+else
+m->default_machine_opts = "accel=kvm,xen-version=0x30001";
  }
  
  DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c95333514e..9b14b18836 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -52,6 +52,9 @@ typedef struct PCMachineState {
  bool default_bus_bypass_iommu;
  uint64_t max_fw_size;
  
+/* Xen HVM emulation */

+uint32_t xen_version;
+
  /* ACPI Memory hotplug IO base address */
  hwaddr memhp_io_base;
  
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c

index a213209379..0a2069b117 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -31,6 +31,7 @@
  #include "sysemu/runstate.h"
  #include "kvm_i386.h"
  #include "sev.h"
+#include "xen.h"
  #include "hyperv.h"
  #include "hyperv-proto.h"
  
@@ -774,6 +775,17 @@ static inline bool freq_within_bounds(int freq, int target_freq)

  return false;
  }
  
+static uint32_t kvm_arch_xen_version(MachineState *ms)

+{
+uint32_t v = object_property_get_int(OBJECT(ms), "xen-version", NULL);
+
+/* If it was unset, return zero */
+if (v == (uint32_t) -1)
+return 0;
+
+return

Re: [PATCH v4 3/4] hw/nvme: fix missing endian conversions for doorbell buffers

2022-12-12 Thread Philippe Mathieu-Daudé


On 12/12/22 12:44, Klaus Jensen wrote:

From: Klaus Jensen 

The eventidx and doorbell value are not handling endianness correctly.
Fix this.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Reported-by: Guenter Roeck 
Signed-off-by: Klaus Jensen 
---
  hw/nvme/ctrl.c | 19 +--
  1 file changed, 13 insertions(+), 6 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH 25/30] build: move warning flag selection to meson

2022-12-12 Thread Marc-André Lureau

Hi

On Fri, Dec 9, 2022 at 3:48 PM Paolo Bonzini  wrote:
>
> Meson already knows to test with the positive form of the flag, which
> simplifies the test.  Warnings are now tested explicitly for the C++
> compiler, instead of hardcoding those that are only available for
> the C language.
>
> At this point all compiler flags in QEMU_CFLAGS are global and only
> depend on the OS.  No feature tests are performed in configure.
>
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Marc-André Lureau 

> ---
>  configure| 94 
>  contrib/plugins/Makefile |  3 +-
>  meson.build  | 72 --
>  3 files changed, 51 insertions(+), 118 deletions(-)
>
> diff --git a/configure b/configure
> index 5d31294f316f..6df61f4337e4 100755
> --- a/configure
> +++ b/configure
> @@ -75,7 +75,6 @@ fi
>  TMPB="qemu-conf"
>  TMPC="${TMPDIR1}/${TMPB}.c"
>  TMPO="${TMPDIR1}/${TMPB}.o"
> -TMPM="${TMPDIR1}/${TMPB}.m"
>  TMPE="${TMPDIR1}/${TMPB}.exe"
>
>  rm -f config.log
> @@ -157,15 +156,6 @@ do_cc() {
>  do_compiler_werror "$cc" $CPU_CFLAGS "$@"
>  }
>
> -do_objc() {
> -do_compiler_werror "$objcc" $CPU_CFLAGS "$@"
> -}
> -
> -# Append $2 to the variable named $1, with space separation
> -add_to() {
> -eval $1=\${$1:+\"\$$1 \"}\$2
> -}
> -
>  compile_object() {
>local_cflags="$1"
>do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags 
> -c -o $TMPO $TMPC
> @@ -1091,89 +1081,6 @@ if ! compile_prog "" "" ; then
>  error_exit "You need at least GCC v7.4 or Clang v6.0 (or XCode Clang 
> v10.0)"
>  fi
>
> -# Accumulate -Wfoo and -Wno-bar separately.
> -# We will list all of the enable flags first, and the disable flags second.
> -# Note that we do not add -Werror, because that would enable it for all
> -# configure tests. If a configure test failed due to -Werror this would
> -# just silently disable some features, so it's too error prone.
> -
> -warn_flags=
> -add_to warn_flags -Wundef
> -add_to warn_flags -Wwrite-strings
> -add_to warn_flags -Wmissing-prototypes
> -add_to warn_flags -Wstrict-prototypes
> -add_to warn_flags -Wredundant-decls
> -add_to warn_flags -Wold-style-declaration
> -add_to warn_flags -Wold-style-definition
> -add_to warn_flags -Wtype-limits
> -add_to warn_flags -Wformat-security
> -add_to warn_flags -Wformat-y2k
> -add_to warn_flags -Winit-self
> -add_to warn_flags -Wignored-qualifiers
> -add_to warn_flags -Wempty-body
> -add_to warn_flags -Wnested-externs
> -add_to warn_flags -Wendif-labels
> -add_to warn_flags -Wexpansion-to-defined
> -add_to warn_flags -Wimplicit-fallthrough=2
> -
> -nowarn_flags=
> -add_to nowarn_flags -Wno-initializer-overrides
> -add_to nowarn_flags -Wno-missing-include-dirs
> -add_to nowarn_flags -Wno-shift-negative-value
> -add_to nowarn_flags -Wno-string-plus-int
> -add_to nowarn_flags -Wno-typedef-redefinition
> -add_to nowarn_flags -Wno-tautological-type-limit-compare
> -add_to nowarn_flags -Wno-psabi
> -add_to nowarn_flags -Wno-gnu-variable-sized-type-not-at-end
> -
> -gcc_flags="$warn_flags $nowarn_flags"
> -
> -cc_has_warning_flag() {
> -write_c_skeleton;
> -
> -# Use the positive sense of the flag when testing for -Wno-wombat
> -# support (gcc will happily accept the -Wno- form of unknown
> -# warning options).
> -optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
> -compile_prog "-Werror $optflag" ""
> -}
> -
> -objcc_has_warning_flag() {
> -cat > $TMPM < -int main(void) { return 0; }
> -EOF
> -
> -# Use the positive sense of the flag when testing for -Wno-wombat
> -# support (gcc will happily accept the -Wno- form of unknown
> -# warning options).
> -optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
> -do_objc -Werror $optflag \
> -  $OBJCFLAGS $EXTRA_OBJCFLAGS $CONFIGURE_OBJCFLAGS $QEMU_OBJCFLAGS \
> -  -o $TMPE $TMPM
> -}
> -
> -for flag in $gcc_flags; do
> -if cc_has_warning_flag $flag ; then
> -QEMU_CFLAGS="$QEMU_CFLAGS $flag"
> -fi
> -if objcc_has_warning_flag $flag ; then
> -QEMU_OBJCFLAGS="$QEMU_OBJCFLAGS $flag"
> -fi
> -done
> -
> -# Disable -Wmissing-braces on older compilers that warn even for
> -# the "universal" C zero initializer {0}.
> -cat > $TMPC << EOF
> -struct {
> -  int a[2];
> -} x = {0};
> -EOF
> -if compile_object "-Werror" "" ; then
> -  :
> -else
> -  QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces"
> -fi
> -
>  # Resolve default for --enable-plugins
>  if test "$static" = "yes" ; then
>if test "$plugins" = "yes"; then
> @@ -1928,7 +1835,6 @@ echo "NINJA=$ninja" >> $config_host_mak
>  echo "PKG_CONFIG=${pkg_config_exe}" >> $config_host_mak
>  echo "CC=$cc" >> $config_host_mak
>  echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
> -echo "QEMU_OBJCFLAGS=$QEMU_OBJCFLAGS" >> $config_host_mak
>  echo "EXESUF=$EXESUF" >> $config_host_mak
>
>  # use included Linux headers
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index 8a316cd76f2f..b2b9

[PATCH 03/15] block: Pull polling out of bdrv_parent_drained_begin_single()

2022-12-12 Thread Paolo Bonzini

Only one caller of bdrv_parent_drained_begin_single() passes poll=true;
move the polling to that one caller.

While this requires exposing bdrv_parent_drained_poll_single to outside
block/io.c, this is not a big deal because the bdrv_parent_drained_*_single
functions are really internal between block.c and block/io.c.  So make
that clear while we're at it, by moving them to block_int-io.h.

Based on a patch by Kevin Wolf .

Signed-off-by: Paolo Bonzini 
---
 block.c  |  4 +++-
 block/io.c   | 10 +++---
 include/block/block-io.h | 15 ---
 include/block/block_int-io.h | 21 +
 4 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/block.c b/block.c
index 2f2123f4a4e5..c542a0a33358 100644
--- a/block.c
+++ b/block.c
@@ -2830,7 +2830,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
  */
 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
 if (new_bs_quiesce_counter && !child->quiesced_parent) {
-bdrv_parent_drained_begin_single(child, true);
+bdrv_parent_drained_begin_single(child);
+AIO_WAIT_WHILE(bdrv_child_get_parent_aio_context(child),
+   bdrv_parent_drained_poll_single(child));
 }
 
 if (old_bs) {
diff --git a/block/io.c b/block/io.c
index 571ff8c6493a..fbd9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, 
BdrvChild *ignore)
 if (c == ignore) {
 continue;
 }
-bdrv_parent_drained_begin_single(c, false);
+bdrv_parent_drained_begin_single(c);
 }
 }
 
@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, 
BdrvChild *ignore)
 }
 }
 
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
 {
 if (c->klass->drained_poll) {
 return c->klass->drained_poll(c);
@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, 
BdrvChild *ignore,
 return busy;
 }
 
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+void bdrv_parent_drained_begin_single(BdrvChild *c)
 {
-AioContext *ctx = bdrv_child_get_parent_aio_context(c);
 IO_OR_GS_CODE();
 
 assert(!c->quiesced_parent);
@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool 
poll)
 if (c->klass->drained_begin) {
 c->klass->drained_begin(c);
 }
-if (poll) {
-AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
-}
 }
 
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 0e0cd1249705..10659a3f246c 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -305,21 +305,6 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos);
 int co_wrapper_mixed
 bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 
-/**
- * bdrv_parent_drained_begin_single:
- *
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
- */
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
-
-/**
- * bdrv_parent_drained_end_single:
- *
- * End a quiesced section for the parent of @c.
- */
-void bdrv_parent_drained_end_single(BdrvChild *c);
-
 /**
  * bdrv_drain_poll:
  *
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 8bc061ebb895..0ced9c025acb 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -179,4 +179,25 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
  */
 void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Begin a quiesced section for the parent of @c.
+ */
+void bdrv_parent_drained_begin_single(BdrvChild *c);
+
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Check whether the parent of @c has quiesced.
+ */
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
+
+/**
+ * bdrv_parent_drained_end_single:
+ *
+ * End a quiesced section for the parent of @c.
+ */
+void bdrv_parent_drained_end_single(BdrvChild *c);
+
 #endif /* BLOCK_INT_IO_H */
-- 
2.38.1

[PATCH 06/15] tests/qemu-iotests/030: test_stream_parallel should use auto_finalize=False

2022-12-12 Thread Paolo Bonzini

From: Emanuele Giuseppe Esposito 

First, use run_job() instead of the current logic to run the
stream job. Then, use auto_finalize=False to be sure that
the job is not automatically deleted once it is done.

In this way, if the job finishes before we want, it is not
finalized yet so the other commands can still execute
without failing. run_job() will then take care of calling
job-finalize.

Signed-off-by: Emanuele Giuseppe Esposito 
Suggested-by: Kevin Wolf 
Message-Id: <20220314131854.2202651-11-eespo...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 tests/qemu-iotests/030 | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 98595d47fec3..e5c13cb5fe4c 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -256,7 +256,7 @@ class TestParallelOps(iotests.QMPTestCase):
 pending_jobs.append(job_id)
 result = self.vm.qmp('block-stream', device=node_name,
  job_id=job_id, bottom=f'node{i-1}',
- speed=1024)
+ speed=1024, auto_finalize=False)
 self.assert_qmp(result, 'return', {})
 
 # Do this in reverse: After unthrottling them, some jobs may finish
@@ -272,14 +272,8 @@ class TestParallelOps(iotests.QMPTestCase):
 result = self.vm.qmp('block-job-set-speed', device=job, speed=0)
 self.assert_qmp(result, 'return', {})
 
-# Wait for all jobs to be finished.
-while len(pending_jobs) > 0:
-for event in self.vm.get_qmp_events(wait=True):
-if event['event'] == 'BLOCK_JOB_COMPLETED':
-job_id = self.dictpath(event, 'data/device')
-self.assertTrue(job_id in pending_jobs)
-self.assert_qmp_absent(event, 'data/error')
-pending_jobs.remove(job_id)
+for job in pending_jobs:
+self.vm.run_job(job=job, auto_finalize=False)
 
 self.assert_no_active_block_jobs()
 self.vm.shutdown()
-- 
2.38.1

[PATCH 05/15] tests/unit/test-bdrv-drain.c: graph setup functions can't run in coroutines

2022-12-12 Thread Paolo Bonzini

From: Emanuele Giuseppe Esposito 

Graph initialization functions like blk_new(), bdrv_new() and so on
should not run in a coroutine. In fact, they might invoke a drain
(for example blk_insert_bs eventually calls bdrv_replace_child_noperm)
that in turn can invoke callbacks like bdrv_do_drained_begin_quiesce(),
that asserts exactly that we are not in a coroutine.

Move the initialization phase of test_drv_cb and test_quiesce_common
outside the coroutine logic.

Signed-off-by: Emanuele Giuseppe Esposito 
Message-Id: <20220314131854.2202651-9-eespo...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 tests/unit/test-bdrv-drain.c | 110 ++-
 1 file changed, 69 insertions(+), 41 deletions(-)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 3be214f30186..f677f1d9fc25 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -126,7 +126,8 @@ static void aio_ret_cb(void *opaque, int ret)
 }
 
 typedef struct CallInCoroutineData {
-void (*entry)(void);
+void (*entry)(void *);
+void *arg;
 bool done;
 } CallInCoroutineData;
 
@@ -134,15 +135,16 @@ static coroutine_fn void call_in_coroutine_entry(void 
*opaque)
 {
 CallInCoroutineData *data = opaque;
 
-data->entry();
+data->entry(data->arg);
 data->done = true;
 }
 
-static void call_in_coroutine(void (*entry)(void))
+static void call_in_coroutine(void (*entry)(void *), void *arg)
 {
 Coroutine *co;
 CallInCoroutineData data = {
 .entry  = entry,
+.arg= arg,
 .done   = false,
 };
 
@@ -199,26 +201,28 @@ static void do_drain_end_unlocked(enum drain_type 
drain_type, BlockDriverState *
 }
 }
 
-static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
-{
+typedef struct TestDriverCBData {
+enum drain_type drain_type;
+bool recursive;
 BlockBackend *blk;
 BlockDriverState *bs, *backing;
-BDRVTestState *s, *backing_s;
+} TestDriverCBData;
+
+static void test_drv_cb_common(void *arg)
+{
+TestDriverCBData *data = arg;
+BlockBackend *blk = data->blk;
+BlockDriverState *bs = data->bs;
+BlockDriverState *backing = data->backing;
+enum drain_type drain_type = data->drain_type;
+bool recursive = data->recursive;
+BDRVTestState *s = bs->opaque;
+BDRVTestState *backing_s = backing->opaque;
 BlockAIOCB *acb;
 int aio_ret;
 
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
 
-blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
-bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
-  &error_abort);
-s = bs->opaque;
-blk_insert_bs(blk, bs, &error_abort);
-
-backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
-backing_s = backing->opaque;
-bdrv_set_backing_hd(bs, backing, &error_abort);
-
 /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
 g_assert_cmpint(s->drain_count, ==, 0);
 g_assert_cmpint(backing_s->drain_count, ==, 0);
@@ -252,44 +256,67 @@ static void test_drv_cb_common(enum drain_type 
drain_type, bool recursive)
 
 g_assert_cmpint(s->drain_count, ==, 0);
 g_assert_cmpint(backing_s->drain_count, ==, 0);
+}
 
-bdrv_unref(backing);
-bdrv_unref(bs);
-blk_unref(blk);
+static void test_common_cb(enum drain_type drain_type, bool in_coroutine,
+   void (*cb)(void *))
+{
+TestDriverCBData data;
+
+data.drain_type = drain_type;
+data.recursive = (drain_type != BDRV_DRAIN);
+
+data.blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
+data.bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+  &error_abort);
+blk_insert_bs(data.blk, data.bs, &error_abort);
+
+data.backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, 
&error_abort);
+bdrv_set_backing_hd(data.bs, data.backing, &error_abort);
+
+if (in_coroutine) {
+call_in_coroutine(cb, &data);
+} else {
+cb(&data);
+}
+
+bdrv_unref(data.backing);
+bdrv_unref(data.bs);
+blk_unref(data.blk);
+}
+
+static void test_drv_cb(enum drain_type drain_type, bool in_coroutine)
+{
+test_common_cb(drain_type, in_coroutine, test_drv_cb_common);
 }
 
 static void test_drv_cb_drain_all(void)
 {
-test_drv_cb_common(BDRV_DRAIN_ALL, true);
+test_drv_cb(BDRV_DRAIN_ALL, false);
 }
 
 static void test_drv_cb_drain(void)
 {
-test_drv_cb_common(BDRV_DRAIN, false);
+test_drv_cb(BDRV_DRAIN, false);
 }
 
 static void test_drv_cb_co_drain_all(void)
 {
-call_in_coroutine(test_drv_cb_drain_all);
+test_drv_cb(BDRV_DRAIN_ALL, true);
 }
 
 static void test_drv_cb_co_drain(void)
 {
-call_in_coroutine(test_drv_cb_drain);
+test_drv_cb(BDRV_DRAIN, true);
 }
 
-static void test_quiesce_common(enum drain_type drain_type, bool recursive)
+static void test_quiesce_common(void *arg)
 {
-BlockBa

[PATCH 09/15] block-backend: make global properties write-once

2022-12-12 Thread Paolo Bonzini

The three global properties allow_aio_context_change,
disable_request_queuing and allow_write_before_eof are
always set for the whole life of a BlockBackend.  Make
this clear by removing the possibility of clearing them,
and by marking the corresponding function GLOBAL_STATE_CODE().

Signed-off-by: Paolo Bonzini 
---
 block/block-backend.c | 24 ++--
 block/commit.c|  4 ++--
 block/export/export.c |  2 +-
 block/mirror.c|  4 ++--
 block/parallels.c |  2 +-
 block/qcow.c  |  2 +-
 block/qcow2.c |  2 +-
 block/qed.c   |  2 +-
 block/stream.c|  4 ++--
 block/vdi.c   |  2 +-
 block/vhdx.c  |  2 +-
 block/vmdk.c  |  4 ++--
 block/vpc.c   |  2 +-
 include/sysemu/block-backend-io.h |  6 +++---
 nbd/server.c  |  3 +--
 tests/unit/test-bdrv-drain.c  |  4 ++--
 tests/unit/test-block-iothread.c  |  2 +-
 17 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index c4a884b86c2b..fe42d53d655d 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -73,8 +73,13 @@ struct BlockBackend {
 uint64_t shared_perm;
 bool disable_perm;
 
+/*
+ * Can only become true; should be written before any requests is
+ * submitted to the BlockBackend.
+ */
 bool allow_aio_context_change;
 bool allow_write_beyond_eof;
+bool disable_request_queuing;
 
 /* Protected by BQL */
 NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -82,7 +87,6 @@ struct BlockBackend {
 
 int quiesce_counter;
 CoQueue queued_requests;
-bool disable_request_queuing;
 
 VMChangeStateEntry *vmsh;
 bool force_allow_inactivate;
@@ -1217,22 +1221,22 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
 }
 }
 
-void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+void blk_allow_write_beyond_eof(BlockBackend *blk)
 {
-IO_CODE();
-blk->allow_write_beyond_eof = allow;
+GLOBAL_STATE_CODE();
+blk->allow_write_beyond_eof = true;
 }
 
-void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
+void blk_allow_aio_context_change(BlockBackend *blk)
 {
-IO_CODE();
-blk->allow_aio_context_change = allow;
+GLOBAL_STATE_CODE();
+blk->allow_aio_context_change = true;
 }
 
-void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
+void blk_disable_request_queuing(BlockBackend *blk)
 {
-IO_CODE();
-blk->disable_request_queuing = disable;
+GLOBAL_STATE_CODE();
+blk->disable_request_queuing = true;
 }
 
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
diff --git a/block/commit.c b/block/commit.c
index b34634176797..7a448838301b 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -379,7 +379,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 if (ret < 0) {
 goto fail;
 }
-blk_set_disable_request_queuing(s->base, true);
+blk_disable_request_queuing(s->base);
 s->base_bs = base;
 
 /* Required permissions are already taken with block_job_add_bdrv() */
@@ -388,7 +388,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 if (ret < 0) {
 goto fail;
 }
-blk_set_disable_request_queuing(s->top, true);
+blk_disable_request_queuing(s->top);
 
 s->backing_file_str = g_strdup(backing_file_str);
 s->on_error = on_error;
diff --git a/block/export/export.c b/block/export/export.c
index 7cc0c25c1c9f..6abfe5becb88 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -155,7 +155,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error 
**errp)
 blk = blk_new(ctx, perm, BLK_PERM_ALL);
 
 if (!fixed_iothread) {
-blk_set_allow_aio_context_change(blk, true);
+blk_allow_aio_context_change(blk);
 }
 
 ret = blk_insert_bs(blk, bs, errp);
diff --git a/block/mirror.c b/block/mirror.c
index 251adc5ae02a..28e76e9dbf55 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1768,8 +1768,8 @@ static BlockJob *mirror_start_job(
  * ensure that. */
 blk_set_force_allow_inactivate(s->target);
 }
-blk_set_allow_aio_context_change(s->target, true);
-blk_set_disable_request_queuing(s->target, true);
+blk_allow_aio_context_change(s->target);
+blk_disable_request_queuing(s->target);
 
 s->replaces = g_strdup(replaces);
 s->on_source_error = on_source_error;
diff --git a/block/parallels.c b/block/parallels.c
index bbea2f2221ff..44e18a8cc75d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -576,7 +576,7 @@ static int coroutine_fn 
parallels_co_create(BlockdevCreateOptions* opts,
 ret = -EPERM;
 goto out;
 }
-blk_set_allow_write_beyond_eof(blk, true);
+blk_allow_write_beyond_eof(blk);

[PATCH 13/15] block: second argument of bdrv_do_drained_end is always NULL

2022-12-12 Thread Paolo Bonzini

Remove it and unify the function with bdrv_drained_end.

Signed-off-by: Paolo Bonzini 
---
 block/io.c | 21 ++---
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/block/io.c b/block/io.c
index 695c8f3f5faa..c2962adf8d2d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -69,14 +69,11 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
 }
 }
 
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
 {
 BdrvChild *c;
 
 QLIST_FOREACH(c, &bs->parents, next_parent) {
-if (c == ignore) {
-continue;
-}
 bdrv_parent_drained_end_single(c);
 }
 }
@@ -262,7 +259,6 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
 
 static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
   bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
 
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
@@ -381,10 +377,11 @@ void bdrv_drained_begin(BlockDriverState *bs)
  * This function does not poll, nor must any of its recursively called
  * functions.
  */
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
+void bdrv_drained_end(BlockDriverState *bs)
 {
 int old_quiesce_counter;
 
+IO_OR_GS_CODE();
 assert(bs->quiesce_counter > 0);
 
 /* Re-enable things in child-to-parent order */
@@ -393,17 +390,11 @@ static void bdrv_do_drained_end(BlockDriverState *bs, 
BdrvChild *parent)
 if (bs->drv && bs->drv->bdrv_drain_end) {
 bs->drv->bdrv_drain_end(bs);
 }
-bdrv_parent_drained_end(bs, parent);
+bdrv_parent_drained_end(bs);
 aio_enable_external(bdrv_get_aio_context(bs));
 }
 }
 
-void bdrv_drained_end(BlockDriverState *bs)
-{
-IO_OR_GS_CODE();
-bdrv_do_drained_end(bs, NULL);
-}
-
 void bdrv_drain(BlockDriverState *bs)
 {
 IO_OR_GS_CODE();
@@ -504,7 +495,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
 g_assert(!bs->refcnt);
 
 while (bs->quiesce_counter) {
-bdrv_do_drained_end(bs, NULL);
+bdrv_drained_end(bs);
 }
 }
 
@@ -526,7 +517,7 @@ void bdrv_drain_all_end(void)
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
 aio_context_acquire(aio_context);
-bdrv_do_drained_end(bs, NULL);
+bdrv_drained_end(bs);
 aio_context_release(aio_context);
 }
 
-- 
2.38.1

[PATCH 10/15] block-backend: always wait for drain before starting operation

2022-12-12 Thread Paolo Bonzini

All I/O operations call blk_wait_while_drained() immediately after
blk_inc_in_flight(), except for blk_abort_aio_request() where it
does not hurt to add such a call.  Merge the two functions into one,
and add a note about a disturbing lack of thread-safety that will
be fixed shortly.

While at it, make the quiesce_counter check a loop.  While the check
does not have to be "perfect", i.e. it only matters that an endless
stream of I/Os is stopped sooner or later, it is more logical to check
the counter repeatedly until it is zero.

Signed-off-by: Paolo Bonzini 
---
 block/block-backend.c | 27 ---
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index fe42d53d655d..627d491d4155 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1270,18 +1270,6 @@ static int blk_check_byte_request(BlockBackend *blk, 
int64_t offset,
 return 0;
 }
 
-/* To be called between exactly one pair of blk_inc/dec_in_flight() */
-static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
-{
-assert(blk->in_flight > 0);
-
-if (blk->quiesce_counter && !blk->disable_request_queuing) {
-blk_dec_in_flight(blk);
-qemu_co_queue_wait(&blk->queued_requests, NULL);
-blk_inc_in_flight(blk);
-}
-}
-
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static int coroutine_fn
 blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
@@ -1334,7 +1322,6 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t 
offset,
 IO_OR_GS_CODE();
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags);
 blk_dec_in_flight(blk);
 
@@ -1349,7 +1336,6 @@ int coroutine_fn blk_co_preadv_part(BlockBackend *blk, 
int64_t offset,
 IO_OR_GS_CODE();
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags);
 blk_dec_in_flight(blk);
 
@@ -1401,7 +1387,6 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, 
int64_t offset,
 IO_OR_GS_CODE();
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
 blk_dec_in_flight(blk);
 
@@ -1466,6 +1451,14 @@ void blk_inc_in_flight(BlockBackend *blk)
 {
 IO_CODE();
 qatomic_inc(&blk->in_flight);
+if (!blk->disable_request_queuing) {
+/* TODO: this is not thread-safe! */
+while (blk->quiesce_counter) {
+qatomic_dec(&blk->in_flight);
+qemu_co_queue_wait(&blk->queued_requests, NULL);
+qatomic_inc(&blk->in_flight);
+}
+}
 }
 
 void blk_dec_in_flight(BlockBackend *blk)
@@ -1546,7 +1539,6 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, 
int64_t offset,
 Coroutine *co;
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
 acb->rwco = (BlkRwCo) {
 .blk= blk,
@@ -1685,7 +1677,6 @@ int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned 
long int req,
 IO_OR_GS_CODE();
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 ret = blk_co_do_ioctl(blk, req, buf);
 blk_dec_in_flight(blk);
 
@@ -1749,7 +1740,6 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, 
int64_t offset,
 IO_OR_GS_CODE();
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 ret = blk_co_do_pdiscard(blk, offset, bytes);
 blk_dec_in_flight(blk);
 
@@ -1790,7 +1780,6 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
 IO_OR_GS_CODE();
 
 blk_inc_in_flight(blk);
-blk_wait_while_drained(blk);
 ret = blk_co_do_flush(blk);
 blk_dec_in_flight(blk);
 
-- 
2.38.1

[PATCH 02/15] Revert "block: Don't poll in bdrv_replace_child_noperm()"

2022-12-12 Thread Paolo Bonzini

This reverts commit a4e5c80a45b22359cf9c187f0df4f8544812c55c.

Signed-off-by: Paolo Bonzini 
---
 block.c  | 103 +--
 block/io.c   |   2 +-
 include/block/block-io.h |   8 ---
 tests/unit/test-bdrv-drain.c |  10 
 4 files changed, 15 insertions(+), 108 deletions(-)

diff --git a/block.c b/block.c
index 87022f4cd971..2f2123f4a4e5 100644
--- a/block.c
+++ b/block.c
@@ -2367,20 +2367,6 @@ static void bdrv_replace_child_abort(void *opaque)
 
 GLOBAL_STATE_CODE();
 /* old_bs reference is transparently moved from @s to @s->child */
-if (!s->child->bs) {
-/*
- * The parents were undrained when removing old_bs from the child. New
- * requests can't have been made, though, because the child was empty.
- *
- * TODO Make bdrv_replace_child_noperm() transactionable to avoid
- * undraining the parent in the first place. Once this is done, having
- * new_bs drained when calling bdrv_replace_child_tran() is not a
- * requirement any more.
- */
-bdrv_parent_drained_begin_single(s->child, false);
-assert(!bdrv_parent_drained_poll_single(s->child));
-}
-assert(s->child->quiesced_parent);
 bdrv_replace_child_noperm(s->child, s->old_bs);
 bdrv_unref(new_bs);
 }
@@ -2396,19 +2382,12 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  *
  * Note: real unref of old_bs is done only on commit.
  *
- * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
- * kept drained until the transaction is completed.
- *
  * The function doesn't update permissions, caller is responsible for this.
  */
 static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
 Transaction *tran)
 {
 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
-
-assert(child->quiesced_parent);
-assert(!new_bs || new_bs->quiesce_counter);
-
 *s = (BdrvReplaceChildState) {
 .child = child,
 .old_bs = child->bs,
@@ -2831,14 +2810,6 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission 
qapi_perm)
 return permissions[qapi_perm];
 }
 
-/*
- * Replaces the node that a BdrvChild points to without updating permissions.
- *
- * If @new_bs is non-NULL, the parent of @child must already be drained through
- * @child.
- *
- * This function does not poll.
- */
 static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs)
 {
@@ -2846,28 +2817,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 int new_bs_quiesce_counter;
 
 assert(!child->frozen);
-
-/*
- * If we want to change the BdrvChild to point to a drained node as its new
- * child->bs, we need to make sure that its new parent is drained, too. In
- * other words, either child->quiesce_parent must already be true or we 
must
- * be able to set it and keep the parent's quiesce_counter consistent with
- * that, but without polling or starting new requests (this function
- * guarantees that it doesn't poll, and starting new requests would be
- * against the invariants of drain sections).
- *
- * To keep things simple, we pick the first option (child->quiesce_parent
- * must already be true). We also generalise the rule a bit to make it
- * easier to verify in callers and more likely to be covered in test cases:
- * The parent must be quiesced through this child even if new_bs isn't
- * currently drained.
- *
- * The only exception is for callers that always pass new_bs == NULL. In
- * this case, we obviously never need to consider the case of a drained
- * new_bs, so we can keep the callers simpler by allowing them not to drain
- * the parent.
- */
-assert(!new_bs || child->quiesced_parent);
 assert(old_bs != new_bs);
 GLOBAL_STATE_CODE();
 
@@ -2875,6 +2824,15 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
 }
 
+/*
+ * If the new child node is drained but the old one was not, flush
+ * all outstanding requests to the old child node.
+ */
+new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+if (new_bs_quiesce_counter && !child->quiesced_parent) {
+bdrv_parent_drained_begin_single(child, true);
+}
+
 if (old_bs) {
 if (child->klass->detach) {
 child->klass->detach(child);
@@ -2894,9 +2852,11 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 }
 
 /*
- * If the parent was drained through this BdrvChild previously, but new_bs
- * is not drained, allow requests to come in only after the new node has
- * been attached.
+ * If the old child node was drained but the new one is not, allow
+ * requests to come in only after the new node has been atta

[PATCH 00/12] More cleanups and fixes for drain

2022-12-12 Thread Paolo Bonzini

There are a few more lines of code that can be removed around draining
code, but especially the logic can be simplified by removing unnecessary
parameters.

Due to the failure of the block-next branch, the first three patches
drop patches 14+15 of Kevin's drain cleanup series, and then redo
patch 15 in a slightly less satisfactory way that still enables the
remaining cleanups.  These reverts are not supposed to be applied;
either the offending patches are dropped from the branch, or if the
issue is fixed then my first three patches can go away.

The next three are taken from Emanuele's old subtree drain attempt
at removing the AioContext.  The main one is the second, which is needed
to avoid testcase failures, but I included all of them for simplicity.

Patch 7 fixes another latent bug exposed by the later cleanups, and while
looking for a fix I noticed a general lack of thread-safety in BlockBackend's
drain code.  There are some global properties that only need to be documented
and enforced to be set only at creation time (patches 8/9), but also
queued_requests is not protected by any mutex, which is fixed in patch 10.

Finally, patches 11-15 are the actual simplification.

Applies on top of block-next.

Paolo

Emanuele Giuseppe Esposito (3):
  test-bdrv-drain.c: remove test_detach_by_parent_cb()
  tests/unit/test-bdrv-drain.c: graph setup functions can't run in
coroutines
  tests/qemu-iotests/030: test_stream_parallel should use
auto_finalize=False

Paolo Bonzini (12):
  Revert "block: Remove poll parameter from
bdrv_parent_drained_begin_single()"
  Revert "block: Don't poll in bdrv_replace_child_noperm()"
  block: Pull polling out of bdrv_parent_drained_begin_single()
  block-backend: enter aio coroutine only after drain
  nbd: a BlockExport always has a BlockBackend
  block-backend: make global properties write-once
  block-backend: always wait for drain before starting operation
  block-backend: make queued_requests thread-safe
  block: limit bdrv_co_yield_to_drain to drain_begin
  block: second argument of bdrv_do_drained_end is always NULL
  block: second argument of bdrv_do_drained_begin and bdrv_drain_poll is
always NULL
  block: only get out of coroutine context for polling

 block.c   | 109 
 block/block-backend.c |  91 +---
 block/commit.c|   4 +-
 block/export/export.c |   2 +-
 block/io.c| 136 +---
 block/mirror.c|   4 +-
 block/parallels.c |   2 +-
 block/qcow.c  |   2 +-
 block/qcow2.c |   2 +-
 block/qed.c   |   2 +-
 block/stream.c|   4 +-
 block/vdi.c   |   2 +-
 block/vhdx.c  |   2 +-
 block/vmdk.c  |   4 +-
 block/vpc.c   |   2 +-
 include/block/block-io.h  |  29 +-
 include/block/block_int-io.h  |  21 
 include/sysemu/block-backend-io.h |   6 +-
 nbd/server.c  |  15 ++-
 tests/qemu-iotests/030|  12 +--
 tests/unit/test-bdrv-drain.c  | 166 +++---
 tests/unit/test-block-iothread.c  |   2 +-
 22 files changed, 258 insertions(+), 361 deletions(-)

-- 
2.38.1

[PATCH 11/15] block-backend: make queued_requests thread-safe

2022-12-12 Thread Paolo Bonzini

Protect quiesce_counter and queued_requests with a QemuMutex, so that
they are protected from concurrent access in the main thread (for example
blk_root_drained_end() reached from bdrv_drain_all()) and in the iothread
(where any I/O operation will call blk_inc_in_flight()).

Signed-off-by: Paolo Bonzini 
---
 block/block-backend.c | 44 +++
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 627d491d4155..fdf82f1f1599 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -23,6 +23,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-events-block.h"
 #include "qemu/id.h"
+#include "qemu/thread.h"
 #include "qemu/main-loop.h"
 #include "qemu/option.h"
 #include "trace.h"
@@ -85,6 +86,8 @@ struct BlockBackend {
 NotifierList remove_bs_notifiers, insert_bs_notifiers;
 QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
+/* Protected by quiesce_lock.  */
+QemuMutex quiesce_lock;
 int quiesce_counter;
 CoQueue queued_requests;
 
@@ -372,6 +375,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, 
uint64_t shared_perm)
 
 block_acct_init(&blk->stats);
 
+qemu_mutex_init(&blk->quiesce_lock);
 qemu_co_queue_init(&blk->queued_requests);
 notifier_list_init(&blk->remove_bs_notifiers);
 notifier_list_init(&blk->insert_bs_notifiers);
@@ -490,6 +494,7 @@ static void blk_delete(BlockBackend *blk)
 assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
 assert(QLIST_EMPTY(&blk->aio_notifiers));
 QTAILQ_REMOVE(&block_backends, blk, link);
+qemu_mutex_destroy(&blk->quiesce_lock);
 drive_info_del(blk->legacy_dinfo);
 block_acct_cleanup(&blk->stats);
 g_free(blk);
@@ -1451,11 +1456,25 @@ void blk_inc_in_flight(BlockBackend *blk)
 {
 IO_CODE();
 qatomic_inc(&blk->in_flight);
-if (!blk->disable_request_queuing) {
-/* TODO: this is not thread-safe! */
+
+/*
+ * Avoid a continuous stream of requests from AIO callbacks, which
+ * call a user-provided callback while blk->in_flight is elevated,
+ * if the BlockBackend is being quiesced.
+ *
+ * This initial test does not have to be perfect: a race might
+ * cause an extra I/O to be queued, but sooner or later a nonzero
+ * quiesce_counter will be observed.
+ */
+if (!blk->disable_request_queuing && qatomic_read(&blk->quiesce_counter)) {
+/*
+ * ... on the other hand, it is important that the final check and
+* the wait are done under the lock, so that no wakeups are missed.
+ */
+QEMU_LOCK_GUARD(&blk->quiesce_lock);
 while (blk->quiesce_counter) {
 qatomic_dec(&blk->in_flight);
-qemu_co_queue_wait(&blk->queued_requests, NULL);
+qemu_co_queue_wait(&blk->queued_requests, &blk->quiesce_lock);
 qatomic_inc(&blk->in_flight);
 }
 }
@@ -2542,7 +2561,8 @@ static void blk_root_drained_begin(BdrvChild *child)
 BlockBackend *blk = child->opaque;
 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 
-if (++blk->quiesce_counter == 1) {
+qatomic_set(&blk->quiesce_counter, blk->quiesce_counter + 1);
+if (blk->quiesce_counter == 1) {
 if (blk->dev_ops && blk->dev_ops->drained_begin) {
 blk->dev_ops->drained_begin(blk->dev_opaque);
 }
@@ -2560,6 +2580,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
 {
 BlockBackend *blk = child->opaque;
 bool busy = false;
+
 assert(blk->quiesce_counter);
 
 if (blk->dev_ops && blk->dev_ops->drained_poll) {
@@ -2576,14 +2597,21 @@ static void blk_root_drained_end(BdrvChild *child)
 assert(blk->public.throttle_group_member.io_limits_disabled);
 qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
 
-if (--blk->quiesce_counter == 0) {
+qemu_mutex_lock(&blk->quiesce_lock);
+qatomic_set(&blk->quiesce_counter, blk->quiesce_counter - 1);
+if (blk->quiesce_counter == 0) {
+/* After this point, new I/O will not sleep on queued_requests.  */
+qemu_mutex_unlock(&blk->quiesce_lock);
+
 if (blk->dev_ops && blk->dev_ops->drained_end) {
 blk->dev_ops->drained_end(blk->dev_opaque);
 }
-while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
-/* Resume all queued requests */
-}
+
+/* Now resume all queued requests */
+qemu_mutex_lock(&blk->quiesce_lock);
+qemu_co_enter_all(&blk->queued_requests, &blk->quiesce_lock);
 }
+qemu_mutex_unlock(&blk->quiesce_lock);
 }
 
 bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
-- 
2.38.1

[PATCH 14/15] block: second argument of bdrv_do_drained_begin and bdrv_drain_poll is always NULL

2022-12-12 Thread Paolo Bonzini

Remove it from the functions, from callers/callees such as
bdrv_do_drained_begin_quiesce() and bdrv_drain_poll(), and
from bdrv_co_yield_to_drain() and BdrvCoDrainData.

Signed-off-by: Paolo Bonzini 
---
 block.c  |  4 ++--
 block/io.c   | 49 
 include/block/block-io.h |  7 +++---
 3 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/block.c b/block.c
index c542a0a33358..676bbe0529b0 100644
--- a/block.c
+++ b/block.c
@@ -1186,13 +1186,13 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
 static void bdrv_child_cb_drained_begin(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
-bdrv_do_drained_begin_quiesce(bs, NULL);
+bdrv_do_drained_begin_quiesce(bs);
 }
 
 static bool bdrv_child_cb_drained_poll(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
-return bdrv_drain_poll(bs, NULL, false);
+return bdrv_drain_poll(bs, false);
 }
 
 static void bdrv_child_cb_drained_end(BdrvChild *child)
diff --git a/block/io.c b/block/io.c
index c2962adf8d2d..a75f42ee13cb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -45,14 +45,11 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 int64_t offset, int64_t bytes, BdrvRequestFlags flags);
 
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
 BdrvChild *c, *next;
 
 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-if (c == ignore) {
-continue;
-}
 bdrv_parent_drained_begin_single(c);
 }
 }
@@ -86,14 +83,13 @@ bool bdrv_parent_drained_poll_single(BdrvChild *c)
 return false;
 }
 
-static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, bool 
ignore_bds_parents)
 {
 BdrvChild *c, *next;
 bool busy = false;
 
 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+if (ignore_bds_parents && c->klass->parent_is_bds) {
 continue;
 }
 busy |= bdrv_parent_drained_poll_single(c);
@@ -231,16 +227,14 @@ typedef struct {
 BlockDriverState *bs;
 bool done;
 bool poll;
-BdrvChild *parent;
 } BdrvCoDrainData;
 
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
- bool ignore_bds_parents)
+bool bdrv_drain_poll(BlockDriverState *bs, bool ignore_bds_parents)
 {
 IO_OR_GS_CODE();
 
-if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
+if (bdrv_parent_drained_poll(bs, ignore_bds_parents)) {
 return true;
 }
 
@@ -251,14 +245,12 @@ bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild 
*ignore_parent,
 return false;
 }
 
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
-  BdrvChild *ignore_parent)
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs)
 {
-return bdrv_drain_poll(bs, ignore_parent, false);
+return bdrv_drain_poll(bs, false);
 }
 
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
-  bool poll);
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool poll);
 
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
@@ -270,7 +262,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 AioContext *ctx = bdrv_get_aio_context(bs);
 aio_context_acquire(ctx);
 bdrv_dec_in_flight(bs);
-bdrv_do_drained_begin(bs, data->parent, data->poll);
+bdrv_do_drained_begin(bs, data->poll);
 aio_context_release(ctx);
 } else {
 bdrv_drain_all_begin();
@@ -281,7 +273,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 }
 
 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
-BdrvChild *parent,
 bool poll)
 {
 BdrvCoDrainData data;
@@ -297,7 +288,6 @@ static void coroutine_fn 
bdrv_co_yield_to_drain(BlockDriverState *bs,
 .co = self,
 .bs = bs,
 .done = false,
-.parent = parent,
 .poll = poll,
 };
 
@@ -329,20 +319,19 @@ static void coroutine_fn 
bdrv_co_yield_to_drain(BlockDriverState *bs,
 }
 }
 
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
-  bool poll)
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool poll)
 {
 IO_OR_GS_CODE();
 
 if (qemu_in_coroutine()) {
-bdrv_co_yield_to_drain(bs, parent, poll);
+bdrv_co_yield_to_drain(bs, poll);
 return;
 }
 
 /*

[PATCH 04/15] test-bdrv-drain.c: remove test_detach_by_parent_cb()

2022-12-12 Thread Paolo Bonzini

From: Emanuele Giuseppe Esposito 

This test uses a callback of an I/O function (blk_aio_preadv)
to modify the graph, using bdrv_attach_child.
This is simply not allowed anymore. I/O cannot change the graph.

Before "block/io.c: make bdrv_do_drained_begin_quiesce static
and introduce bdrv_drained_begin_no_poll", the test would simply
be at risk of failure, because if bdrv_replace_child_noperm()
(called to modify the graph) would call a drain,
then one callback of .drained_begin() is bdrv_do_drained_begin_quiesce,
that specifically asserts that we are not in a coroutine.

Now that we fixed the behavior, the drain will invoke a bh in the
main loop, so we don't have such problem. However, this test is still
illegal and fails because we forbid graph changes from I/O paths.

Once we add the required subtree_drains to protect
bdrv_replace_child_noperm(), the key problem in this test is in:

acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
/* Drain and check the expected result */
bdrv_subtree_drained_begin(parent_b);

because the detach_by_parent_aio_cb calls detach_indirect_bh(), that
modifies the graph and is invoked during bdrv_subtree_drained_begin().
The call stack is the following:
1. blk_aio_preadv() creates a coroutine, increments in_flight counter
and enters the coroutine running blk_aio_read_entry()
2. blk_aio_read_entry() performs the read and then schedules a bh to
   complete (blk_aio_complete)
3. at this point, subtree_drained_begin() kicks in and waits for all
   in_flight requests, polling
4. polling allows the bh to be scheduled, so blk_aio_complete runs
5. blk_aio_complete *first* invokes the callback
   (detach_by_parent_aio_cb) and then decrements the in_flight counter
6. Here we have the problem: detach_by_parent_aio_cb modifies the graph,
   so both bdrv_unref_child() and bdrv_attach_child() will have
   subtree_drains inside. And this causes a deadlock, because the
   nested drain will wait for in_flight counter to go to zero, which
   is only happening once the drain itself finishes.

Different story is test_detach_by_driver_cb(): in this case,
detach_by_parent_aio_cb() does not call detach_indirect_bh(),
but it is instead called as a bh running in the main loop by
detach_by_driver_cb_drained_begin(), the callback for
.drained_begin().

This test was added in 231281ab42 and part of the series
"Drain fixes and cleanups, part 3"
https://lists.nongnu.org/archive/html/qemu-block/2018-05/msg01132.html
but as explained above I believe that it is not valid anymore, and
can be discarded.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220314131854.2202651-8-eespo...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 tests/unit/test-bdrv-drain.c | 42 +---
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 172bc6debc23..3be214f30186 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1106,7 +1106,6 @@ struct detach_by_parent_data {
 BdrvChild *child_b;
 BlockDriverState *c;
 BdrvChild *child_c;
-bool by_parent_cb;
 };
 static struct detach_by_parent_data detach_by_parent_data;
 
@@ -1124,12 +1123,7 @@ static void detach_indirect_bh(void *opaque)
 
 static void detach_by_parent_aio_cb(void *opaque, int ret)
 {
-struct detach_by_parent_data *data = &detach_by_parent_data;
-
 g_assert_cmpint(ret, ==, 0);
-if (data->by_parent_cb) {
-detach_indirect_bh(data);
-}
 }
 
 static void detach_by_driver_cb_drained_begin(BdrvChild *child)
@@ -1148,33 +1142,26 @@ static BdrvChildClass detach_by_driver_cb_class;
  *\ /   \
  * A B C
  *
- * by_parent_cb == true:  Test that parent callbacks don't poll
- *
- * PA has a pending write request whose callback changes the child nodes of
- * PB: It removes B and adds C instead. The subtree of PB is drained, which
- * will indirectly drain the write request, too.
- *
- * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll
+ * Test that bdrv_drain_invoke() doesn't poll
  *
  * PA's BdrvChildClass has a .drained_begin callback that schedules a BH
  * that does the same graph change. If bdrv_drain_invoke() calls it, the
  * state is messed up, but if it is only polled in the single
  * BDRV_POLL_WHILE() at the end of the drain, this should work fine.
  */
-static void test_detach_indirect(bool by_parent_cb)
+static void test_detach_indirect(void)
 {
 BlockBackend *blk;
 BlockDriverState *parent_a, *parent_b, *a, *b, *c;
 BdrvChild *child_a, *child_b;
 BlockAIOCB *acb;
+BDRVTestState *s;
 
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
 
-if (!by_parent_cb) {
-detach_by_driver_cb_class = child_of_bds;
-detach_by_driver_cb_class.drained_begin =
-detach_by_driver_cb_drained_begin;
-}
+detach_by_driver_cb_class =

[PATCH 01/15] Revert "block: Remove poll parameter from bdrv_parent_drained_begin_single()"

2022-12-12 Thread Paolo Bonzini

This reverts commit dcc5d4bc2abed4268bf31908193c4369e4c9d005.

Signed-off-by: Paolo Bonzini 
---
 block.c  | 4 ++--
 block/io.c   | 8 ++--
 include/block/block-io.h | 5 +++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 6191ac1f440c..87022f4cd971 100644
--- a/block.c
+++ b/block.c
@@ -2377,7 +2377,7 @@ static void bdrv_replace_child_abort(void *opaque)
  * new_bs drained when calling bdrv_replace_child_tran() is not a
  * requirement any more.
  */
-bdrv_parent_drained_begin_single(s->child);
+bdrv_parent_drained_begin_single(s->child, false);
 assert(!bdrv_parent_drained_poll_single(s->child));
 }
 assert(s->child->quiesced_parent);
@@ -3050,7 +3050,7 @@ static BdrvChild 
*bdrv_attach_child_common(BlockDriverState *child_bs,
  * a problem, we already did this), but it will still poll until the parent
  * is fully quiesced, so it will not be negatively affected either.
  */
-bdrv_parent_drained_begin_single(new_child);
+bdrv_parent_drained_begin_single(new_child, false);
 bdrv_replace_child_noperm(new_child, child_bs);
 
 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
diff --git a/block/io.c b/block/io.c
index fbd9..aee6e70c1496 100644
--- a/block/io.c
+++ b/block/io.c
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, 
BdrvChild *ignore)
 if (c == ignore) {
 continue;
 }
-bdrv_parent_drained_begin_single(c);
+bdrv_parent_drained_begin_single(c, false);
 }
 }
 
@@ -105,8 +105,9 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, 
BdrvChild *ignore,
 return busy;
 }
 
-void bdrv_parent_drained_begin_single(BdrvChild *c)
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
 {
+AioContext *ctx = bdrv_child_get_parent_aio_context(c);
 IO_OR_GS_CODE();
 
 assert(!c->quiesced_parent);
@@ -115,6 +116,9 @@ void bdrv_parent_drained_begin_single(BdrvChild *c)
 if (c->klass->drained_begin) {
 c->klass->drained_begin(c);
 }
+if (poll) {
+AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
+}
 }
 
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 52869ea08eb5..75d043204355 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -308,9 +308,10 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos);
 /**
  * bdrv_parent_drained_begin_single:
  *
- * Begin a quiesced section for the parent of @c.
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
+ * any pending activity to cease.
  */
-void bdrv_parent_drained_begin_single(BdrvChild *c);
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
 
 /**
  * bdrv_parent_drained_poll_single:
-- 
2.38.1

[PATCH 08/15] nbd: a BlockExport always has a BlockBackend

2022-12-12 Thread Paolo Bonzini

exp->common.blk cannot be NULL, nbd_export_delete() is only called from
blk_exp_unref() and in turn that can only happen after blk_exp_add()
has asserted exp->blk != NULL.

Signed-off-by: Paolo Bonzini 
---
 nbd/server.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index c53c39560e50..462ddb0e4adb 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1848,15 +1848,13 @@ static void nbd_export_delete(BlockExport *blk_exp)
 g_free(exp->description);
 exp->description = NULL;
 
-if (exp->common.blk) {
-if (exp->eject_notifier_blk) {
-notifier_remove(&exp->eject_notifier);
-blk_unref(exp->eject_notifier_blk);
-}
-blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
-blk_aio_detach, exp);
-blk_set_disable_request_queuing(exp->common.blk, false);
+if (exp->eject_notifier_blk) {
+notifier_remove(&exp->eject_notifier);
+blk_unref(exp->eject_notifier_blk);
 }
+blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
+blk_aio_detach, exp);
+blk_set_disable_request_queuing(exp->common.blk, false);
 
 for (i = 0; i < exp->nr_export_bitmaps; i++) {
 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
-- 
2.38.1

[Bug 1523246] Re: Virtio-blk does not support TRIM

2022-12-12 Thread mohammadreza nasrabadi

I've checked the size of disk file with ls -alh  and du --apparent-size
commands

I downloaded the 1G size file 5 times to make sure about sparse blocks
issue, but the problem still exists

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1523246

Title:
  Virtio-blk does not support TRIM

Status in QEMU:
  Fix Released

Bug description:
  When model=virtio is used, TRIM is not supported.

  # mount -o discard /dev/vda4 /mnt
  # mount | tail -1
  /dev/vda4 on /mnt type fuseblk 
(rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other,blksize=4096)
  # fstrim /mnt/
  fstrim: /mnt/: the discard operation is not supported

  Booting without model=virtio allows using TRIM (in Windows as well).

  Full QEMU line:

  qemu-system-x86_64 -enable-kvm -cpu host -bios
  /usr/share/ovmf/ovmf_x64.bin -smp 2 -m 7G -vga qxl -usbdevice tablet
  -net nic,model=virtio -net user -drive discard=unmap,detect-
  zeroes=unmap,cache=none,file=vms/win10.hd.img.vmdk,format=vmdk,if=virtio

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1523246/+subscriptions

1 2 3 >

1 - 100 of 282 matches

Mail list logo