Re: [PATCH v1 05/24] vfio-user: add device IO ops vector
On 11/9/22 00:13, John Johnson wrote: Used for communication with VFIO driver (prep work for vfio-user, which will communicate over a socket) Signed-off-by: John G Johnson I would keep the short rw handling for a subsequent patch to make sure the changes do not introduce any functional change. --- hw/vfio/ap.c | 1 + hw/vfio/ccw.c | 1 + hw/vfio/common.c | 107 +++- hw/vfio/pci.c | 140 ++ hw/vfio/platform.c| 1 + include/hw/vfio/vfio-common.h | 27 6 files changed, 209 insertions(+), 68 deletions(-) diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index e0dd561..7ef42f1 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -102,6 +102,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) mdevid = basename(vapdev->vdev.sysfsdev); vapdev->vdev.name = g_strdup_printf("%s", mdevid); vapdev->vdev.dev = dev; +vapdev->vdev.io_ops = &vfio_dev_io_ioctl; /* * vfio-ap devices operate in a way compatible with discarding of diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 06b588c..cbd1c25 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -614,6 +614,7 @@ static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; vcdev->vdev.name = name; vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj; +vcdev->vdev.io_ops = &vfio_dev_io_ioctl; return; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index dd9104f..c7bf0aa 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -71,7 +71,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) .count = 0, }; -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +VDEV_SET_IRQS(vbasedev, &irq_set); } void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) @@ -84,7 +84,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) .count = 1, }; -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +VDEV_SET_IRQS(vbasedev, &irq_set); } void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) @@ -97,7 +97,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) .count = 1, }; -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +VDEV_SET_IRQS(vbasedev, &irq_set); } static inline const char *action_to_str(int action) @@ -178,9 +178,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, pfd = (int32_t *)&irq_set->data; *pfd = fd; -if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { -ret = -errno; -} +ret = VDEV_SET_IRQS(vbasedev, irq_set); g_free(irq_set); if (!ret) { @@ -215,6 +213,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; +int ret; switch (size) { case 1: @@ -234,13 +233,15 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } -if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { +ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf); +if (ret != size) { +const char *err = ret < 0 ? strerror(-ret) : "short write"; + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", + ",%d) failed: %s", __func__, vbasedev->name, region->nr, - addr, data, size); + addr, data, size, err); } - trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); /* @@ -266,13 +267,18 @@ uint64_t vfio_region_read(void *opaque, uint64_t qword; } buf; uint64_t data = 0; +int ret; + +ret = VDEV_REGION_READ(vbasedev, region->nr, addr, size, &buf); +if (ret != size) { +const char *err = ret < 0 ? strerror(-ret) : "short read"; -if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { -error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", +error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s", __func__, vbasedev->name, region->nr, - addr, size); + addr, size, err); return (uint64_t)-1; } + switch (size) { case 1: data = buf.byte; @@ -2450,6 +2456,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); +int ret; /* create region cache */ if (vbasedev->regions == NULL) { @@ -2468,10 +2475,11 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; -if (ioctl(v
Re: [PATCH-for-8.0 1/7] hw/mips/Kconfig: Introduce CONFIG_GT64120 to select gt64xxx_pci.c
On 12/12/22 01:13, Bernhard Beschow wrote: Am 9. Dezember 2022 15:15:27 UTC schrieb "Philippe Mathieu-Daudé" : From: Philippe Mathieu-Daudé Signed-off-by: Philippe Mathieu-Daudé --- hw/mips/Kconfig | 6 ++ hw/mips/meson.build | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index 725525358d..d6bbbe7069 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -1,5 +1,6 @@ config MALTA bool +select GT64120 select ISA_SUPERIO config MIPSSIM @@ -59,3 +60,8 @@ config MIPS_BOSTON config FW_CFG_MIPS bool + +config GT64120 +bool +select PCI +select I8259 s/select I8259/depends on I8259/ since the model needs but doesn't provide I8259? Then just take my mail regarding the last patch as a reminder. I try to remember the 'depends on' directive as "depends on BUS". If there is no BUS, then the option is pointless. Here "select PCI" means 'provide/expose a PCI bus on the machine'. I8259 must be available for GT64120 to be working. If you need a GT64120, its 'select' directive will select the minimum options required, so it will implicitly select a I8259. See docs/devel/kconfig.rst: **dependencies**: ``depends on `` This defines a dependency for this configurable element. Dependencies evaluate an expression and force the value of the variable to false if the expression is false. **reverse dependencies**: ``select [if ]`` While ``depends on`` can force a symbol to false, reverse dependencies can be used to force another symbol to true. **devices** Example:: config MEGASAS_SCSI_PCI bool default y if PCI_DEVICES depends on PCI select SCSI Devices are the most complex of the five. They can have a variety of directives that cooperate so that a default configuration includes all the devices that can be accessed from QEMU. Devices *depend on* the bus that they lie on, for example a PCI device would specify ``depends on PCI``. An MMIO device will likely have no ``depends on`` directive. Devices also *select* the buses that the device provides, for example a SCSI adapter would specify ``select SCSI``. Finally, devices are usually ``default y`` if and only if they have at least one ``depends on``; the default could be conditional on a device group. Otherwise: Reviewed-by: Bernhard Beschow Thanks!
Re: [PATCH v3] hw/rtc/mc146818rtc: Make this rtc device target independent
On 12/12/22 08:56, Thomas Huth wrote: The only reason for this code being target dependent is the apic-related code in rtc_policy_slew_deliver_irq(). Since these apic functions are rather simple, we can easily move them into a new, separate file (apic_irqcount.c) which will always be compiled and linked if either APIC or the mc146818 device are required. This way we can get rid of the #ifdef TARGET_I386 switches in mc146818rtc.c and declare it in the softmmu_ss instead of specific_ss, so that the code only gets compiled once for all targets. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Mark Cave-Ayland Signed-off-by: Thomas Huth --- v3: Move TYPE_APIC_COMMON from apic_internal.h to apic.h and use it Ah, clever. include/hw/i386/apic.h | 2 ++ include/hw/i386/apic_internal.h | 2 -- include/hw/rtc/mc146818rtc.h| 1 + hw/intc/apic_common.c | 27 - hw/intc/apic_irqcount.c | 53 + hw/rtc/mc146818rtc.c| 25 +--- hw/intc/meson.build | 6 +++- hw/rtc/meson.build | 3 +- 8 files changed, 69 insertions(+), 50 deletions(-) create mode 100644 hw/intc/apic_irqcount.c
Re: [PATCH 06/30] meson: tweak hardening options for Windows
Hi On Fri, Dec 9, 2022 at 3:36 PM Paolo Bonzini wrote: > > -Wl,--dynamicbase has been enabled for DLLs upstream for roughly 2 > years (https://sourceware.org/bugzilla/show_bug.cgi?id=19011), and > also by some distros including Debian for 6 years even > (https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=836365), so > just enable it unconditionally. > > Also add -Wl,--high-entropy-va. > > Signed-off-by: Paolo Bonzini > --- > meson.build | 5 + > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/meson.build b/meson.build > index 5c6b5a1c757f..d61c7a82f112 100644 > --- a/meson.build > +++ b/meson.build > @@ -193,10 +193,7 @@ qemu_ldflags += > cc.get_supported_link_arguments('-Wl,-z,relro', '-Wl,-z,now') > > if targetos == 'windows' >qemu_ldflags += cc.get_supported_link_arguments('-Wl,--no-seh', > '-Wl,--nxcompat') > - # Disable ASLR for debug builds to allow debugging with gdb > - if get_option('optimization') == '0' > -qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase') > - endif > + qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', > '-Wl,--high-entropy-va') What about the comment for disabling ASLR on debug builds? I wonder if we really have to add those flags ourself. Imho, we can leave them to the compiler default or distrib.. I bet most of the deps don't use those flags explicitly either. -- Marc-André Lureau
Re: [PATCH 07/30] meson: support meson 0.64 -Doptimization=plain
Hi On Fri, Dec 9, 2022 at 3:31 PM Paolo Bonzini wrote: > > In Meson 0.64, the optimization built-in option now accepts the "plain" value, > which will not set any optimization flags. While QEMU does not check the > contents of the option and therefore does not suffer any ill effect > from the new value, it uses get_option to print the optimization flags > in the summary. Clean the code up to remove duplication, and check for > -Doptimization=plain at the same time. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > meson.build | 16 +++- > 1 file changed, 7 insertions(+), 9 deletions(-) > > diff --git a/meson.build b/meson.build > index d61c7a82f112..dbd0b5563446 100644 > --- a/meson.build > +++ b/meson.build > @@ -3752,18 +3752,16 @@ endif > if targetos == 'darwin' >summary_info += {'Objective-C compiler': ' > '.join(meson.get_compiler('objc').cmd_array())} > endif > -summary_info += {'CFLAGS':' '.join(get_option('c_args') > - + ['-O' + > get_option('optimization')] > - + (get_option('debug') ? > ['-g'] : []))} > +option_cflags = (get_option('debug') ? ['-g'] : []) > +if get_option('optimization') != 'plain' > + option_cflags += ['-O' + get_option('optimization')] > +endif > +summary_info += {'CFLAGS':' '.join(get_option('c_args') + > option_cflags)} > if link_language == 'cpp' > - summary_info += {'CXXFLAGS':' '.join(get_option('cpp_args') > - + ['-O' + > get_option('optimization')] > - + (get_option('debug') ? > ['-g'] : []))} > + summary_info += {'CXXFLAGS':' '.join(get_option('cpp_args') + > option_cflags)} > endif > if targetos == 'darwin' > - summary_info += {'OBJCFLAGS': ' '.join(get_option('objc_args') > - + ['-O' + > get_option('optimization')] > - + (get_option('debug') ? > ['-g'] : []))} > + summary_info += {'OBJCFLAGS': ' '.join(get_option('objc_args') + > option_cflags)} > endif > link_args = get_option(link_language + '_link_args') > if link_args.length() > 0 > -- > 2.38.1 > > -- Marc-André Lureau
Re: [PATCH 09/30] meson: use prefer_static option
On Fri, Dec 9, 2022 at 3:42 PM Paolo Bonzini wrote: > > The option is new in Meson 0.63 and removes the need to pass "static: > true" to all dependency and find_library invocation. Actually cleaning > up the invocations is left for a separate patch. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure | 4 +--- > docs/devel/build-system.rst | 3 +-- > meson.build | 11 --- > qga/meson.build | 2 +- > 4 files changed, 7 insertions(+), 13 deletions(-) > > diff --git a/configure b/configure > index 411dfe977958..6efc2055ce09 100755 > --- a/configure > +++ b/configure > @@ -2315,9 +2315,6 @@ fi > if test "$solaris" = "yes" ; then >echo "CONFIG_SOLARIS=y" >> $config_host_mak > fi > -if test "$static" = "yes" ; then > - echo "CONFIG_STATIC=y" >> $config_host_mak > -fi > echo "SRC_PATH=$source_path" >> $config_host_mak > echo "TARGET_DIRS=$target_list" >> $config_host_mak > if test "$modules" = "yes"; then > @@ -2540,6 +2537,7 @@ if test "$skip_meson" = no; then ># Built-in options >test "$bindir" != "bin" && meson_option_add "-Dbindir=$bindir" >test "$default_feature" = no && meson_option_add -Dauto_features=disabled > + test "$static" = yes && meson_option_add -Dprefer_static=true >test "$pie" = no && meson_option_add -Db_pie=false >test "$werror" = yes && meson_option_add -Dwerror=true > > diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst > index 189472174340..9db18aff159e 100644 > --- a/docs/devel/build-system.rst > +++ b/docs/devel/build-system.rst > @@ -311,8 +311,7 @@ dependency will be used:: >sdl_image = not_found >if not get_option('sdl_image').auto() or have_system > sdl_image = dependency('SDL2_image', required: get_option('sdl_image'), > - method: 'pkg-config', > - static: enable_static) > + method: 'pkg-config') >endif > > This avoids warnings on static builds of user-mode emulators, for example. > diff --git a/meson.build b/meson.build > index 19b023985325..dced840bfbee 100644 > --- a/meson.build > +++ b/meson.build > @@ -18,10 +18,7 @@ sh = find_program('sh') > cc = meson.get_compiler('c') > config_host = keyval.load(meson.current_build_dir() / 'config-host.mak') > enable_modules = 'CONFIG_MODULES' in config_host > -enable_static = 'CONFIG_STATIC' in config_host > - > -# Allow both shared and static libraries unless --enable-static > -static_kwargs = enable_static ? {'static': true} : {} > +static_kwargs = {} > > # Temporary directory used for files created while > # configure runs. Since it is in the build directory > @@ -183,7 +180,7 @@ qemu_cflags = config_host['QEMU_CFLAGS'].split() > qemu_objcflags = config_host['QEMU_OBJCFLAGS'].split() > qemu_ldflags = config_host['QEMU_LDFLAGS'].split() > > -if enable_static > +if get_option('prefer_static') >qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static' > endif > > @@ -830,7 +827,7 @@ if targetos == 'linux' and have_tools and > get_option('mpath').allowed() > kwargs: static_kwargs) >if libmpathpersist.found() > mpathlibs += libmpathpersist > -if enable_static > +if get_option('prefer_static') >mpathlibs += cc.find_library('devmapper', > required: get_option('mpath'), > kwargs: static_kwargs) > @@ -1214,7 +1211,7 @@ if not gnutls_crypto.found() > # Debian has removed -lgpg-error from libgcrypt-config > # as it "spreads unnecessary dependencies" which in > # turn breaks static builds... > -if gcrypt.found() and enable_static > +if gcrypt.found() and get_option('prefer_static') >gcrypt = declare_dependency(dependencies: [ > gcrypt, > cc.find_library('gpg-error', required: true, kwargs: static_kwargs)]) > diff --git a/qga/meson.build b/qga/meson.build > index 3cfb9166e5d8..ec67326b25f3 100644 > --- a/qga/meson.build > +++ b/qga/meson.build > @@ -22,7 +22,7 @@ have_qga_vss = get_option('qga_vss') \ > Then run configure with: --extra-cxxflags="-isystem > /path/to/vss/inc/win2003"''') \ >.require(midl.found() or widl.found(), > error_message: 'VSS support requires midl or widl') \ > - .require(not enable_static, > + .require(not get_option('prefer_static'), > error_message: 'VSS support requires dynamic linking with GLib') \ >.allowed() > > -- > 2.38.1 > > -- Marc-André Lureau
Re: [PATCH 10/30] meson: remove static_kwargs
On Fri, Dec 9, 2022 at 3:31 PM Paolo Bonzini wrote: > > After static_kwargs has been changed to an empty dictionary, it has > no functional effect and can be removed. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > meson.build | 212 +++- > tcg/meson.build | 2 +- > 2 files changed, 84 insertions(+), 130 deletions(-) > > diff --git a/meson.build b/meson.build > index dced840bfbee..9ccbe0f6e4ee 100644 > --- a/meson.build > +++ b/meson.build > @@ -18,7 +18,6 @@ sh = find_program('sh') > cc = meson.get_compiler('c') > config_host = keyval.load(meson.current_build_dir() / 'config-host.mak') > enable_modules = 'CONFIG_MODULES' in config_host > -static_kwargs = {} > > # Temporary directory used for files created while > # configure runs. Since it is in the build directory > @@ -510,7 +509,7 @@ gdbus_codegen = not_found > gdbus_codegen_error = '@0@ requires gdbus-codegen, please install libgio' > if not get_option('gio').auto() or have_system >gio = dependency('gio-2.0', required: get_option('gio'), > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') >if gio.found() and not cc.links(''' > #include > int main(void) > @@ -527,7 +526,7 @@ if not get_option('gio').auto() or have_system > gdbus_codegen = find_program(gio.get_variable('gdbus_codegen'), > required: get_option('gio')) > gio_unix = dependency('gio-unix-2.0', required: get_option('gio'), > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') > gio = declare_dependency(dependencies: [gio, gio_unix], > version: gio.version()) >endif > @@ -540,20 +539,19 @@ endif > lttng = not_found > if 'ust' in get_option('trace_backends') >lttng = dependency('lttng-ust', required: true, version: '>= 2.1', > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') > endif > pixman = not_found > if have_system or have_tools >pixman = dependency('pixman-1', required: have_system, version:'>=0.21.8', > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') > endif > -zlib = dependency('zlib', required: true, kwargs: static_kwargs) > +zlib = dependency('zlib', required: true) > > libaio = not_found > if not get_option('linux_aio').auto() or have_block >libaio = cc.find_library('aio', has_headers: ['libaio.h'], > - required: get_option('linux_aio'), > - kwargs: static_kwargs) > + required: get_option('linux_aio')) > endif > > linux_io_uring_test = ''' > @@ -566,7 +564,7 @@ linux_io_uring = not_found > if not get_option('linux_io_uring').auto() or have_block >linux_io_uring = dependency('liburing', version: '>=0.3', >required: get_option('linux_io_uring'), > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') >if not cc.links(linux_io_uring_test) > linux_io_uring = not_found >endif > @@ -576,7 +574,7 @@ libnfs = not_found > if not get_option('libnfs').auto() or have_block >libnfs = dependency('libnfs', version: '>=1.9.3', >required: get_option('libnfs'), > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') > endif > > libattr_test = ''' > @@ -596,8 +594,7 @@ if get_option('attr').allowed() > libattr = declare_dependency() >else > libattr = cc.find_library('attr', has_headers: ['attr/xattr.h'], > - required: get_option('attr'), > - kwargs: static_kwargs) > + required: get_option('attr')) > if libattr.found() and not \ >cc.links(libattr_test, dependencies: libattr, args: '-DCONFIG_LIBATTR') >libattr = not_found > @@ -632,7 +629,7 @@ seccomp_has_sysrawrc = false > if not get_option('seccomp').auto() or have_system or have_tools >seccomp = dependency('libseccomp', version: '>=2.3.0', > required: get_option('seccomp'), > - method: 'pkg-config', kwargs: static_kwargs) > + method: 'pkg-config') >if seccomp.found() > seccomp_has_sysrawrc = cc.has_header_symbol('seccomp.h', > 'SCMP_FLTATR_API_SYSRAWRC', > @@ -643,8 +640,7 @@ endif > libcap_ng = not_found > if not get_option('cap_ng').auto() or have_system or have_tools >libcap_ng = cc.find_library('cap-ng', has_headers: ['cap-ng.h'], > - required: get_option('cap_ng'), > - kwargs: st
Re: [PATCH 0/3] s390x/pci: rpcit fixes and enhancements
On 28/10/2022 21.47, Matthew Rosato wrote: The following series fixes an issue with guest RPCIT processing discovered during development of [1] as well as proposes a few additional optimizations to the current RPCIT codepath. [1] https://lore.kernel.org/linux-s390/20221019144435.369902-1-schne...@linux.ibm.com/ Matthew Rosato (3): s390x/pci: RPCIT second pass when mappings exhausted s390x/pci: coalesce unmap operations s390x/pci: shrink DMA aperture to be bound by vfio DMA limit Thanks, I've queued patch 2 and 3 now to my s390x-next branch, too: https://gitlab.com/thuth/qemu/-/commits/s390x-next/ Thomas
Re: [PATCH 11/30] meson: cleanup dummy-cpus.c rules
Hi On Fri, Dec 9, 2022 at 3:48 PM Paolo Bonzini wrote: > > Now that qtest is available on all targets including Windows, dummy-cpus.c > is included unconditionally in the build. > > Signed-off-by: Paolo Bonzini (and doesn't have to be target specific) Reviewed-by: Marc-André Lureau > --- > accel/meson.build | 9 ++--- > 1 file changed, 2 insertions(+), 7 deletions(-) > > diff --git a/accel/meson.build b/accel/meson.build > index 259c35c4c882..3a480cc2efef 100644 > --- a/accel/meson.build > +++ b/accel/meson.build > @@ -11,10 +11,5 @@ if have_system >subdir('stubs') > endif > > -dummy_ss = ss.source_set() > -dummy_ss.add(files( > - 'dummy-cpus.c', > -)) > - > -specific_ss.add_all(when: ['CONFIG_SOFTMMU'], if_true: dummy_ss) > -specific_ss.add_all(when: ['CONFIG_XEN'], if_true: dummy_ss) > +# qtest > +softmmu_ss.add(files('dummy-cpus.c')) > -- > 2.38.1 > > -- Marc-André Lureau
Re: [PATCH 12/30] modinfo: lookup compile_commands.json by object
On Fri, Dec 9, 2022 at 3:43 PM Paolo Bonzini wrote: > > With Meson 0.63 having fixed various issues with extract_objects, the > compile_commands.json lookups can be simplified. If the lookup uses > the object file as key, there is no need to use the command line to > distinguish among all entries for a given source. > > Cc: Gerd Hoffmann > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > meson.build| 14 -- > scripts/modinfo-collect.py | 23 +++ > 2 files changed, 15 insertions(+), 22 deletions(-) > > diff --git a/meson.build b/meson.build > index 9ccbe0f6e4ee..8a9ed5628317 100644 > --- a/meson.build > +++ b/meson.build > @@ -3123,16 +3123,11 @@ foreach d, list : modules > softmmu_mods += sl >endif >if module_ss.sources() != [] > -# FIXME: Should use sl.extract_all_objects(recursive: true) as > -# input. Sources can be used multiple times but objects are > -# unique when it comes to lookup in compile_commands.json. > -# Depnds on a mesion version with > -# https://github.com/mesonbuild/meson/pull/8900 > modinfo_files += custom_target(d + '-' + m + '.modinfo', > output: d + '-' + m + '.modinfo', > - input: module_ss.sources() + genh, > + input: > sl.extract_all_objects(recursive: true), > capture: true, > - command: [modinfo_collect, > module_ss.sources()]) > + command: [modinfo_collect, '@INPUT@']) >endif > else >if d == 'block' > @@ -3165,12 +3160,11 @@ foreach d, list : target_modules > c_args: c_args, > pic: true) > softmmu_mods += sl > -# FIXME: Should use sl.extract_all_objects(recursive: true) too. > modinfo_files += custom_target(module_name + '.modinfo', > output: module_name + '.modinfo', > - input: target_module_ss.sources() > + genh, > + input: > sl.extract_all_objects(recursive: true), > capture: true, > - command: [modinfo_collect, > '--target', target, target_module_ss.sources()]) > + command: [modinfo_collect, > '--target', target, '@INPUT@']) >endif > endif >endforeach > diff --git a/scripts/modinfo-collect.py b/scripts/modinfo-collect.py > index 4e7584df6676..48bd92bd6180 100755 > --- a/scripts/modinfo-collect.py > +++ b/scripts/modinfo-collect.py > @@ -7,15 +7,6 @@ > import shlex > import subprocess > > -def find_command(src, target, compile_commands): > -for command in compile_commands: > -if command['file'] != src: > -continue > -if target != '' and command['command'].find(target) == -1: > -continue > -return command['command'] > -return 'false' > - > def process_command(src, command): > skip = False > out = [] > @@ -43,14 +34,22 @@ def main(args): > print("MODINFO_DEBUG target %s" % target) > arch = target[:-8] # cut '-softmmu' > print("MODINFO_START arch \"%s\" MODINFO_END" % arch) > + > with open('compile_commands.json') as f: > -compile_commands = json.load(f) > -for src in args: > +compile_commands_json = json.load(f) > +compile_commands = { x['output']: x for x in compile_commands_json } > + > +for obj in args: > +entry = compile_commands.get(obj, None) > +if not entry: > +sys.stderr.print('modinfo: Could not find object file', obj) > +sys.exit(1) > +src = entry['file'] > if not src.endswith('.c'): > print("MODINFO_DEBUG skip %s" % src) > continue > +command = entry['command'] > print("MODINFO_DEBUG src %s" % src) > -command = find_command(src, target, compile_commands) > cmdline = process_command(src, command) > print("MODINFO_DEBUG cmd", cmdline) > result = subprocess.run(cmdline, stdout = subprocess.PIPE, > -- > 2.38.1 > > -- Marc-André Lureau
Re: [PATCH 06/30] meson: tweak hardening options for Windows
On 12/12/22 09:18, Marc-André Lureau wrote: - # Disable ASLR for debug builds to allow debugging with gdb - if get_option('optimization') == '0' -qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase') - endif + qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', '-Wl,--high-entropy-va') What about the comment for disabling ASLR on debug builds? The problem with that line is that it _enables_ ASLR for debug builds, and nobody has complained about gdb since last April... And nobody has complained to Debian or other distros that have enabled the flag for years now. I'll clarify the commit message. I wonder if we really have to add those flags ourself. Imho, we can leave them to the compiler default or distrib.. I bet most of the deps don't use those flags explicitly either. I think so, at least Firefox does. In general QEMU tries to do more build-time hardening than the average pacakge. Paolo
Re: [PATCH v13 0/7] s390x: CPU Topology
On 12/9/22 14:32, Thomas Huth wrote: On 08/12/2022 10.44, Pierre Morel wrote: Hi, Implementation discussions == CPU models -- Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model for old QEMU we could not activate it as usual from KVM but needed a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY. Checking and enabling this capability enables S390_FEAT_CONFIGURATION_TOPOLOGY. Migration - Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source host the STFL(11) is provided to the guest. Since the feature is already in the CPU model of older QEMU, a migration from a new QEMU enabling the topology to an old QEMU will keep STFL(11) enabled making the guest get an exception for illegal operation as soon as it uses the PTF instruction. I now thought that it is not possible to enable "ctop" on older QEMUs since the don't enable the KVM capability? ... or is it still somehow possible? What did I miss? Thomas Enabling ctop with ctop=on on old QEMU is not possible, this is right. But, if STFL(11) is enable in the source KVM by a new QEMU, I can see that even with -ctop=off the STFL(11) is migrated to the destination. It is highly possible that I missed something in the cpu model. A solution proposed by Cedric was to add a new machine but we did not want this because we decided that we do not want to wait for a new machine. Another solution could be to have a we can have a new CPU feature overruling ctop like S390_FEAT_CPU_TOPOLOGY in the last series version 12. I am not sure it must be linked with the creation of a new machine. The solution here in this series is to add a VMState which will block the migration with older QEMU if the topology is activated with ctop on a new QEMU. Regards, Pierre A VMState keeping track of the S390_FEAT_CONFIGURATION_TOPOLOGY allows to forbid the migration in such a case. Note that the VMState will be used to hold information on the topology once we implement topology change for a running guest. -- Pierre Morel IBM Lab Boeblingen
Re: [PATCH v13 1/7] s390x/cpu topology: Creating CPU topology device
On 12/9/22 14:50, Thomas Huth wrote: On 08/12/2022 10.44, Pierre Morel wrote: We will need a Topology device to transfer the topology during migration and to implement machine reset. The device creation is fenced by s390_has_topology(). Signed-off-by: Pierre Morel --- [...] diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c new file mode 100644 index 00..b3e59873f6 --- /dev/null +++ b/hw/s390x/cpu-topology.c @@ -0,0 +1,149 @@ +/* + * CPU Topology + * + * Copyright IBM Corp. 2022 + * Author(s): Pierre Morel + + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/boards.h" +#include "qemu/typedefs.h" +#include "target/s390x/cpu.h" +#include "hw/s390x/s390-virtio-ccw.h" +#include "hw/s390x/cpu-topology.h" + +/** + * s390_has_topology + * + * Return false until the commit activating the topology is + * commited. + */ +bool s390_has_topology(void) +{ + return false; +} + +/** + * s390_get_topology + * + * Returns a pointer to the topology. + * + * This function is called when we know the topology exist. + * Testing if the topology exist is done with s390_has_topology() + */ +S390Topology *s390_get_topology(void) +{ + static S390Topology *s390Topology; + + if (!s390Topology) { + s390Topology = S390_CPU_TOPOLOGY( + object_resolve_path(TYPE_S390_CPU_TOPOLOGY, NULL)); + } + + assert(s390Topology); I think you can move the assert() into the body of the if-statement. Yes, clearly. + return s390Topology; +} + +/** + * s390_init_topology + * @machine: The Machine state, used to retrieve the SMP parameters + * @errp: the error pointer in case of problem + * + * This function creates and initialize the S390Topology with + * the QEMU -smp parameters we will use during adding cores to the + * topology. + */ +void s390_init_topology(MachineState *machine, Error **errp) +{ + DeviceState *dev; + + if (machine->smp.threads > 1) { + error_setg(errp, "CPU Topology do not support multithreading"); s/CPU Toplogy do/CPU topology does/ Yes, thanks. Regards, Pierre -- Pierre Morel IBM Lab Boeblingen
Re: [PATCH 13/30] configure: remove backwards-compatibility code
Hi On Fri, Dec 9, 2022 at 3:28 PM Paolo Bonzini wrote: > > The cmd_line.txt mangling is only needed when rebuilding from very old > trees and is kept mostly as an example of how to extend it. However, > Meson 0.63 introduces a deprecation mechanism for meson_options.txt > that can be used instead, so get rid of our home-grown hack. For the curious https://mesonbuild.com/Release-notes-for-0-63-0.html#deprecate-an-option-and-replace-it-with-a-new-one > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure | 10 -- > 1 file changed, 10 deletions(-) > > diff --git a/configure b/configure > index 6efc2055ce09..113db838a16f 100755 > --- a/configure > +++ b/configure > @@ -2556,16 +2556,6 @@ if test "$skip_meson" = no; then >if test "$?" -ne 0 ; then >error_exit "meson setup failed" >fi > -else > - if test -f meson-private/cmd_line.txt; then > -# Adjust old command line options whose type was changed > -# Avoids having to use "setup --wipe" when Meson is upgraded > -perl -i -ne ' > - s/^gettext = true$/gettext = auto/; > - s/^gettext = false$/gettext = disabled/; > - /^b_staticpic/ && next; > - print;' meson-private/cmd_line.txt > - fi > fi It's unlikely that users will jump directly from those old versions to this, so it's fine to me if don't handle adjustments in that case. > > # Save the configure command line for later reuse. > -- > 2.38.1 > > -- Marc-André Lureau
Re: [PATCH v1 06/24] vfio-user: Define type vfio_user_pci_dev_info
On 11/9/22 00:13, John Johnson wrote: New class for vfio-user with its class and instance constructors and destructors, and its pci ops. Signed-off-by: Elena Ufimtseva Signed-off-by: John G Johnson Signed-off-by: Jagannathan Raman --- hw/vfio/Kconfig | 10 +++ hw/vfio/common.c | 5 hw/vfio/pci.c| 89 hw/vfio/pci.h| 8 + 4 files changed, 112 insertions(+) diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig index 7cdba05..301894e 100644 --- a/hw/vfio/Kconfig +++ b/hw/vfio/Kconfig @@ -2,6 +2,10 @@ config VFIO bool depends on LINUX +config VFIO_USER +bool +depends on VFIO + config VFIO_PCI bool default y @@ -9,6 +13,12 @@ config VFIO_PCI select EDID depends on LINUX && PCI +config VFIO_USER_PCI +bool +default y +select VFIO_USER +depends on VFIO_PCI + config VFIO_CCW bool default y diff --git a/hw/vfio/common.c b/hw/vfio/common.c index c7bf0aa..c589bd9 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1774,6 +1774,11 @@ void vfio_reset_handler(void *opaque) QLIST_FOREACH(group, &vfio_group_list, next) { QLIST_FOREACH(vbasedev, &group->device_list, next) { if (vbasedev->dev->realized && vbasedev->needs_reset) { +if (vbasedev->ops->vfio_hot_reset_multi == NULL) { +error_printf("%s: No hot reset handler specified\n", + vbasedev->name); +continue; +} Since needs_reset is false, which is the case for VFIO User, vfio_hot_reset_multi won't be called. I don't think we care much about adding this error message. In another patch, may be. vbasedev->ops->vfio_hot_reset_multi(vbasedev); } } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 80b03a2..dc19869 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include CONFIG_DEVICES #include #include @@ -3421,3 +3422,91 @@ static void register_vfio_pci_dev_type(void) } type_init(register_vfio_pci_dev_type) + + +#ifdef CONFIG_VFIO_USER_PCI Why not introduce a new file hw/vfio/user.c file ? It would be cleaner. + +/* + * vfio-user routines. + */ + +/* + * Emulated devices don't use host hot reset + */ +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) +{ +vbasedev->needs_reset = false; +} + +static VFIODeviceOps vfio_user_pci_ops = { +.vfio_compute_needs_reset = vfio_user_compute_needs_reset, +.vfio_eoi = vfio_intx_eoi, +.vfio_get_object = vfio_pci_get_object, +.vfio_save_config = vfio_pci_save_config, +.vfio_load_config = vfio_pci_load_config, +}; + +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) +{ +ERRP_GUARD(); +VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); +VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); +VFIODevice *vbasedev = &vdev->vbasedev; + +/* + * TODO: make option parser understand SocketAddress + * and use that instead of having scalar options + * for each socket type. + */ +if (!udev->sock_name) { +error_setg(errp, "No socket specified"); +error_append_hint(errp, "Use -device vfio-user-pci,socket=\n"); +return; +} + +vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name); +vbasedev->ops = &vfio_user_pci_ops; +vbasedev->type = VFIO_DEVICE_TYPE_PCI; +vbasedev->dev = DEVICE(vdev); + +} + +static void vfio_user_instance_finalize(Object *obj) +{ +VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + +vfio_put_device(vdev); +} + +static Property vfio_user_pci_dev_properties[] = { +DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), This looks like a good candidate for using a chardev. It could only support AF_UNIX to start with if fd passing is the required feature. But at least, the model would be using a well known backend. I think vhost has the same kind of constraints. Thanks, C. +DEFINE_PROP_END_OF_LIST(), +}; + +static void vfio_user_pci_dev_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); + +device_class_set_props(dc, vfio_user_pci_dev_properties); +dc->desc = "VFIO over socket PCI device assignment"; +pdc->realize = vfio_user_pci_realize; +} + +static const TypeInfo vfio_user_pci_dev_info = { +.name = TYPE_VFIO_USER_PCI, +.parent = TYPE_VFIO_PCI_BASE, +.instance_size = sizeof(VFIOUserPCIDevice), +.class_init = vfio_user_pci_dev_class_init, +.instance_init = vfio_instance_init, +.instance_finalize = vfio_user_instance_finalize, +}; + +static void register_vfio_user_dev_type(void) +{ +type_register_static(&vfio_user_pci_dev_info); +} + +type_init(register_vfio_user_dev_type) + +#endif /* VFIO_USER_PCI */ diff --git a/hw/vfio/pci.h b/hw/vfio/
Re: [PATCH 15/30] meson: cleanup compiler detection
On Fri, Dec 9, 2022 at 3:29 PM Paolo Bonzini wrote: > > Detect all compilers at the beginning of meson.build, and store > the available languages in an array. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > meson.build | 62 ++--- > 1 file changed, 35 insertions(+), 27 deletions(-) > > diff --git a/meson.build b/meson.build > index 8a9ed5628317..c4fa82ae8ba4 100644 > --- a/meson.build > +++ b/meson.build > @@ -15,9 +15,21 @@ ss = import('sourceset') > fs = import('fs') > > sh = find_program('sh') > -cc = meson.get_compiler('c') > config_host = keyval.load(meson.current_build_dir() / 'config-host.mak') > enable_modules = 'CONFIG_MODULES' in config_host > +targetos = host_machine.system() > + > +cc = meson.get_compiler('c') > +all_languages = ['c'] > +if add_languages('cpp', required: false, native: false) > + all_languages += ['cpp'] > + cxx = meson.get_compiler('cpp') > +endif > +if targetos == 'darwin' and \ > + add_languages('objc', required: get_option('cocoa'), native: false) > + all_languages += ['objc'] > + objc = meson.get_compiler('objc') > +endif > > # Temporary directory used for files created while > # configure runs. Since it is in the build directory > @@ -54,8 +66,6 @@ if cpu in ['riscv32', 'riscv64'] >cpu = 'riscv' > endif > > -targetos = host_machine.system() > - > target_dirs = config_host['TARGET_DIRS'].split() > have_linux_user = false > have_bsd_user = false > @@ -161,7 +171,7 @@ if 'dtrace' in get_option('trace_backends') > # semaphores are linked into the main binary and not the module's shared > # object. > add_global_arguments('-DSTAP_SDT_V2', > - native: false, language: ['c', 'cpp', 'objc']) > + native: false, language: all_languages) >endif > endif > > @@ -203,7 +213,7 @@ endif > if get_option('fuzzing') >add_project_link_arguments(['-Wl,-T,', >(meson.current_source_dir() / > 'tests/qtest/fuzz/fork_fuzz.ld')], > - native: false, language: ['c', 'cpp', 'objc']) > + native: false, language: all_languages) > ># Specify a filter to only instrument code that is directly related to ># virtual-devices. > @@ -216,7 +226,7 @@ if get_option('fuzzing') > args: ['-fsanitize-coverage-allowlist=/dev/null', > '-fsanitize-coverage=trace-pc'] ) > > add_global_arguments('-fsanitize-coverage-allowlist=instrumentation-filter', > - native: false, language: ['c', 'cpp', 'objc']) > + native: false, language: all_languages) >endif > >if get_option('fuzzing_engine') == '' > @@ -225,9 +235,9 @@ if get_option('fuzzing') > # everything with fsanitize=fuzzer-no-link. Otherwise, the linker will be > # unable to bind the fuzzer-related callbacks added by instrumentation. > add_global_arguments('-fsanitize=fuzzer-no-link', > - native: false, language: ['c', 'cpp', 'objc']) > + native: false, language: all_languages) > add_global_link_arguments('-fsanitize=fuzzer-no-link', > - native: false, language: ['c', 'cpp', 'objc']) > + native: false, language: all_languages) > # For the actual fuzzer binaries, we need to link against the libfuzzer > # library. They need to be configurable, to support OSS-Fuzz > fuzz_exe_ldflags = ['-fsanitize=fuzzer'] > @@ -238,15 +248,11 @@ if get_option('fuzzing') >endif > endif > > -add_global_arguments(qemu_cflags, native: false, language: ['c']) > -add_global_arguments(qemu_objcflags, native: false, language: ['objc']) > - > # Check that the C++ compiler exists and works with the C compiler. > link_language = 'c' > linker = cc > qemu_cxxflags = [] > -if add_languages('cpp', required: false, native: false) > - cxx = meson.get_compiler('cpp') > +if 'cpp' in all_languages >add_global_arguments(['-D__STDC_LIMIT_MACROS', '-D__STDC_CONSTANT_MACROS', > '-D__STDC_FORMAT_MACROS'], > native: false, language: 'cpp') >foreach k: qemu_cflags > @@ -255,7 +261,6 @@ if add_languages('cpp', required: false, native: false) >qemu_cxxflags += [k] > endif >endforeach > - add_global_arguments(qemu_cxxflags, native: false, language: 'cpp') > >if cxx.links(files('scripts/main.c'), args: qemu_cflags) > link_language = 'cpp' > @@ -271,22 +276,21 @@ if targetos != 'sunos' and not > config_host.has_key('CONFIG_TSAN') >qemu_ldflags += linker.get_supported_link_arguments('-Wl,--warn-common') > endif > > -add_global_link_arguments(qemu_ldflags, native: false, language: ['c', > 'cpp', 'objc']) > +add_global_link_arguments(qemu_ldflags, native: false, language: > all_languages) > > +add_global_arguments(qemu_cflags, native: false, lan
Re: [PATCH v13 0/7] s390x: CPU Topology
On 12/12/2022 09.51, Pierre Morel wrote: On 12/9/22 14:32, Thomas Huth wrote: On 08/12/2022 10.44, Pierre Morel wrote: Hi, Implementation discussions == CPU models -- Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model for old QEMU we could not activate it as usual from KVM but needed a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY. Checking and enabling this capability enables S390_FEAT_CONFIGURATION_TOPOLOGY. Migration - Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source host the STFL(11) is provided to the guest. Since the feature is already in the CPU model of older QEMU, a migration from a new QEMU enabling the topology to an old QEMU will keep STFL(11) enabled making the guest get an exception for illegal operation as soon as it uses the PTF instruction. I now thought that it is not possible to enable "ctop" on older QEMUs since the don't enable the KVM capability? ... or is it still somehow possible? What did I miss? Thomas Enabling ctop with ctop=on on old QEMU is not possible, this is right. But, if STFL(11) is enable in the source KVM by a new QEMU, I can see that even with -ctop=off the STFL(11) is migrated to the destination. Is this with the "host" CPU model or another one? And did you explicitly specify "ctop=off" at the command line, or are you just using the default setting by not specifying it? Thomas
Re: [PATCH 18/30] configure, meson: move --enable-modules to Meson
On Fri, Dec 9, 2022 at 3:44 PM Paolo Bonzini wrote: > > Signed-off-by: Paolo Bonzini On Fri, Dec 9, 2022 at 3:39 PM Paolo Bonzini wrote: > > All uses of pkg-config have been moved to Meson. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure | 21 + > meson.build | 7 ++- > meson_options.txt | 2 ++ > scripts/meson-buildoptions.sh | 3 +++ > 4 files changed, 12 insertions(+), 21 deletions(-) > > diff --git a/configure b/configure > index 9c336203d8d9..26d10aeffd82 100755 > --- a/configure > +++ b/configure > @@ -273,7 +273,6 @@ sanitizers="no" > tsan="no" > fortify_source="yes" > EXESUF="" > -modules="no" > prefix="/usr/local" > qemu_suffix="qemu" > softmmu="yes" > @@ -705,12 +704,6 @@ for opt do >;; >--disable-debug-info) meson_option_add -Ddebug=false >;; > - --enable-modules) > - modules="yes" > - ;; > - --disable-modules) > - modules="no" > - ;; >--cpu=*) >;; >--target-list=*) target_list="$optarg" > @@ -1001,7 +994,6 @@ cat << EOF >linux-user all linux usermode emulation targets >bsd-userall BSD usermode emulation targets >pie Position Independent Executables > - modules modules support (non-Windows) >debug-tcg TCG debugging (default is disabled) >debug-info debugging information >safe-stack SafeStack Stack Smash Protection. Depends on > @@ -1260,16 +1252,8 @@ else >QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces" > fi > > -# Our module code doesn't support Windows > -if test "$modules" = "yes" && test "$mingw32" = "yes" ; then > - error_exit "Modules are not available for Windows" > -fi > - > -# Static linking is not possible with plugins, modules or PIE > +# Resolve default for --enable-plugins > if test "$static" = "yes" ; then > - if test "$modules" = "yes" ; then > -error_exit "static and modules are mutually incompatible" > - fi >if test "$plugins" = "yes"; then > error_exit "static and plugins are mutually incompatible" >else > @@ -2229,9 +2213,6 @@ if test "$solaris" = "yes" ; then > fi > echo "SRC_PATH=$source_path" >> $config_host_mak > echo "TARGET_DIRS=$target_list" >> $config_host_mak > -if test "$modules" = "yes"; then > - echo "CONFIG_MODULES=y" >> $config_host_mak > -fi > > # XXX: suppress that > if [ "$bsd" = "yes" ] ; then > diff --git a/meson.build b/meson.build > index f63ab7f83bed..99c1bde4d154 100644 > --- a/meson.build > +++ b/meson.build > @@ -16,7 +16,6 @@ fs = import('fs') > > sh = find_program('sh') > config_host = keyval.load(meson.current_build_dir() / 'config-host.mak') > -enable_modules = 'CONFIG_MODULES' in config_host > targetos = host_machine.system() > > cc = meson.get_compiler('c') > @@ -84,6 +83,12 @@ have_ga = get_option('guest_agent') \ >.require(targetos in ['sunos', 'linux', 'windows', 'freebsd'], > error_message: 'unsupported OS for QEMU guest agent') \ >.allowed() > +enable_modules = get_option('modules') \ > + .require(targetos != 'windows', > + error_message: 'Modules are not available for Windows') \ > + .require(not get_option('prefer_static'), > + error_message: 'Modules are incompatible with static linking') \ > + .allowed() > have_block = have_system or have_tools > > python = import('python').find_installation() > diff --git a/meson_options.txt b/meson_options.txt > index 4b749ca54900..e492aaa73fbc 100644 > --- a/meson_options.txt > +++ b/meson_options.txt > @@ -44,6 +44,8 @@ option('fuzzing', type : 'boolean', value: false, > description: 'build fuzzing targets') > option('gettext', type : 'feature', value : 'auto', > description: 'Localization of the GTK+ user interface') > +option('modules', type : 'feature', value : 'disabled', > + description: 'modules support (non Windows)') > option('module_upgrades', type : 'boolean', value : false, > description: 'try to load modules from alternate paths for upgrades') > option('install_blobs', type : 'boolean', value : true, > diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh > index aa6e30ea911e..f91797741eef 100644 > --- a/scripts/meson-buildoptions.sh > +++ b/scripts/meson-buildoptions.sh > @@ -119,6 +119,7 @@ meson_options_help() { >printf "%s\n" ' lzo lzo compression support' >printf "%s\n" ' malloc-trim enable libc malloc_trim() for memory > optimization' >printf "%s\n" ' membarrier membarrier system call (for Linux 4.14+ > or Windows' > + printf "%s\n" ' modules modules support (non Windows)' >printf "%s\n" ' mpath Multipath persistent reservation > passthrough' >printf "%s\n" ' multiprocessOut of process device emulation support' >printf "%s\n" ' netmap netmap network backend support' > @@ -338,6 +339,8 @@ _meson_option_parse() { > --disable-memb
Re: [PATCH 17/30] configure: remove pkg-config functions
On Fri, Dec 9, 2022 at 3:39 PM Paolo Bonzini wrote: > > All uses of pkg-config have been moved to Meson. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure | 19 +++ > docs/devel/build-system.rst | 4 > 2 files changed, 3 insertions(+), 20 deletions(-) > > diff --git a/configure b/configure > index fb28dd3963bd..9c336203d8d9 100755 > --- a/configure > +++ b/configure > @@ -365,11 +365,7 @@ smbd="$SMBD" > strip="${STRIP-${cross_prefix}strip}" > widl="${WIDL-${cross_prefix}widl}" > windres="${WINDRES-${cross_prefix}windres}" > -pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}" > -query_pkg_config() { > -"${pkg_config_exe}" ${QEMU_PKG_CONFIG_FLAGS} "$@" > -} > -pkg_config=query_pkg_config > +pkg_config="${PKG_CONFIG-${cross_prefix}pkg-config}" > sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" > > # default flags for all hosts > @@ -745,9 +741,7 @@ for opt do >;; >--without-default-features) # processed above >;; > - --static) > -static="yes" > -QEMU_PKG_CONFIG_FLAGS="--static $QEMU_PKG_CONFIG_FLAGS" > + --static) static="yes" >;; >--bindir=*) bindir="$optarg" >;; > @@ -1419,13 +1413,6 @@ EOF >fi > fi > > -## > -# pkg-config probe > - > -if ! has "$pkg_config_exe"; then > - error_exit "pkg-config binary '$pkg_config_exe' not found" > -fi > - > ## > # fdt probe > > @@ -2423,7 +2410,7 @@ if test "$skip_meson" = no; then >test -n "$objcc" && echo "objc = [$(meson_quote $objcc $CPU_CFLAGS)]" >> > $cross >echo "ar = [$(meson_quote $ar)]" >> $cross >echo "nm = [$(meson_quote $nm)]" >> $cross > - echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross > + echo "pkgconfig = [$(meson_quote $pkg_config)]" >> $cross >echo "ranlib = [$(meson_quote $ranlib)]" >> $cross >if has $sdl2_config; then > echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross > diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst > index 9db18aff159e..66cfe7b8bdc8 100644 > --- a/docs/devel/build-system.rst > +++ b/docs/devel/build-system.rst > @@ -103,10 +103,6 @@ developers in checking for system features: > Print $MESSAGE to stderr, followed by $MORE... and then exit from the > configure script with non-zero status > > -``query_pkg_config $ARGS...`` > - Run pkg-config passing it $ARGS. If QEMU is doing a static build, > - then --static will be automatically added to $ARGS > - > > Stage 2: Meson > == > -- > 2.38.1 > > -- Marc-André Lureau
Re: [PATCH v2 14/15] block: Don't poll in bdrv_replace_child_noperm()
On 12/9/22 17:53, Paolo Bonzini wrote: On 11/18/22 18:41, Kevin Wolf wrote: In order to make sure that bdrv_replace_child_noperm() doesn't have to poll any more, get rid of the bdrv_parent_drained_begin_single() call. This is possible now because we can require that the parent is already drained through the child in question when the function is called and we don't call the parent drain callbacks more than once. The additional drain calls needed in callers cause the test case to run its code in the drain handler too early (bdrv_attach_child() drains now), so modify it to only enable the code after the test setup has completed. Signed-off-by: Kevin Wolf I hate to bear bad news, but this breaks the Windows builds on github (msys-32bit, msys-64bit) with an obscure but 100% reproducible 51/88 qemu:unit / test-bdrv-drain ERROR 1.30s (exit status 3221225477 or signal 3221225349 SIGinvalid) The exit status is 0xC005 aka a Windows SIGSEGV. With some luck it could be reproducible with Wine (but no gdb). I am testing dropping this patch and squashing diff --git a/block.c b/block.c index 1a2a8d9de907..bb9c85222168 100644 --- a/block.c +++ b/block.c @@ -2870,7 +2870,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, */ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); if (new_bs_quiesce_counter && !child->quiesced_parent) { -bdrv_parent_drained_begin_single(child, true); +bdrv_parent_drained_begin_single(child); +AIO_WAIT_WHILE(bdrv_child_get_parent_aio_context(child), + bdrv_parent_drained_poll_single(c)); } if (old_bs) { in patch 15/15, which should at least allow me to keep a lot of the cleanups I had on top of this series. Paolo
Re: [PATCH v13 4/7] s390x/cpu_topology: CPU topology migration
On 12/9/22 15:56, Cédric Le Goater wrote: On 12/8/22 10:44, Pierre Morel wrote: The migration can only take place if both source and destination of the migration both use or both do not use the CPU topology facility. We indicate a change in topology during migration postload for the case the topology changed between source and destination. Signed-off-by: Pierre Morel + +const VMStateDescription vmstate_cpu_topology = { + .name = "cpu_topology", + .version_id = 1, + .post_load = cpu_topology_postload, Please use 'cpu_topology_post_load', it is easier to catch with a grep. OK Regards, Pierre -- Pierre Morel IBM Lab Boeblingen
Re: [PATCH 03/11] RISC-V: Adding T-Head SYNC instructions
On 2022/9/8 15:29, Richard Henderson wrote: On 9/6/22 13:22, Christoph Muellner wrote: +NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS) +NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU) +NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU) +NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU) +NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU) These should not be nops: th_sfence_vmas requires a tlb flush; th.sync{,.i} needs to end the current translation block; th.sync.{s,is} needs multiprocessor sync, which involves a call to async_safe_run_on_cpu. Hi Richard, I have fixed the description of T-Head custom synchronization instructions according to the implementation of hardware. Sorry for the misleading. https://github.com/T-head-Semi/thead-extension-spec/tree/master/xtheadsync The fix is as below: 1)th.sync.s has the same function with th.sync. 2) th.sync. has the same function with th.sync.i 3) th.sync has the function of memory barrier, but it is stricter than RISC-V fence instruction as it order all the instructions instead of load/store instructions. 4) th.sync.i has the function to flush the pipeline besides the function of th.sync. On QEMU, I think they should be emulated them as below: 1) th.sync should be emulated as " 'tcg_gen_mb()' and 'end the current translation block'" on QEMU. 2) th.sync should be emulated as " 'tcg_gen_mb()' and 'end the current translation block'" on QEMU because we don't have the cache model for guest on QEMU. Thus we don't have to synchronize between the icache and dcache for guest. 'tcg_gen_mb' is for the function of memory barrier, and 'end the current translation block' is to reflect the influence of other instructions, such as to take interrupts which only at the end of translation block. Maybe we can also just implement these instructions as 'tcg_gen_mb' because currently all CSR instructions which may influence the interrupts taking have ended the TB on QEMU. Is it right? Thanks, Zhiwei r~
Re: [RFC PATCH v2 01/22] include: import xen public headers
On 09/12/2022 09:55, David Woodhouse wrote: From: Joao Martins Signed-off-by: Joao Martins [dwmw2: Update to Xen public headers from 4.16.2 release] Signed-off-by: David Woodhouse --- include/standard-headers/xen/arch-x86/cpuid.h | 118 ++ .../xen/arch-x86/xen-x86_32.h | 194 +++ .../xen/arch-x86/xen-x86_64.h | 241 include/standard-headers/xen/arch-x86/xen.h | 398 +++ include/standard-headers/xen/event_channel.h | 388 ++ include/standard-headers/xen/features.h | 143 +++ include/standard-headers/xen/grant_table.h| 686 +++ include/standard-headers/xen/hvm/hvm_op.h | 395 +++ include/standard-headers/xen/hvm/params.h | 318 + include/standard-headers/xen/memory.h | 754 include/standard-headers/xen/physdev.h| 383 ++ include/standard-headers/xen/sched.h | 202 include/standard-headers/xen/trace.h | 341 ++ include/standard-headers/xen/vcpu.h | 248 include/standard-headers/xen/version.h| 113 ++ include/standard-headers/xen/xen-compat.h | 46 + include/standard-headers/xen/xen.h| 1049 + 17 files changed, 6017 insertions(+) create mode 100644 include/standard-headers/xen/arch-x86/cpuid.h create mode 100644 include/standard-headers/xen/arch-x86/xen-x86_32.h create mode 100644 include/standard-headers/xen/arch-x86/xen-x86_64.h create mode 100644 include/standard-headers/xen/arch-x86/xen.h create mode 100644 include/standard-headers/xen/event_channel.h create mode 100644 include/standard-headers/xen/features.h create mode 100644 include/standard-headers/xen/grant_table.h create mode 100644 include/standard-headers/xen/hvm/hvm_op.h create mode 100644 include/standard-headers/xen/hvm/params.h create mode 100644 include/standard-headers/xen/memory.h create mode 100644 include/standard-headers/xen/physdev.h create mode 100644 include/standard-headers/xen/sched.h create mode 100644 include/standard-headers/xen/trace.h create mode 100644 include/standard-headers/xen/vcpu.h create mode 100644 include/standard-headers/xen/version.h create mode 100644 include/standard-headers/xen/xen-compat.h create mode 100644 include/standard-headers/xen/xen.h Reviewed-by: Paul Durrant
Re: [RFC PATCH v2 02/22] xen: add CONFIG_XENFV_MACHINE and CONFIG_XEN_EMU options for Xen emulation
On 09/12/2022 09:55, David Woodhouse wrote: From: David Woodhouse The XEN_EMU option will cover core Xen support in target/, which exists only for x86 with KVM today but could theoretically also be implemented on Arm/Aarch64 and with TCG or other accelerators. It will also cover the support for architecture-independent grant table and event channel support which will be added in hw/xen/. The XENFV_MACHINE option is for the xenfv platform support, which will now be used both by XEN_EMU and by real Xen. The XEN option remains dependent on the Xen runtime libraries, and covers support for real Xen. Some code which currently resides under CONFIG_XEN will be moving to CONFIG_XENFV_MACHINE over time. Signed-off-by: David Woodhouse --- accel/Kconfig | 1 + hw/Kconfig | 1 + hw/xen/Kconfig | 3 +++ meson.build| 1 + target/Kconfig | 4 5 files changed, 10 insertions(+) create mode 100644 hw/xen/Kconfig Reviewed-by: Paul Durrant
Re: [PATCH v13 2/7] s390x/cpu topology: reporting the CPU topology to the guest
On 12/9/22 16:43, Cédric Le Goater wrote: On 12/8/22 10:44, Pierre Morel wrote: The guest uses the STSI instruction to get information on the CPU topology. Let us implement the STSI instruction for the basis CPU topology level, level 2. Signed-off-by: Pierre Morel --- ... +#define S390_TOPOLOGY_MAX_MNEST 2 + +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar) +{ + union { + char place_holder[S390_TOPOLOGY_SYSIB_SIZE]; + SysIB_151x sysib; + } buffer QEMU_ALIGNED(8); + int len; + + if (s390_is_pv() || !s390_has_topology() || Isn't the test s390_is_pv() redundant since patch 6 deactivates S390_FEAT_CONFIGURATION_TOPOLOGY for PV guests ? Yes you are right it is. I remove it. Regards, Pierre -- Pierre Morel IBM Lab Boeblingen
Re: [PATCH 03/11] RISC-V: Adding T-Head SYNC instructions
On 2022/9/8 15:29, Richard Henderson wrote: On 9/6/22 13:22, Christoph Muellner wrote: +NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS) +NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU) +NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU) +NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU) +NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU) These should not be nops: th_sfence_vmas requires a tlb flush; th.sync{,.i} needs to end the current translation block; th.sync.{s,is} needs multiprocessor sync, which involves a call to async_safe_run_on_cpu. Hi Richard, I have fixed the description of T-Head custom synchronization instructions according to the implementation of hardware. Sorry for the misleading. https://github.com/T-head-Semi/thead-extension-spec/tree/master/xtheadsync The fix is as below: 1)th.sync.s has the same function with th.sync. 2) th.sync.is has the same function with th.sync.i 3) th.sync has the function of memory barrier, but it is stricter than RISC-V fence instruction as it order all the instructions instead of load/store instructions. 4) th.sync.i has the function to flush the pipeline besides the function of th.sync. On QEMU, I think they should be emulated them as below: 1) th.sync should be emulated as " 'tcg_gen_mb()' and 'end the current translation block'" on QEMU. 2) th.sync.i should be emulated as " 'tcg_gen_mb()' and 'end the current translation block'" on QEMU because we don't have the cache model for guest on QEMU. Thus we don't have to synchronize between the icache and dcache for guest. 'tcg_gen_mb' is for the function of memory barrier. And 'end the current translation block' is to reflect the influence of other instructions, such as taking interrupts which happens only at the end of a translation block. Maybe we can also just implement these instructions as 'tcg_gen_mb' because currently all CSR instructions which may influence the interrupts taking have ended the TB on QEMU. Is it right? Thanks, Zhiwei r~
Re: [PATCH v1 03/24] vfio-user: add container IO ops vector
On 9/12/22 17:10, Cédric Le Goater wrote: Hello John, On 11/9/22 00:13, John Johnson wrote: Used for communication with VFIO driver (prep work for vfio-user, which will communicate over a socket) Signed-off-by: John G Johnson --- hw/vfio/common.c | 126 -- include/hw/vfio/vfio-common.h | 33 +++ 2 files changed, 117 insertions(+), 42 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index ace9562..83d69b9 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, goto unmap_exit; } - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); + ret = CONT_DMA_UNMAP(container, unmap, bitmap); I am not sure these macros are very useful, compared to : container->ops->dma_unmap(container, unmap, bitmap); I was going to report the same. +/* + * The next 2 ops vectors are how Devices and Containers + * communicate with the server. The default option is + * through ioctl() to the kernel VFIO driver, but vfio-user + * can use a socket to a remote process. + */ + +struct VFIOContIO { VFIOContainerOps seems more adequate with the current VFIO terminology in QEMU. Yes please, abbreviated "Cont" is not helpful.
Re: [PATCH v1 00/24] vfio-user client
Cc'ing Mark & Edgar. On 9/11/22 00:13, John Johnson wrote: Hello, This is the 6th revision of the vfio-user client implementation. It is the first patch series (the previous revisions were RFCs) First of all, thank you for your time reviewing the RFC versions. The vfio-user framework consists of 3 parts: 1) The VFIO user protocol specification. 2) A client - the VFIO device in QEMU that encapsulates VFIO messages and sends them to the server. 3) A server - a remote process that emulates a device. This patchset implements parts 1 and 2. The libvfio-user project (https://github.com/nutanix/libvfio-user) can be used by a remote process to handle the protocol to implement the third part. We also have upstreamed a patch series that implement a server using QEMU. Contributors: John G Johnson John Levon Thanos Makatos Elena Ufimtseva Jagannathan Raman John Johnson (23): vfio-user: add VFIO base abstract class vfio-user: add container IO ops vector vfio-user: add region cache vfio-user: add device IO ops vector vfio-user: Define type vfio_user_pci_dev_info vfio-user: connect vfio proxy to remote server vfio-user: define socket receive functions vfio-user: define socket send functions vfio-user: get device info vfio-user: get region info vfio-user: region read/write vfio-user: pci_user_realize PCI setup vfio-user: get and set IRQs vfio-user: forward msix BAR accesses to server vfio-user: proxy container connect/disconnect vfio-user: dma map/unmap operations vfio-user: add dma_unmap_all vfio-user: secure DMA support vfio-user: dma read/write operations vfio-user: pci reset vfio-user: add 'x-msg-timeout' option that specifies msg wait times vfio-user: add coalesced posted writes vfio-user: add trace points
[Bug 1523246] Re: Virtio-blk does not support TRIM
On which version of qemu the discard option is supported? I have emulated a VM with below qemu options related to disk type: -drive file=disk.img,if=none,id=disk0,l2-cache- size=8M,format=qcow2,discard=on,detect-zeroes=unmap,aio=io_uring -device virtio-blk-pci,drive=disk0,scsi=off,bootindex=2 the disk file on the host side is located on xfs mountpoint on RAID level 10 array. After I downloaded a file with 1GB size on the guest OS, I saw the size of the disk file on the host has been increased as well. But when I delete the downloaded file and issue fstrim --all -v command, the disk file on host has not been decreased. The version of qemu I'm using is 5.2.0 -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1523246 Title: Virtio-blk does not support TRIM Status in QEMU: Fix Released Bug description: When model=virtio is used, TRIM is not supported. # mount -o discard /dev/vda4 /mnt # mount | tail -1 /dev/vda4 on /mnt type fuseblk (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other,blksize=4096) # fstrim /mnt/ fstrim: /mnt/: the discard operation is not supported Booting without model=virtio allows using TRIM (in Windows as well). Full QEMU line: qemu-system-x86_64 -enable-kvm -cpu host -bios /usr/share/ovmf/ovmf_x64.bin -smp 2 -m 7G -vga qxl -usbdevice tablet -net nic,model=virtio -net user -drive discard=unmap,detect- zeroes=unmap,cache=none,file=vms/win10.hd.img.vmdk,format=vmdk,if=virtio To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1523246/+subscriptions
Re: [PATCH v3 1/2] hw/nvme: Implement shadow doorbell buffer support
On Dec 8 12:39, Guenter Roeck wrote: > On Thu, Dec 08, 2022 at 12:13:55PM -0800, Guenter Roeck wrote: > > On Thu, Dec 08, 2022 at 10:47:42AM -0800, Guenter Roeck wrote: > > > > > > > > A cq head doorbell mmio is skipped... And it is not the fault of the > > > > kernel. The kernel is in it's good right to skip the mmio since the cq > > > > eventidx is not properly updated. > > > > > > > > Adding that and it boots properly on riscv. But I'm perplexed as to why > > > > this didnt show up on our regularly tested platforms. > > > > > > > > Gonna try to get this in for 7.2! > > > > > > I see another problem with sparc64. > > > > > > [5.261508] could not locate request for tag 0x0 > > > [5.261711] nvme nvme0: invalid id 0 completed on queue 1 > > > > > > That is seen repeatedly until the request times out. I'll test with > > > your patch to see if it resolves this problem as well, and will bisect > > > otherwise. > > > > > The second problem is unrelated to the doorbell problem. > > It is first seen in qemu v7.1. I'll try to bisect. > > > > Unfortunately, the problem observed with sparc64 also bisects to this > patch. Making things worse, "hw/nvme: fix missing cq eventidx update" > does not fix it (which is why I initially thought it was unrelated). > > I used the following qemu command line. > > qemu-system-sparc64 -M sun4v -cpu "TI UltraSparc IIi" -m 512 -snapshot \ > -device nvme,serial=foo,drive=d0,bus=pciB \ > -drive file=rootfs.ext2,if=none,format=raw,id=d0 \ > -kernel arch/sparc/boot/image -no-reboot \ > -append "root=/dev/nvme0n1 console=ttyS0" \ > -nographic -monitor none > Hi Guenter, Thank you very much for the detailed reports and I apologize for the fallout of this. I think this is related to missing endian conversions when handling the shadow doorbells. I'm not sure if there is a kernel issue here as well, because as far as I can tell, the shadow doorbells are updated like so: old_value = *dbbuf_db; *dbbuf_db = value; (where `value` is the new head/tail value depending on if this is an sq or cq). Keith, is the kernel doing something magically here I am not aware of, or isn't this missing some cpu_to_le32() / le32_to_cpu() calls as well? signature.asc Description: PGP signature
Re: [PATCH v2 11/14] ui: Move HMP commands from monitor to new ui/ui-hmp-cmds.c
Markus Armbruster writes: > This moves these commands from MAINTAINERS section "Human > Monitor (HMP)" to "Graphics". > > Signed-off-by: Markus Armbruster > Reviewed-by: Daniel P. Berrangé > Reviewed-by: Philippe Mathieu-Daudé [...] > diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c > new file mode 100644 > index 00..c26a1f00d0 > --- /dev/null > +++ b/ui/ui-hmp-cmds.c > @@ -0,0 +1,361 @@ > +/* > + * Human Monitor Interface commands Make this * HMP commands related to UI > + * > + * Copyright IBM, Corp. 2011 > + * > + * Authors: > + * Anthony Liguori > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Contributions after 2012-01-13 are licensed under the terms of the > + * GNU GPL, version 2 or (at your option) any later version. > + */ [...]
Re: [PATCH v13 0/7] s390x: CPU Topology
On 12/12/22 10:07, Thomas Huth wrote: On 12/12/2022 09.51, Pierre Morel wrote: On 12/9/22 14:32, Thomas Huth wrote: On 08/12/2022 10.44, Pierre Morel wrote: Hi, Implementation discussions == CPU models -- Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model for old QEMU we could not activate it as usual from KVM but needed a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY. Checking and enabling this capability enables S390_FEAT_CONFIGURATION_TOPOLOGY. Migration - Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source host the STFL(11) is provided to the guest. Since the feature is already in the CPU model of older QEMU, a migration from a new QEMU enabling the topology to an old QEMU will keep STFL(11) enabled making the guest get an exception for illegal operation as soon as it uses the PTF instruction. I now thought that it is not possible to enable "ctop" on older QEMUs since the don't enable the KVM capability? ... or is it still somehow possible? What did I miss? Thomas Enabling ctop with ctop=on on old QEMU is not possible, this is right. But, if STFL(11) is enable in the source KVM by a new QEMU, I can see that even with -ctop=off the STFL(11) is migrated to the destination. Is this with the "host" CPU model or another one? And did you explicitly specify "ctop=off" at the command line, or are you just using the default setting by not specifying it? With explicit cpumodel and using ctop=off like in sudo /usr/local/bin/qemu-system-s390x_master \ -m 512M \ -enable-kvm -smp 4,sockets=4,cores=2,maxcpus=8 \ -cpu z14,ctop=off \ -machine s390-ccw-virtio-7.2,accel=kvm \ ... regards, Pierre -- Pierre Morel IBM Lab Boeblingen
Re: [PATCH 1/1] qemu-iotests/stream-under-throttle: do not shutdown QEMU
Am 07.12.22 um 14:14 schrieb Christian Borntraeger: Without a kernel or boot disk a QEMU on s390 will exit (usually with a disabled wait state). This breaks the stream-under-throttle test case. Do not exit qemu if on s390. Signed-off-by: Christian Borntraeger --- tests/qemu-iotests/tests/stream-under-throttle | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/qemu-iotests/tests/stream-under-throttle b/tests/qemu-iotests/tests/stream-under-throttle index 8d2d9e16840d..c24dfbcaa2f2 100755 --- a/tests/qemu-iotests/tests/stream-under-throttle +++ b/tests/qemu-iotests/tests/stream-under-throttle @@ -88,6 +88,8 @@ class TestStreamWithThrottle(iotests.QMPTestCase): 'x-iops-total=1,x-bps-total=104857600') self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) self.vm.add_device('virtio-blk,iothread=iothr0,drive=throttled-node') +if iotests.qemu_default_machine == 's390-ccw-virtio': +self.vm.add_args('-no-shutdown') self.vm.launch() def tearDown(self) -> None: ping. I guess, this will come after the release?
Re: [PATCH v13 0/7] s390x: CPU Topology
On 12/9/22 15:45, Cédric Le Goater wrote: On 12/8/22 10:44, Pierre Morel wrote: Hi, Implementation discussions == CPU models -- Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model for old QEMU we could not activate it as usual from KVM but needed a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY. Checking and enabling this capability enables S390_FEAT_CONFIGURATION_TOPOLOGY. Migration - Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source host the STFL(11) is provided to the guest. Since the feature is already in the CPU model of older QEMU, a migration from a new QEMU enabling the topology to an old QEMU will keep STFL(11) enabled making the guest get an exception for illegal operation as soon as it uses the PTF instruction. A VMState keeping track of the S390_FEAT_CONFIGURATION_TOPOLOGY allows to forbid the migration in such a case. Note that the VMState will be used to hold information on the topology once we implement topology change for a running guest. Topology Until we introduce bookss and drawers, polarization and dedication the topology is kept very simple and is specified uniquely by the core_id of the vCPU which is also the vCPU address. Testing === To use the QEMU patches, you will need Linux V6-rc1 or newer, or use the following Linux mainline patches: f5ecfee94493 2022-07-20 KVM: s390: resetting the Topology-Change-Report 24fe0195bc19 2022-07-20 KVM: s390: guest support for topology function 0130337ec45b 2022-07-20 KVM: s390: Cleanup ipte lock access and SIIF fac.. Currently this code is for KVM only, I have no idea if it is interesting to provide a TCG patch. If ever it will be done in another series. Documentation = To have a better understanding of the S390x CPU Topology and its implementation in QEMU you can have a look at the documentation in the last patch of this series. The admin will want to match the host and the guest topology, taking into account that the guest does not recognize multithreading. Consequently, two vCPU assigned to threads of the same real CPU should preferably be assigned to the same socket of the guest machine. Future developments === Two series are actively prepared: - Adding drawers, book, polarization and dedication to the vCPU. - changing the topology with a running guest Since we have time before the next QEMU 8.0 release, could you please send the whole patchset ? Having the full picture would help in taking decisions for downstream also. I am still uncertain about the usefulness of S390Topology object because, as for now, the state can be computed on the fly, the reset can be handled at the machine level directly under s390_machine_reset() and so could migration if the machine had a vmstate (not the case today but quite easy to add). So before merging anything that could be difficult to maintain and/or backport, I would prefer to see it all ! The goal of dedicating an object for topology was to ease the maintenance and portability by using the QEMU object framework. If on contrary it is a problem for maintenance or backport we surely better not use it. Any other opinion? Regards, Pierre -- Pierre Morel IBM Lab Boeblingen
Re: [PATCH v13 0/7] s390x: CPU Topology
On 12/12/2022 11.10, Pierre Morel wrote: On 12/12/22 10:07, Thomas Huth wrote: On 12/12/2022 09.51, Pierre Morel wrote: On 12/9/22 14:32, Thomas Huth wrote: On 08/12/2022 10.44, Pierre Morel wrote: Hi, Implementation discussions == CPU models -- Since the S390_FEAT_CONFIGURATION_TOPOLOGY is already in the CPU model for old QEMU we could not activate it as usual from KVM but needed a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY. Checking and enabling this capability enables S390_FEAT_CONFIGURATION_TOPOLOGY. Migration - Once the S390_FEAT_CONFIGURATION_TOPOLOGY is enabled in the source host the STFL(11) is provided to the guest. Since the feature is already in the CPU model of older QEMU, a migration from a new QEMU enabling the topology to an old QEMU will keep STFL(11) enabled making the guest get an exception for illegal operation as soon as it uses the PTF instruction. I now thought that it is not possible to enable "ctop" on older QEMUs since the don't enable the KVM capability? ... or is it still somehow possible? What did I miss? Thomas Enabling ctop with ctop=on on old QEMU is not possible, this is right. But, if STFL(11) is enable in the source KVM by a new QEMU, I can see that even with -ctop=off the STFL(11) is migrated to the destination. Is this with the "host" CPU model or another one? And did you explicitly specify "ctop=off" at the command line, or are you just using the default setting by not specifying it? With explicit cpumodel and using ctop=off like in sudo /usr/local/bin/qemu-system-s390x_master \ -m 512M \ -enable-kvm -smp 4,sockets=4,cores=2,maxcpus=8 \ -cpu z14,ctop=off \ -machine s390-ccw-virtio-7.2,accel=kvm \ ... Ok ... that sounds like a bug somewhere in your patches or in the kernel code ... the guest should never see STFL bit 11 if ctop=off, should it? Thomas
[PATCH v4] riscv: Allow user to set the satp mode
RISC-V specifies multiple sizes for addressable memory and Linux probes for the machine's support at startup via the satp CSR register (done in csr.c:validate_vm). As per the specification, sv64 must support sv57, which in turn must support sv48...etc. So we can restrict machine support by simply setting the "highest" supported mode and the bare mode is always supported. You can set the satp mode using the new properties "mbare", "sv32", "sv39", "sv48", "sv57" and "sv64" as follows: -cpu rv64,sv57=on # Linux will boot using sv57 scheme -cpu rv64,sv39=on # Linux will boot using sv39 scheme We take the highest level set by the user: -cpu rv64,sv48=on,sv57=on # Linux will boot using sv57 scheme We make sure that invalid configurations are rejected: -cpu rv64,sv32=on # Can't enable 32-bit satp mode in 64-bit -cpu rv64,sv39=off,sv48=on # sv39 must be supported if higher modes are # enabled We accept "redundant" configurations: -cpu rv64,sv48=on,sv57=off # sv39 must be supported if higher modes are In addition, we now correctly set the device-tree entry 'mmu-type' using those new properties. Co-Developed-by: Ludovic Henry Signed-off-by: Ludovic Henry Signed-off-by: Alexandre Ghiti --- v4: - Use custom boolean properties instead of OnOffAuto properties, based on ARMVQMap, as suggested by Andrew v3: - Free sv_name as pointed by Bin - Replace satp-mode with boolean properties as suggested by Andrew - Removed RB from Atish as the patch considerably changed v2: - Use error_setg + return as suggested by Alistair - Add RB from Atish - Fixed checkpatch issues missed in v1 - Replaced Ludovic email address with the rivos one hw/riscv/virt.c| 20 +++-- target/riscv/cpu.c | 217 +++-- target/riscv/cpu.h | 25 ++ target/riscv/csr.c | 13 ++- 4 files changed, 256 insertions(+), 19 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index a5bc7353b4..9bb5ba7366 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -228,7 +228,8 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, int cpu; uint32_t cpu_phandle; MachineState *mc = MACHINE(s); -char *name, *cpu_name, *core_name, *intc_name; +uint8_t satp_mode_max; +char *name, *cpu_name, *core_name, *intc_name, *sv_name; for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { cpu_phandle = (*phandle)++; @@ -236,14 +237,15 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, cpu_name = g_strdup_printf("/cpus/cpu@%d", s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(mc->fdt, cpu_name); -if (riscv_feature(&s->soc[socket].harts[cpu].env, - RISCV_FEATURE_MMU)) { -qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", -(is_32_bit) ? "riscv,sv32" : "riscv,sv48"); -} else { -qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", -"riscv,none"); -} + +satp_mode_max = satp_mode_max_from_map( +s->soc[socket].harts[cpu].cfg.satp_mode.map, +is_32_bit); +sv_name = g_strdup_printf("riscv,%s", + satp_mode_str(satp_mode_max, is_32_bit)); +qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", sv_name); +g_free(sv_name); + name = riscv_isa_string(&s->soc[socket].harts[cpu]); qemu_fdt_setprop_string(mc->fdt, cpu_name, "riscv,isa", name); g_free(name); diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index d14e95c9dc..639231ce2e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -27,6 +27,7 @@ #include "time_helper.h" #include "exec/exec-all.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" @@ -199,7 +200,7 @@ static const char * const riscv_intr_names[] = { "reserved" }; -static void register_cpu_props(DeviceState *dev); +static void register_cpu_props(Object *obj); const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) { @@ -237,7 +238,7 @@ static void riscv_any_cpu_init(Object *obj) set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU); #endif set_priv_version(env, PRIV_VERSION_1_12_0); -register_cpu_props(DEVICE(obj)); +register_cpu_props(obj); } #if defined(TARGET_RISCV64) @@ -246,7 +247,7 @@ static void rv64_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV64, 0); -register_cpu_props(DEVICE(obj)); +register_cpu_props(obj); /* Set latest version of privileged specification */ set_priv_version(env, PRIV_VERSION_1_12_0); } @@ -279,7 +280,7 @@ static void rv128_base_cpu_init(Object *obj) CPURISCVStat
Re: [PATCH] include: Don't include qemu/osdep.h
On Mon, Dec 12, 2022 at 3:05 PM Markus Armbruster wrote: > > docs/devel/style.rst mandates: > > The "qemu/osdep.h" header contains preprocessor macros that affect > the behavior of core system headers like . It must be > the first include so that core system headers included by external > libraries get the preprocessor macros that QEMU depends on. > > Do not include "qemu/osdep.h" from header files since the .c file > will have already included it. > > A few violations have crept in. Fix them. > > Signed-off-by: Markus Armbruster > --- > bsd-user/qemu.h | 1 - > crypto/block-luks-priv.h| 1 - > include/hw/cxl/cxl_host.h | 1 - > include/hw/input/pl050.h| 1 - > include/hw/tricore/triboard.h | 1 - > include/qemu/userfaultfd.h | 1 - > net/vmnet_int.h | 1 - > qga/cutils.h| 1 - > target/hexagon/hex_arch_types.h | 1 - > target/hexagon/mmvec/macros.h | 1 - > target/riscv/pmu.h | 1 - > qga/cutils.c| 3 ++- > 12 files changed, 2 insertions(+), 12 deletions(-) > Reviewed-by: Bin Meng
Re: [PATCH 2/5] include/hw/cxl: Include hw/cxl/*.h where needed
On Fri, 9 Dec 2022 14:47:59 +0100 Markus Armbruster wrote: > hw/cxl/cxl_component.h needs CDATObject from hw/cxl/cxl_cdat.h. > > hw/cxl/cxl_device.h needs CXLComponentState from > hw/cxl/cxl_component.h. > > Signed-off-by: Markus Armbruster Acked-by: Jonathan Cameron Thanks, > --- > include/hw/cxl/cxl_component.h | 1 + > include/hw/cxl/cxl_device.h| 1 + > 2 files changed, 2 insertions(+) > > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h > index 34075cfb72..5dca21e95b 100644 > --- a/include/hw/cxl/cxl_component.h > +++ b/include/hw/cxl/cxl_component.h > @@ -18,6 +18,7 @@ > #include "qemu/compiler.h" > #include "qemu/range.h" > #include "qemu/typedefs.h" > +#include "hw/cxl/cxl_cdat.h" > #include "hw/register.h" > #include "qapi/error.h" > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h > index fd475b947b..3f91969db0 100644 > --- a/include/hw/cxl/cxl_device.h > +++ b/include/hw/cxl/cxl_device.h > @@ -10,6 +10,7 @@ > #ifndef CXL_DEVICE_H > #define CXL_DEVICE_H > > +#include "hw/cxl/cxl_component.h" > #include "hw/pci/pci.h" > #include "hw/register.h" >
Re: [PATCH 3/5] hw/acpi/Kconfig: Add missing dependencies to ACPI_ICH9
On 8/12/22 00:12, Bernhard Beschow wrote: ich9_lpc_realize() uses apm_init() and ich9_smbus_realize() uses pm_smbus_init(), so both APM and ACPI_SMBUS are provided by the device models managed by ACPI_ICH9. Signed-off-by: Bernhard Beschow --- hw/acpi/Kconfig | 2 ++ hw/i386/Kconfig | 1 - hw/isa/Kconfig | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH v2 1/5] qemu-img.c: Move IO_BUF_SIZE to the top of the file
On 28.11.22 15:15, Nir Soffer wrote: This macro is used by various commands (compare, convert, rebase) but it is defined somewhere in the middle of the file. I'm going to use it in the new checksum command so lets clean up a bit before that. --- qemu-img.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) Looks good, but is missing your S-o-b – with it added: Reviewed-by: Hanna Reitz
[Bug 1523246] Re: Virtio-blk does not support TRIM
$ git tag --contains 37b06f8d46fe602e630e4 | grep ^v | sort | head -n1 v4.0.0 How did you check the size of the file? It might appear bigger than it is due to sparse blocks. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1523246 Title: Virtio-blk does not support TRIM Status in QEMU: Fix Released Bug description: When model=virtio is used, TRIM is not supported. # mount -o discard /dev/vda4 /mnt # mount | tail -1 /dev/vda4 on /mnt type fuseblk (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other,blksize=4096) # fstrim /mnt/ fstrim: /mnt/: the discard operation is not supported Booting without model=virtio allows using TRIM (in Windows as well). Full QEMU line: qemu-system-x86_64 -enable-kvm -cpu host -bios /usr/share/ovmf/ovmf_x64.bin -smp 2 -m 7G -vga qxl -usbdevice tablet -net nic,model=virtio -net user -drive discard=unmap,detect- zeroes=unmap,cache=none,file=vms/win10.hd.img.vmdk,format=vmdk,if=virtio To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1523246/+subscriptions
Re: [PATCH 4/5] hw/isa/Kconfig: Add missing dependency to VT82C686
On 8/12/22 00:12, Bernhard Beschow wrote: Both ACPI_PIIX4 (directly) and ACPI_ICH9 (indirectly) require ACPI to be selected. Require it for VT82C686's ACPI controller too for consistency. Signed-off-by: Bernhard Beschow --- hw/isa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 0a6a04947c..bc2e3ecf02 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -63,6 +63,7 @@ config VT82C686 select IDE_VIA select MC146818RTC select PARALLEL +depends on ACPI The VT82C686 *provides* the ACPI interface, so here we want to "select" ACPI (if we need a VT82C686, then ACPI will be available).
Re: [PATCH 5/5] hw/ppc/Kconfig: Remove unused dependencies from PEGASOS2
On 8/12/22 00:12, Bernhard Beschow wrote: Removes the following dependencies from ppc-softmmu: - CONFIG_ACPI_CPU_HOTPLUG - CONFIG_ACPI_CXL - CONFIG_ACPI_HMAT - CONFIG_ACPI_MEMORY_HOTPLUG - CONFIG_ACPI_NVDIMM - CONFIG_ACPI_PCIHP - CONFIG_ACPI_PIIX4 - CONFIG_ACPI_X86 - CONFIG_MEM_DEVICE Signed-off-by: Bernhard Beschow --- hw/ppc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index b8d2522f45..0ab77177a8 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -77,7 +77,7 @@ config PEGASOS2 select SMBUS_EEPROM select VOF # This should come with VT82C686 -select ACPI_X86 +select ACPI With the previous patch (fixed) you can remove both the comment and the "select" line.
Re: [PATCH v2 2/5] Support format or cache specific out file
On 28.11.22 15:15, Nir Soffer wrote: Extend the test finder to find tests with format (*.out.qcow2) or cache specific (*.out.nocache) out file. This worked before only for the numbered tests. --- tests/qemu-iotests/findtests.py | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) This patch lacks an S-o-b, too. diff --git a/tests/qemu-iotests/findtests.py b/tests/qemu-iotests/findtests.py index dd77b453b8..f4344ce78c 100644 --- a/tests/qemu-iotests/findtests.py +++ b/tests/qemu-iotests/findtests.py @@ -38,31 +38,37 @@ def chdir(path: Optional[str] = None) -> Iterator[None]: os.chdir(saved_dir) class TestFinder: def __init__(self, test_dir: Optional[str] = None) -> None: self.groups = defaultdict(set) with chdir(test_dir): self.all_tests = glob.glob('[0-9][0-9][0-9]') self.all_tests += [f for f in glob.iglob('tests/*') - if not f.endswith('.out') and - os.path.isfile(f + '.out')] + if self.is_test(f)] So previously a file was only considered a test file if there was a corresponding reference output file (`f + '.out'`), so files without such a reference output aren’t considered test files... for t in self.all_tests: with open(t, encoding="utf-8") as f: for line in f: if line.startswith('# group: '): for g in line.split()[2:]: self.groups[g].add(t) break +def is_test(self, fname: str) -> bool: +""" +The tests directory contains tests (no extension) and out files +(*.out, *.out.{format}, *.out.{option}). +""" +return re.search(r'.+\.out(\.\w+)?$', fname) is None ...but this new function doesn’t check that. I think we should check it (just whether there’s any variant of `/{fname}\.out(\.\w+)?/` to go with `fname`) so that behavior isn’t changed. Hanna
Re: [PATCH 1/5] include/hw/pci: Clean up superfluous inclusion of pci*/*.h cxl/*.h
On Fri, 9 Dec 2022 14:47:58 +0100 Markus Armbruster wrote: Hi Markus, One comment on the CXL ones. Others CXL related changes all looks fine to me. Thanks for cleaning these up. Jonathan > diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h > index 38e0e271d5..5129557bee 100644 > --- a/include/hw/cxl/cxl.h > +++ b/include/hw/cxl/cxl.h > @@ -13,7 +13,6 @@ > > #include "qapi/qapi-types-machine.h" > #include "qapi/qapi-visit-machine.h" > -#include "hw/pci/pci_bridge.h" If we drop this, we probably want a forwards def of struct PXBDev I should probably be using the typedef in here as well rather than struct PXBDev * in CXLFixed Window so we'd need to deal with making that visible too. > #include "hw/pci/pci_host.h" > #include "cxl_pci.h" > #include "cxl_component.h" > #define CXL_VENDOR_ID 0x1e98
Re: [PATCH v2 4/5] iotests: Test qemu-img checksum
On 28.11.22 15:15, Nir Soffer wrote: Add simple tests computing a checksum for image with all kinds of extents in raw and qcow2 formats. The test can be extended later for other formats, format options (e..g compressed qcow2), protocols (e.g. nbd), and image with a backing chain, but I'm not sure this is really needed. To help debugging in case of failures, the output includes a json map of the test image. Signed-off-by: Nir Soffer --- tests/qemu-iotests/tests/qemu-img-checksum| 63 +++ .../tests/qemu-img-checksum.out.qcow2 | 11 .../tests/qemu-img-checksum.out.raw | 10 +++ 3 files changed, 84 insertions(+) create mode 100755 tests/qemu-iotests/tests/qemu-img-checksum create mode 100644 tests/qemu-iotests/tests/qemu-img-checksum.out.qcow2 create mode 100644 tests/qemu-iotests/tests/qemu-img-checksum.out.raw Thanks! Reviewed-by: Hanna Reitz
Re: [PATCH v2 3/5] qemu-img: Add checksum command
On 28.11.22 15:15, Nir Soffer wrote: The checksum command compute a checksum for disk image content using the blkhash library[1]. The blkhash library is not packaged yet, but it is available via copr[2]. Example run: $ ./qemu-img checksum -p fedora-35.qcow2 6e5c00c995056319d52395f8d91c7f84725ae3da69ffcba4de4c7d22cff713a5 fedora-35.qcow2 The block checksum is constructed by splitting the image to fixed sized blocks and computing a digest of every block. The image checksum is the digest of the all block digests. The checksum uses internally the "sha256" algorithm but it cannot be compared with checksums created by other tools such as `sha256sum`. The blkhash library supports sparse images, zero detection, and optimizes zero block hashing (they are practically free). The library uses multiple threads to speed up the computation. Comparing to `sha256sum`, `qemu-img checksum` is 3.5-4800[3] times faster, depending on the amount of data in the image: $ ./qemu-img info /scratch/50p.raw file format: raw virtual size: 6 GiB (6442450944 bytes) disk size: 2.91 GiB $ hyperfine -w2 -r5 -p "sleep 1" "./qemu-img checksum /scratch/50p.raw" \ "sha256sum /scratch/50p.raw" Benchmark 1: ./qemu-img checksum /scratch/50p.raw Time (mean ± σ): 1.849 s ± 0.037 s[User: 7.764 s, System: 0.962 s] Range (min … max):1.813 s … 1.908 s5 runs Benchmark 2: sha256sum /scratch/50p.raw Time (mean ± σ): 14.585 s ± 0.072 s[User: 13.537 s, System: 1.003 s] Range (min … max): 14.501 s … 14.697 s5 runs Summary './qemu-img checksum /scratch/50p.raw' ran 7.89 ± 0.16 times faster than 'sha256sum /scratch/50p.raw' The new command is available only when `blkhash` is available during build. To test the new command please install the `blkhash-devel` package: $ dnf copr enable nsoffer/blkhash $ sudo dnf install blkhash-devel [1] https://gitlab.com/nirs/blkhash [2] https://copr.fedorainfracloud.org/coprs/nsoffer/blkhash/ [3] Computing checksum for 8T empty image: qemu-img checksum: 3.7s, sha256sum (estimate): 17,749s Signed-off-by: Nir Soffer --- docs/tools/qemu-img.rst | 24 ++ meson.build | 10 ++- meson_options.txt | 2 + qemu-img-cmds.hx| 8 ++ qemu-img.c | 183 5 files changed, 226 insertions(+), 1 deletion(-) [...] diff --git a/qemu-img.c b/qemu-img.c index c03d6b4b31..4b4ca7add3 100644 --- a/qemu-img.c +++ b/qemu-img.c [...] @@ -1613,20 +1617,199 @@ out: qemu_vfree(buf1); qemu_vfree(buf2); blk_unref(blk2); out2: blk_unref(blk1); out3: qemu_progress_end(); return ret; } +#ifdef CONFIG_BLKHASH +/* + * Compute image checksum. + */ +static int img_checksum(int argc, char **argv) +{ +const char *digest_name = "sha256"; +const size_t block_size = 64 * KiB; + +_Static_assert(QEMU_IS_ALIGNED(IO_BUF_SIZE, block_size), + "IO_BUF_SIZE should be alligned to block_size"); (s/alligned/aligned/) A suggestion: We have a `QEMU_BUILD_BUG_MSG()` macro in include/qemu/compiler.h. Nowadays it just unconditionally resolves to _Static_assert, I think before C11 was adopted it used a custom implementation. Still, it is what seems to be used throughout the actual qemu code (disregarding roms/ and pc-bios/), so I think it would be more fitting to use. But that’s just a suggestion. It always resolves to _Static_assert anyway, so using _Static_assert seems by no means wrong. So with the spelling fixed: Reviewed-by: Hanna Reitz
Re: [PATCH v2 5/5] qemu-img: Speed up checksum
On 28.11.22 15:15, Nir Soffer wrote: Add coroutine based loop inspired by `qemu-img convert` design. Changes compared to `qemu-img convert`: - State for the entire image is kept in ImgChecksumState - State for single worker coroutine is kept in ImgChecksumworker. - "Writes" are always in-order, ensured using a queue. - Calling block status once per image extent, when the current extent is consumed by the workers. - Using 1m buffer size - testings shows that this gives best read performance both with buffered and direct I/O. - Number of coroutines is not configurable. Testing does not show improvement when using more than 8 coroutines. - Progress include entire image, not only the allocated state. Comparing to the simple read loop shows that this version is up to 4.67 times faster when computing a checksum for an image full of zeroes. For real images it is 1.59 times faster with direct I/O, and with buffered I/O there is no difference. Test results on Dell PowerEdge R640 in a CentOS Stream 9 container: | image| size | i/o | before | after | change | |--|--|---|||| | zero [1] | 6g | buffered | 1.600s ±0.014s | 0.342s ±0.016s | x4.67 | | zero | 6g | direct| 4.684s ±0.093s | 2.211s ±0.009s | x2.12 | | real [2] | 6g | buffered | 1.841s ±0.075s | 1.806s ±0.036s | x1.02 | | real | 6g | direct| 3.094s ±0.079s | 1.947s ±0.017s | x1.59 | | nbd [3] | 6g | buffered | 2.455s ±0.183s | 1.808s ±0.016s | x1.36 | | nbd | 6g | direct| 3.540s ±0.020s | 1.749s ±0.018s | x2.02 | [1] raw image full of zeroes [2] raw fedora 35 image with additional random data, 50% full [3] image [2] exported by qemu-nbd via unix socket Signed-off-by: Nir Soffer --- qemu-img.c | 350 ++--- 1 file changed, 277 insertions(+), 73 deletions(-) Reviewed-by: Hanna Reitz
[PATCH-for-8.0] hw/acpi: Rename tco.c -> ich9_tco.c
tco.c contains the ICH9 implementation of its "total cost of ownership". Rename it accordingly to emphasis this is a part of the ICH9 model. Signed-off-by: Philippe Mathieu-Daudé --- Based-on: <20221207231205.1106381-1-shen...@gmail.com> "Clean up dependencies of ACPI controllers" --- MAINTAINERS | 4 ++-- hw/acpi/ich9.c| 2 +- hw/acpi/{tco.c => ich9_tco.c} | 2 +- hw/acpi/meson.build | 2 +- include/hw/acpi/ich9.h| 2 +- include/hw/acpi/{tco.h => ich9_tco.h} | 2 +- tests/qtest/tco-test.c| 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) rename hw/acpi/{tco.c => ich9_tco.c} (99%) rename include/hw/acpi/{tco.h => ich9_tco.h} (97%) diff --git a/MAINTAINERS b/MAINTAINERS index 6966490c94..35bde4a97e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1644,8 +1644,8 @@ F: hw/isa/piix3.c F: hw/isa/lpc_ich9.c F: hw/i2c/smbus_ich9.c F: hw/acpi/piix4.c -F: hw/acpi/ich9.c -F: include/hw/acpi/ich9.h +F: hw/acpi/ich9*.c +F: include/hw/acpi/ich9*.h F: include/hw/southbridge/piix.h F: hw/misc/sga.c F: hw/isa/apm.c diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index bd9bbade70..b10afa372a 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -34,7 +34,7 @@ #include "sysemu/reset.h" #include "sysemu/runstate.h" #include "hw/acpi/acpi.h" -#include "hw/acpi/tco.h" +#include "hw/acpi/ich9_tco.h" #include "hw/i386/ich9.h" #include "hw/mem/pc-dimm.h" diff --git a/hw/acpi/tco.c b/hw/acpi/ich9_tco.c similarity index 99% rename from hw/acpi/tco.c rename to hw/acpi/ich9_tco.c index 4783721e4e..b68348707b 100644 --- a/hw/acpi/tco.c +++ b/hw/acpi/ich9_tco.c @@ -12,7 +12,7 @@ #include "hw/i386/ich9.h" #include "migration/vmstate.h" -#include "hw/acpi/tco.h" +#include "hw/acpi/ich9_tco.h" #include "trace.h" enum { diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index cfae2f58f6..30054a8cdc 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -22,7 +22,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_PIIX4', if_true: files('piix4.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_true: files('pcihp.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_false: files('acpi-pci-hotplug-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VIOT', if_true: files('viot.c')) -acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'tco.c')) +acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'ich9_tco.c')) acpi_ss.add(when: 'CONFIG_ACPI_ERST', if_true: files('erst.c')) acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index 7ca92843c6..d41866a229 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -27,7 +27,7 @@ #include "hw/acpi/pcihp.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" -#include "hw/acpi/tco.h" +#include "hw/acpi/ich9_tco.h" #define ACPI_PCIHP_ADDR_ICH9 0x0cc0 diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/ich9_tco.h similarity index 97% rename from include/hw/acpi/tco.h rename to include/hw/acpi/ich9_tco.h index a1e0da8213..c4393caee0 100644 --- a/include/hw/acpi/tco.h +++ b/include/hw/acpi/ich9_tco.h @@ -1,5 +1,5 @@ /* - * QEMU ICH9 TCO emulation + * QEMU ICH9 TCO emulation (total cost of ownership) * * Copyright (c) 2015 Paulo Alcantara * diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c index 254f735370..d7b61ccfa5 100644 --- a/tests/qtest/tco-test.c +++ b/tests/qtest/tco-test.c @@ -16,7 +16,7 @@ #include "hw/pci/pci_regs.h" #include "hw/i386/ich9.h" #include "hw/acpi/ich9.h" -#include "hw/acpi/tco.h" +#include "hw/acpi/ich9_tco.h" #define RCBA_BASE_ADDR0xfed1c000 #define PM_IO_BASE_ADDR 0xb000 -- 2.38.1
Re: [PATCH v4 4/4] scripts: add script to compare compatible properties
* Maksim Davydov (davydov-...@yandex-team.ru) wrote: > This script run QEMU to obtain compat_props of machines and default > values of different types and produce appropriate table. This table > can be used to compare machine types to choose the most suitable > machine. Also this table in json or csv format should be used to check that > new machine doesn't affect previous ones by comparing tables with and > without new machine. > Default values of properties are needed to fill "holes" in the table (one > machine has these properties and another not. For instance, 2.12 mt has > `{ "EPYC-" TYPE_X86_CPU, "xlevel", "0x800a" }`, but compat_pros of > 3.1 mt doesn't have it. So, to compare these machines we need to fill > unknown value of "EPYC-x86_64-cpu-xlevel" for 3.1 mt. This unknown value > in the table I called "hole". To get values (default values) for these > "holes" the script uses list of appropriate methods.) > > Notes: > * some init values from the devices can't be available like properties > from virtio-9p when configure has --disable-virtfs. This situations will > be seen in the table as "unavailable driver". > * Default values can be obtained in an unobvious way, like x86 features. > If the script doesn't know how to get property default value to compare > one machine with another it fills "holes" with "unavailable method". This > is done because script uses whitelist model to get default values of > different types. It means that the method that can't be applied to a new > type that can crash this script. It is better to get an "unavailable > driver" when creating a new machine with new compatible properties than > to break this script. So it turns out a more stable and generic script. > * If the default value can't be obtained because this property doesn't > exist or because this property can't have default value, appropriate > "hole" will be filled by "unknown property" or "no default value" > * If the property is applied to the abstract class, the script collects > default values from all child classes (set of default values) Nice; just a suggestion - have you considered adding a flag to specify the qemu binaries separately, so that you can compare the machine type definitions of two qemu binaries for the same machine type? Dave > Example: ./scripts/compare_mt.py --mt pc-q35-3.1 pc-q35-4.0 > > ╒═══╤══╤╕ > │ │ pc-q35-3.1 │ >pc-q35-4.0 │ > ╞═══╪══╪╡ > │ Cascadelake-Server-x86_64-cpu:mpx │ True │ > False│ > ├───┼──┼┤ > │ Cascadelake-Server-x86_64-cpu:stepping│ 5 │ >6 │ > ├───┼──┼┤ > │ Icelake-Client-x86_64-cpu:mpx │ True │ > unavailable driver │ > ├───┼──┼┤ > │ Icelake-Server-x86_64-cpu:mpx │ True │ > False│ > ├───┼──┼┤ > │ Opteron_G3-x86_64-cpu:rdtscp │False │ > True│ > ├───┼──┼┤ > │ Opteron_G4-x86_64-cpu:rdtscp │False │ > True│ > ├───┼──┼┤ > │ Opteron_G5-x86_64-cpu:rdtscp │False │ > True│ > ├───┼──┼┤ > │ Skylake-Client-IBRS-x86_64-cpu:mpx│ True │ > False│ > ├───┼──┼┤ > │ Skylake-Client-x86_64-cpu:mpx │ True │ > False│ > ├───┼──┼┤ > │ Skylake-Server-IBRS-x86_64-cpu:mpx│ True │ > False│ > ├───┼──┼┤ > │ Skylake-Server-x86_64-cpu:mpx │ True │ > False│ > ├───┼──┼┤ > │ intel-iommu:dma-drain │False │ >
Re: [PATCH 6/5] include/hw/cxl: Break inclusion loop
On Sat, 10 Dec 2022 08:09:06 +0100 Markus Armbruster wrote: > Markus Armbruster writes: > > > hw/cxl/cxl_pci.h and hw/cxl/cxl_cdat.h include each other. Neither > > header actually needs the other one. Drop both #include directives. > > > > Signed-off-by: Markus Armbruster > > --- > > include/hw/cxl/cxl_cdat.h | 1 - > > include/hw/cxl/cxl_pci.h | 1 - > > 2 files changed, 2 deletions(-) > > > > diff --git a/include/hw/cxl/cxl_cdat.h b/include/hw/cxl/cxl_cdat.h > > index 7f67638685..e3fd737f9d 100644 > > --- a/include/hw/cxl/cxl_cdat.h > > +++ b/include/hw/cxl/cxl_cdat.h > > @@ -10,7 +10,6 @@ > > #ifndef CXL_CDAT_H > > #define CXL_CDAT_H > > > > -#include "hw/cxl/cxl_pci.h" > > #include "hw/pci/pcie_doe.h" The include was to get to CXL_VENDOR_ID which is in hw/cxl/cxl_pci.h Can move that elsewhere perhaps, though I don't think we need to if we break the loop by dropping the other one. > > > > /* > > diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h > > index aca14845ab..01e15ed5b4 100644 > > --- a/include/hw/cxl/cxl_pci.h > > +++ b/include/hw/cxl/cxl_pci.h > > @@ -11,7 +11,6 @@ > > #define CXL_PCI_H > > > > #include "qemu/compiler.h" > > -#include "hw/cxl/cxl_cdat.h" Guess that's a left over of some earlier refactoring. Good to get rid of this one. > > > > #define CXL_VENDOR_ID 0x1e98 > > Friday afternoon post with insufficient testing... Everything still > builds fine, but cxl_component.h is no longer self-contained. I'll > squash in the appended patch and revise the commit message. By staring at the code rather than any automation I'm failing to spot what it needs from cxl_pci.h. Can you add that info to the commit message? > > > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h > index 5dca21e95b..78f83ed742 100644 > --- a/include/hw/cxl/cxl_component.h > +++ b/include/hw/cxl/cxl_component.h > @@ -19,6 +19,7 @@ > #include "qemu/range.h" > #include "qemu/typedefs.h" > #include "hw/cxl/cxl_cdat.h" > +#include "hw/cxl/cxl_pci.h" > #include "hw/register.h" > #include "qapi/error.h" > >
Re: [PATCH v1 06/24] vfio-user: Define type vfio_user_pci_dev_info
On Mon, Dec 12, 2022 at 10:01:33AM +0100, Cédric Le Goater wrote: > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index 80b03a2..dc19869 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -19,6 +19,7 @@ > >*/ > > #include "qemu/osdep.h" > > +#include CONFIG_DEVICES > > #include > > #include > > @@ -3421,3 +3422,91 @@ static void register_vfio_pci_dev_type(void) > > } > > type_init(register_vfio_pci_dev_type) > > + > > + > > +#ifdef CONFIG_VFIO_USER_PCI > > Why not introduce a new file hw/vfio/user.c file ? It would be > cleaner. user.c is in this series, and holds the vfio-user implementation - it's not a PCI specific thing. So it would have to be hw/vfio/user_pci.c or something regards john
Re: [PATCH v2 2/5] target/riscv: Update VS timer whenever htimedelta changes
On Mon, Dec 12, 2022 at 11:23 AM Alistair Francis wrote: > > On Thu, Dec 8, 2022 at 6:41 PM Anup Patel wrote: > > > > On Thu, Dec 8, 2022 at 9:00 AM Alistair Francis > > wrote: > > > > > > On Tue, Nov 8, 2022 at 11:07 PM Anup Patel > > > wrote: > > > > > > > > The htimedelta[h] CSR has impact on the VS timer comparison so we > > > > should call riscv_timer_write_timecmp() whenever htimedelta changes. > > > > > > > > Fixes: 3ec0fe18a31f ("target/riscv: Add vstimecmp suppor") > > > > Signed-off-by: Anup Patel > > > > Reviewed-by: Alistair Francis > > > > > > This patch breaks my Xvisor test. When running OpenSBI and Xvisor like > > > this: > > > > > > qemu-system-riscv64 -machine virt \ > > > -m 1G -serial mon:stdio -serial null -nographic \ > > > -append 'vmm.console=uart@1000 vmm.bootcmd="vfs mount initrd > > > /;vfs run /boot.xscript;vfs cat /system/banner.txt; guest kick guest0; > > > vserial bind guest0/uart0"' \ > > > -smp 4 -d guest_errors \ > > > -bios none \ > > > -device loader,file=./images/qemuriscv64/vmm.bin,addr=0x8020 \ > > > -kernel ./images/qemuriscv64/fw_jump.elf \ > > > -initrd ./images/qemuriscv64/vmm-disk-linux.img -cpu rv64,h=true > > > > > > Running: > > > > > > Xvisor v0.3.0-129-gbc33f339 (Jan 1 1970 00:00:00) > > > > > > I see this failure: > > > > > > INIT: bootcmd: guest kick guest0 > > > > > > guest0: Kicked > > > > > > INIT: bootcmd: vserial bind guest0/uart0 > > > > > > [guest0/uart0] cpu_vcpu_stage2_map: guest_phys=0x3B9AC000 > > > size=0x4096 map failed > > > > > > do_error: CPU3: VCPU=guest0/vcpu0 page fault failed (error -1) > > > > > >zero=0x ra=0x80001B4E > > > > > > sp=0x8001CF80 gp=0x > > > > > > tp=0x s0=0x8001CFB0 > > > > > > s1=0x a0=0x10001048 > > > > > > a1=0x a2=0x00989680 > > > > > > a3=0x3B9ACA00 a4=0x0048 > > > > > > a5=0x a6=0x00019000 > > > > > > a7=0x s2=0x > > > > > > s3=0x s4=0x > > > > > > s5=0x s6=0x > > > > > > s7=0x s8=0x > > > > > > s9=0x s10=0x > > > > > > s11=0x t0=0x4000 > > > > > > t1=0x0100 t2=0x > > > > > > t3=0x t4=0x > > > > > > t5=0x t6=0x > > > > > >sepc=0x80001918 sstatus=0x00024120 > > > > > > hstatus=0x0002002001C0 sp_exec=0x10A64000 > > > > > > scause=0x0017 stval=0x3B9ACAF8 > > > > > > htval=0x0EE6B2BE htinst=0x00D03021 > > > > > > I have tried updating to a newer Xvisor release, but with that I don't > > > get any serial output. > > > > > > Can you help get the Xvisor tests back up and running? > > > > I tried the latest Xvisor-next (https://github.com/avpatel/xvisor-next) > > with your QEMU riscv-to-apply.next branch and it works fine (both > > with and without Sstc). > > Does it work with the latest release? Yes, the latest Xvisor-next repo works for QEMU v7.2.0-rc4 and your riscv-to-apply.next branch (commit 51bb9de2d188) Regards, Anup > > Alistair > > > > > Here's the QEMU command which I use: > > > > qemu-system-riscv64 -M virt -m 512M -nographic \ > > -bios opensbi/build/platform/generic/firmware/fw_jump.bin \ > > -kernel ../xvisor-next/build/vmm.bin \ > > -initrd rbd_v64.img \ > > -append "vmm.bootcmd=\"vfs mount initrd /;vfs run /boot.xscript;vfs > > cat /system/banner.txt\"" \ > > -smp 4 > > > > Also, I will be releasing Xvisor-0.3.2 by the end of Dec 2022 so I > > suggest using this upcoming release in your test. > > > > Regards, > > Anup
[PATCH v3 2/4] hw/nvme: rename shadow doorbell related trace events
From: Klaus Jensen Rename the trace events related to writing the event index and reading the doorbell value to make it more clear that the event is associated with an actual update (write or read respectively). Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 11 +++ hw/nvme/trace-events | 8 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6b70c1e39831..cfab21b3436e 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1337,8 +1337,9 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, static void nvme_update_cq_head(NvmeCQueue *cq) { pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, -sizeof(cq->head)); -trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head); + sizeof(cq->head)); + +trace_pci_nvme_update_cq_head(cq->cqid, cq->head); } static void nvme_post_cqes(void *opaque) @@ -6147,16 +6148,18 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) static void nvme_update_sq_eventidx(const NvmeSQueue *sq) { +trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail); + pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail, sizeof(sq->tail)); -trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail); } static void nvme_update_sq_tail(NvmeSQueue *sq) { pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail, sizeof(sq->tail)); -trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail); + +trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail); } static void nvme_process_sq(void *opaque) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index fccb79f48973..b16f2260b4fd 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -84,8 +84,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d" pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16"" -pci_nvme_eventidx_cq(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" new_eventidx %"PRIu16"" -pci_nvme_eventidx_sq(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" new_eventidx %"PRIu16"" +pci_nvme_update_cq_eventidx(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" new_eventidx %"PRIu16"" +pci_nvme_update_sq_eventidx(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" new_eventidx %"PRIu16"" pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" @@ -102,8 +102,8 @@ pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" pci_nvme_mmio_stopped(void) "cleared controller enable bit" pci_nvme_mmio_shutdown_set(void) "shutdown bit set" pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" -pci_nvme_shadow_doorbell_cq(uint16_t cqid, uint16_t new_shadow_doorbell) "cqid %"PRIu16" new_shadow_doorbell %"PRIu16"" -pci_nvme_shadow_doorbell_sq(uint16_t sqid, uint16_t new_shadow_doorbell) "sqid %"PRIu16" new_shadow_doorbell %"PRIu16"" +pci_nvme_update_cq_head(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" +pci_nvme_update_sq_tail(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -- 2.38.1
[PATCH v3 4/4] hw/nvme: fix missing cq eventidx update
From: Klaus Jensen Prior to reading the shadow doorbell cq head, we have to update the eventidx. Otherwise, we risk that the driver will skip an mmio doorbell write. This happens on riscv64, as reported by Guenter. Adding the missing update to the cq eventidx fixes the issue. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Cc: qemu-ri...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8af70f0216f0..3df29ea68b2f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1334,10 +1334,22 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, } } +static void nvme_update_cq_eventidx(const NvmeCQueue *cq) +{ +uint32_t v = cpu_to_le32(cq->head); + +trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head); + +pci_dma_write(PCI_DEVICE(cq->ctrl), cq->ei_addr, &v, sizeof(v)); +} + static void nvme_update_cq_head(NvmeCQueue *cq) { -pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, - sizeof(cq->head)); +uint32_t v; + +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v)); + +cq->head = le32_to_cpu(v); trace_pci_nvme_update_cq_head(cq->cqid, cq->head); } @@ -1355,6 +1367,7 @@ static void nvme_post_cqes(void *opaque) hwaddr addr; if (n->dbbuf_enabled) { +nvme_update_cq_eventidx(cq); nvme_update_cq_head(cq); } -- 2.38.1
[PATCH v3 1/4] hw/nvme: use QOM accessors
From: Klaus Jensen Replace various ->parent_obj use with the equivalent QOM accessors. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 89 +++--- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e54276dc1dc7..6b70c1e39831 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -449,7 +449,7 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) return 0; } -return pci_dma_read(&n->parent_obj, addr, buf, size); +return pci_dma_read(PCI_DEVICE(n), addr, buf, size); } static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size) @@ -469,7 +469,7 @@ static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size) return 0; } -return pci_dma_write(&n->parent_obj, addr, buf, size); +return pci_dma_write(PCI_DEVICE(n), addr, buf, size); } static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) @@ -514,24 +514,27 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq) static void nvme_irq_check(NvmeCtrl *n) { +PCIDevice *pci = PCI_DEVICE(n); uint32_t intms = ldl_le_p(&n->bar.intms); -if (msix_enabled(&(n->parent_obj))) { +if (msix_enabled(pci)) { return; } if (~intms & n->irq_status) { -pci_irq_assert(&n->parent_obj); +pci_irq_assert(pci); } else { -pci_irq_deassert(&n->parent_obj); +pci_irq_deassert(pci); } } static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) { +PCIDevice *pci = PCI_DEVICE(n); + if (cq->irq_enabled) { -if (msix_enabled(&(n->parent_obj))) { +if (msix_enabled(pci)) { trace_pci_nvme_irq_msix(cq->vector); -msix_notify(&(n->parent_obj), cq->vector); +msix_notify(pci, cq->vector); } else { trace_pci_nvme_irq_pin(); assert(cq->vector < 32); @@ -546,7 +549,7 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) { if (cq->irq_enabled) { -if (msix_enabled(&(n->parent_obj))) { +if (msix_enabled(PCI_DEVICE(n))) { return; } else { assert(cq->vector < 32); @@ -570,7 +573,7 @@ static void nvme_req_clear(NvmeRequest *req) static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma) { if (dma) { -pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0); +pci_dma_sglist_init(&sg->qsg, PCI_DEVICE(n), 0); sg->flags = NVME_SG_DMA; } else { qemu_iovec_init(&sg->iov, 0); @@ -1333,7 +1336,7 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, static void nvme_update_cq_head(NvmeCQueue *cq) { -pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head, +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, sizeof(cq->head)); trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head); } @@ -1363,7 +1366,7 @@ static void nvme_post_cqes(void *opaque) req->cqe.sq_id = cpu_to_le16(sq->sqid); req->cqe.sq_head = cpu_to_le16(sq->head); addr = cq->dma_addr + cq->tail * n->cqe_size; -ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, +ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe, sizeof(req->cqe)); if (ret) { trace_pci_nvme_err_addr_write(addr); @@ -4615,6 +4618,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) { +PCIDevice *pci = PCI_DEVICE(n); uint16_t offset = (cq->cqid << 3) + (1 << 2); n->cq[cq->cqid] = NULL; @@ -4625,8 +4629,8 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) event_notifier_set_handler(&cq->notifier, NULL); event_notifier_cleanup(&cq->notifier); } -if (msix_enabled(&n->parent_obj)) { -msix_vector_unuse(&n->parent_obj, cq->vector); +if (msix_enabled(pci)) { +msix_vector_unuse(pci, cq->vector); } if (cq->cqid) { g_free(cq); @@ -4664,8 +4668,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) { -if (msix_enabled(&n->parent_obj)) { -msix_vector_use(&n->parent_obj, vector); +PCIDevice *pci = PCI_DEVICE(n); + +if (msix_enabled(pci)) { +msix_vector_use(pci, vector); } cq->ctrl = n; cq->cqid = cqid; @@ -4716,7 +4722,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_PRP_OFFSET | NVME_DNR; } -if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { +if (unlikely(!msix_enabled(PCI_DEVICE(n)) && vector
[PATCH v3 3/4] hw/nvme: fix missing endian conversions for doorbell buffers
From: Klaus Jensen The eventidx and doorbell value are not handling endianness correctly. Fix this. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index cfab21b3436e..8af70f0216f0 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6148,16 +6148,20 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) static void nvme_update_sq_eventidx(const NvmeSQueue *sq) { +uint32_t v = cpu_to_le32(sq->tail); + trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail); -pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail, - sizeof(sq->tail)); +pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &v, sizeof(v)); } static void nvme_update_sq_tail(NvmeSQueue *sq) { -pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail, - sizeof(sq->tail)); +uint32_t v; + +pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &v, sizeof(v)); + +sq->tail = le32_to_cpu(v); trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail); } -- 2.38.1
[PATCH v3 0/4] hw/nvme: fix broken shadow doorbells on some platforms
From: Klaus Jensen Guenter reports that hw/nvme is broken on riscv64[1] and big endian platforms[2]. This is a regression since 7.1, so this does not warrent an rc5 for 7.2. I'm sure Guenter can carry this patch in his tree, and maybe we can get this out in a stable release. On riscv, the issue is a missing cq eventidx update. I really wonder why this issue only shows up on riscv64. We have not observed this on other platforms (yet). Further, Guenter also reported problems on big-endian platforms. The issue here is missing endian conversions which patch 3 addresses. This also requires a fix for the Linux kernel that I am posting separately (can't link to it, chicken and egg problem). [1]: https://lore.kernel.org/qemu-devel/20221207174918.ga1151...@roeck-us.net/ [2]: https://lore.kernel.org/qemu-devel/20221209110022.ga3396...@roeck-us.net/ v3: - add patch to fix big-endian platforms v2: - use QOM accessor (Philippe) - added some cleanup patches in front Klaus Jensen (4): hw/nvme: use QOM accessors hw/nvme: rename shadow doorbell related trace events hw/nvme: fix missing endian conversions for doorbell buffers hw/nvme: fix missing cq eventidx update hw/nvme/ctrl.c | 121 ++- hw/nvme/trace-events | 8 +-- 2 files changed, 78 insertions(+), 51 deletions(-) -- 2.38.1
Re: [PATCH 1/2] include/hw/virtio: Break inclusion loop
On Sat, Dec 10, 2022 at 02:39:14PM +0100, Markus Armbruster wrote: hw/virtio/virtio.h and hw/virtio/vhost.h include each other. The former doesn't actually need the latter, so drop that inclusion to break the loop. Signed-off-by: Markus Armbruster --- include/hw/virtio/virtio.h | 1 - hw/virtio/virtio.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) Reviewed-by: Stefano Garzarella diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index a973811cbf..8b68b69e00 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -22,7 +22,6 @@ #include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_ring.h" #include "qom/object.h" -#include "hw/virtio/vhost.h" /* * A guest should never accept this. It implies negotiation is broken diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index eb6347ab5d..faedf886da 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -26,6 +26,7 @@ #include "qemu/module.h" #include "qom/object_interfaces.h" #include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" #include "migration/qemu-file-types.h" #include "qemu/atomic.h" #include "hw/virtio/virtio-bus.h" -- 2.37.3
Re: [PATCH v3 3/4] hw/nvme: fix missing endian conversions for doorbell buffers
On 12/12/22 12:32, Klaus Jensen wrote: From: Klaus Jensen The eventidx and doorbell value are not handling endianness correctly. Fix this. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH] s390x/pci: reset ISM passthrough devices on shutdown and system reset
On 09/12/2022 20.57, Matthew Rosato wrote: ISM device firmware stores unique state information that can can cause a wholesale unmap of the associated IOMMU (e.g. when we get a termination signal for QEMU) to trigger firmware errors because firmware believes we are attempting to invalidate entries that are still in-use by the guest OS (when in fact that guest is in the process of being terminated or rebooted). To alleviate this, register both a shutdown notifier (for unexpected termination cases e.g. virsh destroy) as well as a reset callback (for cases like guest OS reboot). For each of these scenarios, trigger PCI device reset; this is enough to indicate to firmware that the IOMMU is no longer in-use by the guest OS, making it safe to invalidate any associated IOMMU entries. Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") Signed-off-by: Matthew Rosato --- hw/s390x/s390-pci-bus.c | 28 hw/s390x/s390-pci-vfio.c| 2 ++ include/hw/s390x/s390-pci-bus.h | 5 + 3 files changed, 35 insertions(+) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 977e7daa15..02751f3597 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -24,6 +24,8 @@ #include "hw/pci/msi.h" #include "qemu/error-report.h" #include "qemu/module.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" #ifndef DEBUG_S390PCI_BUS #define DEBUG_S390PCI_BUS 0 @@ -150,10 +152,30 @@ out: psccb->header.response_code = cpu_to_be16(rc); } +static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) +{ +S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, + shutdown_notifier); + +pci_device_reset(pbdev->pdev); +} + +static void s390_pci_reset_cb(void *opaque) +{ +S390PCIBusDevice *pbdev = opaque; + +pci_device_reset(pbdev->pdev); +} + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) { HotplugHandler *hotplug_ctrl; +if (pbdev->pft == ZPCI_PFT_ISM) { +notifier_remove(&pbdev->shutdown_notifier); +qemu_unregister_reset(s390_pci_reset_cb, pbdev); +} + /* Unplug the PCI device */ if (pbdev->pdev) { DeviceState *pdev = DEVICE(pbdev->pdev); @@ -,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, pbdev->fh |= FH_SHM_VFIO; pbdev->forwarding_assist = false; } +/* Register shutdown notifier and reset callback for ISM devices */ +if (pbdev->pft == ZPCI_PFT_ISM) { +pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; +qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); +qemu_register_reset(s390_pci_reset_cb, pbdev); +} } else { pbdev->fh |= FH_SHM_EMUL; /* Always intercept emulated devices */ diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c index 5f0adb0b4a..419763f829 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -122,6 +122,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, /* The following values remain 0 until we support other FMB formats */ pbdev->zpci_fn.fmbl = 0; pbdev->zpci_fn.pft = 0; +/* Store function type separately for type-specific behavior */ +pbdev->pft = cap->pft; } Thanks, queued: https://gitlab.com/thuth/qemu/-/commits/s390x-next/ I had to adjust the hunk in s390_pci_read_base() due to a conflict with your earlier patch, please check whether it looks sane to you. Thomas
Re: [PATCH v1 04/24] vfio-user: add region cache
On 9/11/22 00:13, John Johnson wrote: cache VFIO_DEVICE_GET_REGION_INFO results to reduce memory alloc/free cycles and as prep work for vfio-user Signed-off-by: John G Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman --- hw/vfio/ccw.c | 5 - hw/vfio/common.c | 41 +++-- hw/vfio/igd.c | 23 +-- hw/vfio/migration.c | 2 -- hw/vfio/pci-quirks.c | 19 +-- hw/vfio/pci.c | 8 include/hw/vfio/vfio-common.h | 2 ++ 7 files changed, 51 insertions(+), 49 deletions(-) void vfio_put_base_device(VFIODevice *vbasedev) { +if (vbasedev->regions != NULL) { +int i; + +for (i = 0; i < vbasedev->num_regions; i++) { +g_free(vbasedev->regions[i]); +} +g_free(vbasedev->regions); +vbasedev->regions = NULL; +} + if (!vbasedev->group) { return; } @@ -2432,6 +2451,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, { size_t argsz = sizeof(struct vfio_region_info); +/* create region cache */ +if (vbasedev->regions == NULL) { +vbasedev->regions = g_new0(struct vfio_region_info *, + vbasedev->num_regions); +} +/* check cache */ +if (vbasedev->regions[index] != NULL) { +*info = vbasedev->regions[index]; +return 0; +} What about simply allocating/releasing once regions[] in vfio_instance_init / vfio_instance_finalize?
Re: [PATCH v1 06/24] vfio-user: Define type vfio_user_pci_dev_info
On 12/12/22 12:03, John Levon wrote: On Mon, Dec 12, 2022 at 10:01:33AM +0100, Cédric Le Goater wrote: diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 80b03a2..dc19869 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include CONFIG_DEVICES #include #include @@ -3421,3 +3422,91 @@ static void register_vfio_pci_dev_type(void) } type_init(register_vfio_pci_dev_type) + + +#ifdef CONFIG_VFIO_USER_PCI Why not introduce a new file hw/vfio/user.c file ? It would be cleaner. user.c is in this series, and holds the vfio-user implementation - it's not a PCI specific thing. So it would have to be hw/vfio/user_pci.c or something Or hw/vfio/pci-user.c
Re: [PATCH v3 4/4] hw/nvme: fix missing cq eventidx update
On 12/12/22 12:32, Klaus Jensen wrote: From: Klaus Jensen Prior to reading the shadow doorbell cq head, we have to update the eventidx. Otherwise, we risk that the driver will skip an mmio doorbell write. This happens on riscv64, as reported by Guenter. Adding the missing update to the cq eventidx fixes the issue. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Cc: qemu-ri...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8af70f0216f0..3df29ea68b2f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1334,10 +1334,22 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, } } static void nvme_update_cq_head(NvmeCQueue *cq) { -pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, - sizeof(cq->head)); +uint32_t v; + +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v)); + +cq->head = le32_to_cpu(v); Isn't this be part of the previous patch? trace_pci_nvme_update_cq_head(cq->cqid, cq->head); }
Re: [PATCH v3 4/4] hw/nvme: fix missing cq eventidx update
On Dec 12 12:37, Philippe Mathieu-Daudé wrote: > On 12/12/22 12:32, Klaus Jensen wrote: > > From: Klaus Jensen > > > > Prior to reading the shadow doorbell cq head, we have to update the > > eventidx. Otherwise, we risk that the driver will skip an mmio doorbell > > write. This happens on riscv64, as reported by Guenter. > > > > Adding the missing update to the cq eventidx fixes the issue. > > > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > > Cc: qemu-sta...@nongnu.org > > Cc: qemu-ri...@nongnu.org > > Reported-by: Guenter Roeck > > Signed-off-by: Klaus Jensen > > --- > > hw/nvme/ctrl.c | 17 +++-- > > 1 file changed, 15 insertions(+), 2 deletions(-) > > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > > index 8af70f0216f0..3df29ea68b2f 100644 > > --- a/hw/nvme/ctrl.c > > +++ b/hw/nvme/ctrl.c > > @@ -1334,10 +1334,22 @@ static inline void nvme_blk_write(BlockBackend > > *blk, int64_t offset, > > } > > } > > > > static void nvme_update_cq_head(NvmeCQueue *cq) > > { > > -pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, > > - sizeof(cq->head)); > > +uint32_t v; > > + > > +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v)); > > + > > +cq->head = le32_to_cpu(v); > > Isn't this be part of the previous patch? > Argh, I screwed it up. I'll fix it. signature.asc Description: PGP signature
[PATCH v4 0/4] hw/nvme: fix broken shadow doorbells on some platforms
From: Klaus Jensen Guenter reports that hw/nvme is broken on riscv64[1] and big endian platforms[2]. This is a regression since 7.1, so this does not warrent an rc5 for 7.2. I'm sure Guenter can carry this patch in his tree, and maybe we can get this out in a stable release. On riscv, the issue is a missing cq eventidx update. I really wonder why this issue only shows up on riscv64. We have not observed this on other platforms (yet). Further, Guenter also reported problems on big-endian platforms. The issue here is missing endian conversions which patch 3 addresses. This also requires a fix for the Linux kernel that I am posting separately (can't link to it, chicken and egg problem). [1]: https://lore.kernel.org/qemu-devel/20221207174918.ga1151...@roeck-us.net/ [2]: https://lore.kernel.org/qemu-devel/20221209110022.ga3396...@roeck-us.net/ v4: - screwed up the rebase (Philippe) v3: - add patch to fix big-endian platforms v2: - use QOM accessor (Philippe) - added some cleanup patches in front Klaus Jensen (4): hw/nvme: use QOM accessors hw/nvme: rename shadow doorbell related trace events hw/nvme: fix missing endian conversions for doorbell buffers hw/nvme: fix missing cq eventidx update hw/nvme/ctrl.c | 121 ++- hw/nvme/trace-events | 8 +-- 2 files changed, 78 insertions(+), 51 deletions(-) -- 2.38.1
[PATCH v4 4/4] hw/nvme: fix missing cq eventidx update
From: Klaus Jensen Prior to reading the shadow doorbell cq head, we have to update the eventidx. Otherwise, we risk that the driver will skip an mmio doorbell write. This happens on riscv64, as reported by Guenter. Adding the missing update to the cq eventidx fixes the issue. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Cc: qemu-ri...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index bb505131f5f9..3df29ea68b2f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1334,6 +1334,15 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, } } +static void nvme_update_cq_eventidx(const NvmeCQueue *cq) +{ +uint32_t v = cpu_to_le32(cq->head); + +trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head); + +pci_dma_write(PCI_DEVICE(cq->ctrl), cq->ei_addr, &v, sizeof(v)); +} + static void nvme_update_cq_head(NvmeCQueue *cq) { uint32_t v; @@ -1358,6 +1367,7 @@ static void nvme_post_cqes(void *opaque) hwaddr addr; if (n->dbbuf_enabled) { +nvme_update_cq_eventidx(cq); nvme_update_cq_head(cq); } -- 2.38.1
[PATCH v4 3/4] hw/nvme: fix missing endian conversions for doorbell buffers
From: Klaus Jensen The eventidx and doorbell value are not handling endianness correctly. Fix this. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index cfab21b3436e..bb505131f5f9 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1336,8 +1336,11 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, static void nvme_update_cq_head(NvmeCQueue *cq) { -pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, - sizeof(cq->head)); +uint32_t v; + +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &v, sizeof(v)); + +cq->head = le32_to_cpu(v); trace_pci_nvme_update_cq_head(cq->cqid, cq->head); } @@ -6148,16 +6151,20 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) static void nvme_update_sq_eventidx(const NvmeSQueue *sq) { +uint32_t v = cpu_to_le32(sq->tail); + trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail); -pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail, - sizeof(sq->tail)); +pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &v, sizeof(v)); } static void nvme_update_sq_tail(NvmeSQueue *sq) { -pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail, - sizeof(sq->tail)); +uint32_t v; + +pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &v, sizeof(v)); + +sq->tail = le32_to_cpu(v); trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail); } -- 2.38.1
Re: [PATCH] tests/qtest/libqos/e1000e: Remove "other" interrupts
On 10/11/2022 12.40, Akihiko Odaki wrote: The "other" kind of interrupts are not used in the tests. Signed-off-by: Akihiko Odaki --- tests/qtest/libqos/e1000e.h | 1 - tests/qtest/libqos/e1000e.c | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/qtest/libqos/e1000e.h b/tests/qtest/libqos/e1000e.h index a22f5fdbad..3bf285af42 100644 --- a/tests/qtest/libqos/e1000e.h +++ b/tests/qtest/libqos/e1000e.h @@ -24,7 +24,6 @@ #define E1000E_RX0_MSG_ID (0) #define E1000E_TX0_MSG_ID (1) -#define E1000E_OTHER_MSG_ID (2) #define E1000E_TDLEN(0x3808) #define E1000E_TDT (0x3818) diff --git a/tests/qtest/libqos/e1000e.c b/tests/qtest/libqos/e1000e.c index 80b3e3db90..3b51bafcb7 100644 --- a/tests/qtest/libqos/e1000e.c +++ b/tests/qtest/libqos/e1000e.c @@ -32,7 +32,6 @@ #define E1000E_IVAR_TEST_CFG \ (((E1000E_RX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << E1000_IVAR_RXQ0_SHIFT) | \ ((E1000E_TX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << E1000_IVAR_TXQ0_SHIFT) | \ - ((E1000E_OTHER_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << E1000_IVAR_OTHER_SHIFT) | \ E1000_IVAR_TX_INT_EVERY_WB) Thanks, queued to my testing-next branch now: https://gitlab.com/thuth/qemu/-/commits/testing-next/ Thomas
[PATCH v4 1/4] hw/nvme: use QOM accessors
From: Klaus Jensen Replace various ->parent_obj use with the equivalent QOM accessors. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 89 +++--- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e54276dc1dc7..6b70c1e39831 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -449,7 +449,7 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) return 0; } -return pci_dma_read(&n->parent_obj, addr, buf, size); +return pci_dma_read(PCI_DEVICE(n), addr, buf, size); } static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size) @@ -469,7 +469,7 @@ static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size) return 0; } -return pci_dma_write(&n->parent_obj, addr, buf, size); +return pci_dma_write(PCI_DEVICE(n), addr, buf, size); } static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) @@ -514,24 +514,27 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq) static void nvme_irq_check(NvmeCtrl *n) { +PCIDevice *pci = PCI_DEVICE(n); uint32_t intms = ldl_le_p(&n->bar.intms); -if (msix_enabled(&(n->parent_obj))) { +if (msix_enabled(pci)) { return; } if (~intms & n->irq_status) { -pci_irq_assert(&n->parent_obj); +pci_irq_assert(pci); } else { -pci_irq_deassert(&n->parent_obj); +pci_irq_deassert(pci); } } static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) { +PCIDevice *pci = PCI_DEVICE(n); + if (cq->irq_enabled) { -if (msix_enabled(&(n->parent_obj))) { +if (msix_enabled(pci)) { trace_pci_nvme_irq_msix(cq->vector); -msix_notify(&(n->parent_obj), cq->vector); +msix_notify(pci, cq->vector); } else { trace_pci_nvme_irq_pin(); assert(cq->vector < 32); @@ -546,7 +549,7 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) { if (cq->irq_enabled) { -if (msix_enabled(&(n->parent_obj))) { +if (msix_enabled(PCI_DEVICE(n))) { return; } else { assert(cq->vector < 32); @@ -570,7 +573,7 @@ static void nvme_req_clear(NvmeRequest *req) static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma) { if (dma) { -pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0); +pci_dma_sglist_init(&sg->qsg, PCI_DEVICE(n), 0); sg->flags = NVME_SG_DMA; } else { qemu_iovec_init(&sg->iov, 0); @@ -1333,7 +1336,7 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, static void nvme_update_cq_head(NvmeCQueue *cq) { -pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head, +pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, sizeof(cq->head)); trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head); } @@ -1363,7 +1366,7 @@ static void nvme_post_cqes(void *opaque) req->cqe.sq_id = cpu_to_le16(sq->sqid); req->cqe.sq_head = cpu_to_le16(sq->head); addr = cq->dma_addr + cq->tail * n->cqe_size; -ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, +ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe, sizeof(req->cqe)); if (ret) { trace_pci_nvme_err_addr_write(addr); @@ -4615,6 +4618,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) { +PCIDevice *pci = PCI_DEVICE(n); uint16_t offset = (cq->cqid << 3) + (1 << 2); n->cq[cq->cqid] = NULL; @@ -4625,8 +4629,8 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) event_notifier_set_handler(&cq->notifier, NULL); event_notifier_cleanup(&cq->notifier); } -if (msix_enabled(&n->parent_obj)) { -msix_vector_unuse(&n->parent_obj, cq->vector); +if (msix_enabled(pci)) { +msix_vector_unuse(pci, cq->vector); } if (cq->cqid) { g_free(cq); @@ -4664,8 +4668,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) { -if (msix_enabled(&n->parent_obj)) { -msix_vector_use(&n->parent_obj, vector); +PCIDevice *pci = PCI_DEVICE(n); + +if (msix_enabled(pci)) { +msix_vector_use(pci, vector); } cq->ctrl = n; cq->cqid = cqid; @@ -4716,7 +4722,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_PRP_OFFSET | NVME_DNR; } -if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { +if (unlikely(!msix_enabled(PCI_DEVICE(n)) && vector
Re: [PATCH] tests/qtest/e1000e-test: De-duplicate constants
On 10/11/2022 12.44, Akihiko Odaki wrote: De-duplicate constants found in e1000e_send_verify() to avoid mismatch and improve readability. Signed-off-by: Akihiko Odaki --- tests/qtest/e1000e-test.c | 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) Thanks, I added e1000e_receive_verify() to the commit description (since you've modified that, too) and added this to my testing-next branch: https://gitlab.com/thuth/qemu/-/commits/testing-next/ Thomas
Re: [PATCH] tests/qtest/libqos/e1000e: Correctly group register accesses
On 10/11/2022 12.45, Akihiko Odaki wrote: Add a newline after E1000_TCTL write and make it clear that E1000_TCTL write is what enabling transmit. Signed-off-by: Akihiko Odaki --- tests/qtest/libqos/e1000e.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/libqos/e1000e.c b/tests/qtest/libqos/e1000e.c index 80b3e3db90..0e5cceafe4 100644 --- a/tests/qtest/libqos/e1000e.c +++ b/tests/qtest/libqos/e1000e.c @@ -152,6 +152,7 @@ static void e1000e_pci_start_hw(QOSGraphObject *obj) /* Enable transmit */ e1000e_macreg_write(&d->e1000e, E1000_TCTL, E1000_TCTL_EN); + e1000e_macreg_write(&d->e1000e, E1000_RDBAL, (uint32_t)d->e1000e.rx_ring); e1000e_macreg_write(&d->e1000e, E1000_RDBAH, Thanks, queued: https://gitlab.com/thuth/qemu/-/commits/testing-next/ Thomas
[PATCH v4 2/4] hw/nvme: rename shadow doorbell related trace events
From: Klaus Jensen Rename the trace events related to writing the event index and reading the doorbell value to make it more clear that the event is associated with an actual update (write or read respectively). Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 11 +++ hw/nvme/trace-events | 8 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6b70c1e39831..cfab21b3436e 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1337,8 +1337,9 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, static void nvme_update_cq_head(NvmeCQueue *cq) { pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head, -sizeof(cq->head)); -trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head); + sizeof(cq->head)); + +trace_pci_nvme_update_cq_head(cq->cqid, cq->head); } static void nvme_post_cqes(void *opaque) @@ -6147,16 +6148,18 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) static void nvme_update_sq_eventidx(const NvmeSQueue *sq) { +trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail); + pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, &sq->tail, sizeof(sq->tail)); -trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail); } static void nvme_update_sq_tail(NvmeSQueue *sq) { pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail, sizeof(sq->tail)); -trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail); + +trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail); } static void nvme_process_sq(void *opaque) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index fccb79f48973..b16f2260b4fd 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -84,8 +84,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d" pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16"" -pci_nvme_eventidx_cq(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" new_eventidx %"PRIu16"" -pci_nvme_eventidx_sq(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" new_eventidx %"PRIu16"" +pci_nvme_update_cq_eventidx(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" new_eventidx %"PRIu16"" +pci_nvme_update_sq_eventidx(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" new_eventidx %"PRIu16"" pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" @@ -102,8 +102,8 @@ pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" pci_nvme_mmio_stopped(void) "cleared controller enable bit" pci_nvme_mmio_shutdown_set(void) "shutdown bit set" pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" -pci_nvme_shadow_doorbell_cq(uint16_t cqid, uint16_t new_shadow_doorbell) "cqid %"PRIu16" new_shadow_doorbell %"PRIu16"" -pci_nvme_shadow_doorbell_sq(uint16_t sqid, uint16_t new_shadow_doorbell) "sqid %"PRIu16" new_shadow_doorbell %"PRIu16"" +pci_nvme_update_cq_head(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" +pci_nvme_update_sq_tail(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -- 2.38.1
Re: [PATCH 20/30] meson: prepare move of QEMU_CFLAGS to meson
On Fri, Dec 9, 2022 at 3:34 PM Paolo Bonzini wrote: > > Clean up the handling of compiler flags in meson.build, splitting > the general flags that should be included in subprojects as well, > from warning flags that only apply to QEMU itself. The two were > mixed in both configure tests and meson tests. > > This split makes it easier to move the compiler tests piecewise > from configure to Meson. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > meson.build | 53 + > 1 file changed, 29 insertions(+), 24 deletions(-) > > diff --git a/meson.build b/meson.build > index 99c1bde4d154..dac343d14797 100644 > --- a/meson.build > +++ b/meson.build > @@ -190,10 +190,23 @@ endif > # Compiler flags # > ## > > -qemu_cflags = config_host['QEMU_CFLAGS'].split() > +qemu_common_flags = [] > +qemu_cflags = [] > +foreach arg : config_host['QEMU_CFLAGS'].split() > + if arg.startswith('-W') > +qemu_cflags += arg > + else > +qemu_common_flags += arg > + endif > +endforeach > qemu_objcflags = config_host['QEMU_OBJCFLAGS'].split() > qemu_ldflags = config_host['QEMU_LDFLAGS'].split() > > +if get_option('gprof') > + qemu_common_flags += ['-p'] > + qemu_ldflags += ['-p'] > +endif > + > if get_option('prefer_static') >qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static' > endif > @@ -207,10 +220,9 @@ if targetos == 'windows' >qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', > '-Wl,--high-entropy-va') > endif > > -if get_option('gprof') > - qemu_cflags += ['-p'] > - qemu_objcflags += ['-p'] > - qemu_ldflags += ['-p'] > +# Exclude --warn-common with TSan to suppress warnings from the TSan > libraries. > +if targetos != 'sunos' and not config_host.has_key('CONFIG_TSAN') > + qemu_ldflags += cc.get_supported_link_arguments('-Wl,--warn-common') > endif > > # Specify linker-script with add_project_link_arguments so that it is not > placed > @@ -230,8 +242,7 @@ if get_option('fuzzing') >name: '-fsanitize-coverage-allowlist=/dev/null', > args: ['-fsanitize-coverage-allowlist=/dev/null', > '-fsanitize-coverage=trace-pc'] ) > - > add_global_arguments('-fsanitize-coverage-allowlist=instrumentation-filter', > - native: false, language: all_languages) > +qemu_common_flags += > ['-fsanitize-coverage-allowlist=instrumentation-filter'] >endif > >if get_option('fuzzing_engine') == '' > @@ -239,10 +250,8 @@ if get_option('fuzzing') > # compiled code. To build non-fuzzer binaries with --enable-fuzzing, > link > # everything with fsanitize=fuzzer-no-link. Otherwise, the linker will be > # unable to bind the fuzzer-related callbacks added by instrumentation. > -add_global_arguments('-fsanitize=fuzzer-no-link', > - native: false, language: all_languages) > -add_global_link_arguments('-fsanitize=fuzzer-no-link', > - native: false, language: all_languages) > +qemu_common_flags += ['-fsanitize=fuzzer-no-link'] > +qemu_ldflags += ['-fsanitize=fuzzer-no-link'] > # For the actual fuzzer binaries, we need to link against the libfuzzer > # library. They need to be configurable, to support OSS-Fuzz > fuzz_exe_ldflags = ['-fsanitize=fuzzer'] > @@ -253,6 +262,9 @@ if get_option('fuzzing') >endif > endif > > +add_global_arguments(qemu_common_flags, native: false, language: > all_languages) > +add_global_link_arguments(qemu_ldflags, native: false, language: > all_languages) > + > # Check that the C++ compiler exists and works with the C compiler. > link_language = 'c' > linker = cc > @@ -276,16 +288,9 @@ if 'cpp' in all_languages >endif > endif > > -# Exclude --warn-common with TSan to suppress warnings from the TSan > libraries. > -if targetos != 'sunos' and not config_host.has_key('CONFIG_TSAN') > - qemu_ldflags += linker.get_supported_link_arguments('-Wl,--warn-common') > -endif > - > -add_global_link_arguments(qemu_ldflags, native: false, language: > all_languages) > - > -add_global_arguments(qemu_cflags, native: false, language: 'c') > -add_global_arguments(qemu_cxxflags, native: false, language: 'cpp') > -add_global_arguments(qemu_objcflags, native: false, language: 'objc') > +add_project_arguments(qemu_cflags, native: false, language: 'c') > +add_project_arguments(qemu_cxxflags, native: false, language: 'cpp') > +add_project_arguments(qemu_objcflags, native: false, language: 'objc') > if targetos == 'linux' >add_project_arguments('-isystem', meson.current_source_dir() / > 'linux-headers', > '-isystem', 'linux-headers', > @@ -3778,12 +3783,12 @@ link_args = get_option(link_language + '_link_args') > if link_args.length() > 0 >summary_info += {'LDFLAGS': ' '.join(link_args)} > endif > -summary_info += {'QEMU_CFLAGS': ' '.join(qemu_cf
Re: CVMSEG Emulation
On Fri, 9 Dec 2022 at 21:03, Jiaxun Yang wrote: > > > > > 2022年12月9日 17:44,Christopher Wrogg 写道: > > > > I tried both. > > > > Option 1 > > What I did: > > #undef TARGET_VIRT_ADDR_SPACE_BITS and #define > > TARGET_VIRT_ADDR_SPACE_BITS 64 > > The Result: > > perror reports "Cannot allocate memory" > > Option 2: > > What I did: > > TARGET_VIRT_ADDR_SPACE_BITS for me is 30 so I masked by 0x3FFF > > The Result: > > The segfault persists and gdb reports the memory as inaccessible. > > Hmm this looks wired for me, by no chance TARGET_VIRT_ADDR_SPACE_BITS for MIPS > can be 30, on N64 ABI build it should be 48 and 32 for N32 or O32 build. > > It is defined in target/mips/cpu-param.h . For 32-bit user-mode we currently set it to 31, on the theory that userspace only gets access to the bottom half of the address space. -- PMM
Re: [PATCH v2 4/4] tests/qtest: Enable qtest build on Windows
On 25/11/2022 12.41, Bin Meng wrote: From: Bin Meng Now that we have fixed various test case issues as seen when running on Windows, let's enable the qtest build on Windows. Signed-off-by: Bin Meng Reviewed-by: Thomas Huth --- (no changes since v1) tests/qtest/meson.build | 6 -- 1 file changed, 6 deletions(-) diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index c07a5b1a5f..f0ebb5fac6 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -1,9 +1,3 @@ -# All QTests for now are POSIX-only, but the dependencies are -# really in libqtest, not in the testcases themselves. -if not config_host.has_key('CONFIG_POSIX') - subdir_done() -endif - slow_qtests = { 'ahci-test' : 60, 'bios-tables-test' : 120, Thanks, all four patches queued to my testing-next branch now: https://gitlab.com/thuth/qemu/-/commits/testing-next/ Thomas
Re: [PATCH 21/30] build: move sanitizer tests to meson
Hi On Fri, Dec 9, 2022 at 3:42 PM Paolo Bonzini wrote: > > Signed-off-by: Paolo Bonzini > --- > configure | 151 - > docs/devel/build-system.rst| 4 - > meson.build| 63 +- > meson_options.txt | 4 + > scripts/meson-buildoptions.sh | 6 ++ > tests/qemu-iotests/meson.build | 2 +- > tests/unit/meson.build | 2 +- > 7 files changed, 73 insertions(+), 159 deletions(-) > > diff --git a/configure b/configure > index b0df6c3cf754..babcf5d28a85 100755 > --- a/configure > +++ b/configure > @@ -269,9 +269,6 @@ EXTRA_OBJCFLAGS="" > EXTRA_LDFLAGS="" > > debug_tcg="no" > -sanitizers="no" > -tsan="no" > -fortify_source="yes" > EXESUF="" > prefix="/usr/local" > qemu_suffix="qemu" > @@ -392,14 +389,6 @@ EOF >compile_object > } > > -check_include() { > -cat > $TMPC < -#include <$1> > -int main(void) { return 0; } > -EOF > - compile_object > -} > - > write_c_skeleton() { > cat > $TMPC < int main(void) { return 0; } > @@ -755,15 +744,6 @@ for opt do >debug_tcg="yes" >meson_option_parse --enable-debug-mutex "" >meson_option_add -Doptimization=0 > - fortify_source="no" > - ;; > - --enable-sanitizers) sanitizers="yes" > - ;; > - --disable-sanitizers) sanitizers="no" > - ;; > - --enable-tsan) tsan="yes" > - ;; > - --disable-tsan) tsan="no" >;; >--disable-tcg) tcg="disabled" > plugins="no" > @@ -971,8 +951,6 @@ Advanced options (experts only): > desired devices in configs/devices/) >--with-devices-ARCH=NAME override default configs/devices >--enable-debug enable common debug build options > - --enable-sanitizers enable default sanitizers > - --enable-tsanenable thread sanitizer >--disable-werror disable compilation abort on warning >--disable-stack-protector disable compiler-provided stack protection >--cpu=CPUBuild for host CPU [$cpu] > @@ -1547,91 +1525,6 @@ if ! compile_object "-Werror"; then > ccache_cpp2=yes > fi > > -# > -# clang does not support glibc + FORTIFY_SOURCE. > - > -if test "$fortify_source" != "no"; then > - if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then > -fortify_source="no"; > - elif test -n "$cxx" && has $cxx && > - echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then > -fortify_source="no"; > - else > -fortify_source="yes" > - fi > -fi > - > -## > -# checks for sanitizers > - > -have_asan=no > -have_ubsan=no > -have_asan_iface_h=no > -have_asan_iface_fiber=no > - > -if test "$sanitizers" = "yes" ; then > - write_c_skeleton > - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" ""; then > - have_asan=yes > - fi > - > - # we could use a simple skeleton for flags checks, but this also > - # detect the static linking issue of ubsan, see also: > - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84285 > - cat > $TMPC << EOF > -#include > -int main(void) { > -void *tmp = malloc(10); > -if (tmp != NULL) { > -return *(int *)(tmp + 2); > -} > -return 1; > -} > -EOF > - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=undefined" ""; then > - have_ubsan=yes > - fi > - > - if check_include "sanitizer/asan_interface.h" ; then > - have_asan_iface_h=yes > - fi > - > - cat > $TMPC << EOF > -#include > -int main(void) { > - __sanitizer_start_switch_fiber(0, 0, 0); > - return 0; > -} > -EOF > - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" "" ; then > - have_asan_iface_fiber=yes > - fi > -fi > - > -# Thread sanitizer is, for now, much noisier than the other sanitizers; > -# keep it separate until that is not the case. > -if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then > - error_exit "TSAN is not supported with other sanitiziers." > -fi > -have_tsan=no > -have_tsan_iface_fiber=no > -if test "$tsan" = "yes" ; then > - write_c_skeleton > - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then > - have_tsan=yes > - fi > - cat > $TMPC << EOF > -#include > -int main(void) { > - __tsan_create_fiber(0); > - return 0; > -} > -EOF > - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then > - have_tsan_iface_fiber=yes > - fi > -fi > - > ## > # functions to probe cross compilers > > @@ -2057,42 +1950,6 @@ case "$vfio_user_server" in > ;; > esac > > -## > -# End of CC checks > -# After here, no more $cc or $ld runs > - > -write_c_skeleton > - > -if test "$fortify_source" = "yes" ; then > - QEMU_CFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $QEMU_CFLAGS" > -fi > - > -if test "$have_asan" = "yes"; then > - QEMU_CFLAGS="-fsanitize=address $QEMU_CFLAGS" > - QEMU_LDFLAGS="-fsanitize
Re: [PATCH 23/30] build: move coroutine backend selection to meson
On Fri, Dec 9, 2022 at 3:37 PM Paolo Bonzini wrote: > > To simplify the code, rename coroutine-win32.c to match the option > passed to configure. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure | 62 --- > meson.build | 32 +- > meson_options.txt | 3 + > scripts/meson-buildoptions.py | 1 + > scripts/meson-buildoptions.sh | 3 + > ...{coroutine-win32.c => coroutine-windows.c} | 0 > util/meson.build | 2 +- > 7 files changed, 38 insertions(+), 65 deletions(-) > rename util/{coroutine-win32.c => coroutine-windows.c} (100%) > > diff --git a/configure b/configure > index fea9cbf3abd0..1f7c5bbba4b9 100755 > --- a/configure > +++ b/configure > @@ -275,7 +275,6 @@ softmmu="yes" > linux_user="" > bsd_user="" > pie="" > -coroutine="" > plugins="$default_feature" > meson="" > ninja="" > @@ -792,8 +791,6 @@ for opt do >;; >--enable-fdt=*) fdt="$optarg" >;; > - --with-coroutine=*) coroutine="$optarg" > - ;; >--with-git=*) git="$optarg" >;; >--with-git-submodules=*) > @@ -949,8 +946,6 @@ Advanced options (experts only): >--disable-werror disable compilation abort on warning >--disable-stack-protector disable compiler-provided stack protection >--cpu=CPUBuild for host CPU [$cpu] > - --with-coroutine=BACKEND coroutine backend. Supported options: > - ucontext, sigaltstack, windows >--enable-plugins > enable plugins via shared library loading >--disable-containers don't use containers for cross-building > @@ -1373,61 +1368,6 @@ case "$fdt" in > ;; > esac > > -## > -# check and set a backend for coroutine > - > -# We prefer ucontext, but it's not always possible. The fallback > -# is sigcontext. On Windows the only valid backend is the Windows > -# specific one. > - > -ucontext_works=no > -if test "$darwin" != "yes"; then > - cat > $TMPC << EOF > -#include > -#ifdef __stub_makecontext > -#error Ignoring glibc stub makecontext which will always fail > -#endif > -int main(void) { makecontext(0, 0, 0); return 0; } > -EOF > - if compile_prog "" "" ; then > -ucontext_works=yes > - fi > -fi > - > -if test "$coroutine" = ""; then > - if test "$mingw32" = "yes"; then > -coroutine=win32 > - elif test "$ucontext_works" = "yes"; then > -coroutine=ucontext > - else > -coroutine=sigaltstack > - fi > -else > - case $coroutine in > - windows) > -if test "$mingw32" != "yes"; then > - error_exit "'windows' coroutine backend only valid for Windows" > -fi > -# Unfortunately the user visible backend name doesn't match the > -# coroutine-*.c filename for this case, so we have to adjust it here. > -coroutine=win32 > -;; > - ucontext) > -if test "$ucontext_works" != "yes"; then > - error_exit "'ucontext' backend requested but makecontext not available" > -fi > -;; > - sigaltstack) > -if test "$mingw32" = "yes"; then > - error_exit "only the 'windows' coroutine backend is valid for Windows" > -fi > -;; > - *) > -error_exit "unknown coroutine backend $coroutine" > -;; > - esac > -fi > - > > # check if ccache is interfering with > # semantic analysis of macros > @@ -2002,8 +1942,6 @@ if [ "$bsd" = "yes" ] ; then >echo "CONFIG_BSD=y" >> $config_host_mak > fi > > -echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak > - > if test "$plugins" = "yes" ; then > echo "CONFIG_PLUGIN=y" >> $config_host_mak > fi > diff --git a/meson.build b/meson.build > index 7ee9f081d0a1..b9df49667a19 100644 > --- a/meson.build > +++ b/meson.build > @@ -211,6 +211,34 @@ if get_option('prefer_static') >qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static' > endif > > +coroutine_backend = get_option('coroutine_backend') > +ucontext_probe = ''' > + #include > + #ifdef __stub_makecontext > + #error Ignoring glibc stub makecontext which will always fail > + #endif > + int main(void) { makecontext(0, 0, 0); return 0; }''' > + > +# On Windows the only valid backend is the Windows specific one. > +# For POSIX prefer ucontext, but it's not always possible. The fallback > +# is sigcontext. > +supported_backends = [] > +if targetos == 'windows' > + supported_backends += ['windows'] > +else > + if targetos != 'darwin' and cc.links(ucontext_probe) > +supported_backends += ['ucontext'] > + endif > + supported_backends += ['sigaltstack'] > +endif > + > +if coroutine_backend == 'auto' > + coroutine_backend = supported_backends[0] > +elif coroutine_backend not in supported_backends > + error('"@0@" backend requested but not available. Available backends: > @1@' \ > +.format(coroutin
Re: [PATCH v2 0/2] qga: improve "syslog" domain logging
On 11/29/22 19:38, Andrey Drobyshev wrote: > These patches extend QGA logging interface, primarily under Windows > guests. They enable QGA to write to Windows event log, much like > syslog() on *nix. In addition we get rid of hardcoded log level used by > ga_log(). > > v2: > * Close event_log handle when doing cleanup_agent() > * Fix switch cases indentation as reported by scripts/checkpatch.pl > > Andrey Drobyshev (2): > qga-win: add logging to Windows event log > qga: map GLib log levels to system levels > > configure | 3 +++ > qga/installer/qemu-ga.wxs | 5 > qga/main.c| 50 +++ > qga/meson.build | 19 ++- > qga/messages-win32.mc | 9 +++ > 5 files changed, 81 insertions(+), 5 deletions(-) > create mode 100644 qga/messages-win32.mc > Could you please clarify the status of these patches?
Re: [PATCH 24/30] build: move stack protector flag selection to meson
Hi On Fri, Dec 9, 2022 at 3:42 PM Paolo Bonzini wrote: > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure | 44 ++- > meson.build | 28 +- > meson_options.txt | 2 ++ > scripts/meson-buildoptions.sh | 3 +++ > 4 files changed, 34 insertions(+), 43 deletions(-) > > diff --git a/configure b/configure > index 1f7c5bbba4b9..5d31294f316f 100755 > --- a/configure > +++ b/configure > @@ -175,7 +175,7 @@ compile_prog() { >local_cflags="$1" >local_ldflags="$2" >do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags > -o $TMPE $TMPC \ > - $LDFLAGS $EXTRA_LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags > + $LDFLAGS $EXTRA_LDFLAGS $CONFIGURE_LDFLAGS $local_ldflags > } > > # symbolically link $1 to $2. Portable version of "ln -sf". > @@ -221,7 +221,6 @@ static="no" > cross_compile="no" > cross_prefix="" > host_cc="cc" > -stack_protector="" > use_containers="yes" > gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") > > @@ -370,8 +369,6 @@ sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" > QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv" > QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE > $QEMU_CFLAGS" > > -QEMU_LDFLAGS= > - > # Flags that are needed during configure but later taken care of by Meson > CONFIGURE_CFLAGS="-std=gnu11 -Wall" > CONFIGURE_LDFLAGS= > @@ -773,10 +770,6 @@ for opt do >;; >--disable-werror) werror="no" >;; > - --enable-stack-protector) stack_protector="yes" > - ;; > - --disable-stack-protector) stack_protector="no" > - ;; >--enable-cfi) >cfi="true"; >meson_option_add -Db_lto=true > @@ -944,7 +937,6 @@ Advanced options (experts only): >--with-devices-ARCH=NAME override default configs/devices >--enable-debug enable common debug build options >--disable-werror disable compilation abort on warning > - --disable-stack-protector disable compiler-provided stack protection >--cpu=CPUBuild for host CPU [$cpu] >--enable-plugins > enable plugins via shared library loading > @@ -1157,7 +1149,7 @@ EOF > optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')" > do_objc -Werror $optflag \ >$OBJCFLAGS $EXTRA_OBJCFLAGS $CONFIGURE_OBJCFLAGS $QEMU_OBJCFLAGS \ > - -o $TMPE $TMPM $QEMU_LDFLAGS > + -o $TMPE $TMPM > } > > for flag in $gcc_flags; do > @@ -1169,37 +1161,6 @@ for flag in $gcc_flags; do > fi > done > > -if test "$stack_protector" != "no"; then > - cat > $TMPC << EOF > -int main(int argc, char *argv[]) > -{ > -char arr[64], *p = arr, *c = argv[argc - 1]; > -while (*c) { > -*p++ = *c++; > -} > -return 0; > -} > -EOF > - gcc_flags="-fstack-protector-strong -fstack-protector-all" > - sp_on=0 > - for flag in $gcc_flags; do > -# We need to check both a compile and a link, since some compiler > -# setups fail only on a .c->.o compile and some only at link time > -if compile_object "-Werror $flag" && > - compile_prog "-Werror $flag" ""; then > - QEMU_CFLAGS="$QEMU_CFLAGS $flag" > - QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" > - sp_on=1 > - break > -fi > - done > - if test "$stack_protector" = yes; then > -if test $sp_on = 0; then > - error_exit "Stack protector not supported" > -fi > - fi > -fi > - > # Disable -Wmissing-braces on older compilers that warn even for > # the "universal" C zero initializer {0}. > cat > $TMPC << EOF > @@ -1968,7 +1929,6 @@ echo "PKG_CONFIG=${pkg_config_exe}" >> $config_host_mak > echo "CC=$cc" >> $config_host_mak > echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak > echo "QEMU_OBJCFLAGS=$QEMU_OBJCFLAGS" >> $config_host_mak > -echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak > echo "EXESUF=$EXESUF" >> $config_host_mak > > # use included Linux headers > diff --git a/meson.build b/meson.build > index b9df49667a19..c5a8dce9e1d6 100644 > --- a/meson.build > +++ b/meson.build > @@ -200,7 +200,7 @@ foreach arg : config_host['QEMU_CFLAGS'].split() >endif > endforeach > qemu_objcflags = config_host['QEMU_OBJCFLAGS'].split() > -qemu_ldflags = config_host['QEMU_LDFLAGS'].split() > +qemu_ldflags = [] > > if get_option('gprof') >qemu_common_flags += ['-p'] > @@ -211,6 +211,32 @@ if get_option('prefer_static') >qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static' > endif > > +if not get_option('stack_protector').disabled() > + stack_protector_probe = ''' > +int main(int argc, char *argv[]) > +{ > + char arr[64], *p = arr, *c = argv[argc - 1]; > + while (*c) { > + *p++ = *c++; > + } > + return 0; > +}''' > + have_stack_protector = false > + foreach arg : ['-fstack-protector-strong', '-fstack-protector-all'] > +# We need to check both a compile and a lin
Re: [PATCH] FreeBSD: Upgrade to 12.4 release
On 08/12/2022 07.52, Brad Smith wrote: FreeBSD: Upgrade to 12.4 release Signed-off-by: Brad Smith --- .gitlab-ci.d/cirrus.yml | 2 +- tests/vm/freebsd| 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) Thanks, queued to my testing-next branch: https://gitlab.com/thuth/qemu/-/commits/testing-next/ Thomas
Re: [PATCH v2 0/2] qga: improve "syslog" domain logging
Currently, there is a code freeze in QEMU for release 7.2. I will merge this after it https://wiki.qemu.org/Planning/7.2 Best Regards, Konstantin Kostiuk. On Mon, Dec 12, 2022 at 2:17 PM Andrey Drobyshev < andrey.drobys...@virtuozzo.com> wrote: > On 11/29/22 19:38, Andrey Drobyshev wrote: > > These patches extend QGA logging interface, primarily under Windows > > guests. They enable QGA to write to Windows event log, much like > > syslog() on *nix. In addition we get rid of hardcoded log level used by > > ga_log(). > > > > v2: > > * Close event_log handle when doing cleanup_agent() > > * Fix switch cases indentation as reported by scripts/checkpatch.pl > > > > Andrey Drobyshev (2): > > qga-win: add logging to Windows event log > > qga: map GLib log levels to system levels > > > > configure | 3 +++ > > qga/installer/qemu-ga.wxs | 5 > > qga/main.c| 50 +++ > > qga/meson.build | 19 ++- > > qga/messages-win32.mc | 9 +++ > > 5 files changed, 81 insertions(+), 5 deletions(-) > > create mode 100644 qga/messages-win32.mc > > > > Could you please clarify the status of these patches? > >
Re: [PATCH] tests/qtest/vhost-user-blk-test: don't abort all qtests on missing envar
On 25/11/2022 16.58, Christian Schoenebeck wrote: This test requires environment variable QTEST_QEMU_STORAGE_DAEMON_BINARY to be defined for running. If not, it would immediately abort all qtests and prevent other, unrelated tests from running. To fix that, just skip vhost-user-blk-test instead and log a message about missing environment variable. Signed-off-by: Christian Schoenebeck --- I also tried g_test_skip(errmsg) from the setup handlers instead, but it always caused the tests to abort with an error: ../tests/qtest/libqtest.c:179: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0) I haven't further investigated. tests/qtest/vhost-user-blk-test.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c index 07a4c2d500..dc37f5af4d 100644 --- a/tests/qtest/vhost-user-blk-test.c +++ b/tests/qtest/vhost-user-blk-test.c @@ -983,6 +983,12 @@ static void register_vhost_user_blk_test(void) .before = vhost_user_blk_test_setup, }; +if (!getenv("QTEST_QEMU_STORAGE_DAEMON_BINARY")) { +g_test_message("QTEST_QEMU_STORAGE_DAEMON_BINARY not defined, " + "skipping vhost-user-blk-test"); +return; +} + /* * tests for vhost-user-blk and vhost-user-blk-pci * The tests are borrowed from tests/virtio-blk-test.c. But some tests Thanks, queued to my testing-next branch now: https://gitlab.com/thuth/qemu/-/commits/testing-next/ Thomas
Re: [RFC PATCH v2 03/22] i386/xen: Add xen-version machine property and init KVM Xen support
On 09/12/2022 09:55, David Woodhouse wrote: From: David Woodhouse This is a machine property for two main reasons. One is that it allows us to set it in default_machine_opts for the xenfv platform when not running on actual Xen. The other is that theoretically we *could* do this with TCG too; we'd just have to implement a bunch of the stuff that KVM already does for us. Signed-off-by: David Woodhouse --- hw/i386/pc.c| 32 +++ hw/i386/pc_piix.c | 10 +++-- include/hw/i386/pc.h| 3 +++ target/i386/kvm/kvm.c | 26 ++ target/i386/meson.build | 1 + target/i386/xen.c | 49 + target/i386/xen.h | 19 7 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 target/i386/xen.c create mode 100644 target/i386/xen.h diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 546b703cb4..9bada1a8ff 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1811,6 +1811,32 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v, pcms->max_fw_size = value; } +static void pc_machine_get_xen_version(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ +PCMachineState *pcms = PC_MACHINE(obj); +uint32_t value = pcms->xen_version; + +visit_type_uint32(v, name, &value, errp); +} + +static void pc_machine_set_xen_version(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ +PCMachineState *pcms = PC_MACHINE(obj); +Error *error = NULL; +uint32_t value; + +visit_type_uint32(v, name, &value, &error); +if (error) { +error_propagate(errp, error); +return; +} + +pcms->xen_version = value; +} static void pc_machine_initfn(Object *obj) { @@ -1978,6 +2004,12 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP, "SMBIOS Entry Point type [32, 64]"); + +object_class_property_add(oc, "xen-version", "uint32", +pc_machine_get_xen_version, pc_machine_set_xen_version, +NULL, NULL); +object_class_property_set_description(oc, "xen-version", +"Xen version to be emulated (in XENVER_version form e.g. 0x4000a for 4.10)"); } Since this is e properly of the general pc machine class, could it be made to report the actual version if running on real Xen and be read-only? AFAICT I could specify "accel=xen,xen-version=" and the feels like it should be an error. static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 0ad0ed1603..13286d0739 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -876,7 +876,10 @@ static void xenfv_4_2_machine_options(MachineClass *m) pc_i440fx_4_2_machine_options(m); m->desc = "Xen Fully-virtualized PC"; m->max_cpus = HVM_MAX_VCPUS; -m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +if (xen_enabled()) +m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +else +m->default_machine_opts = "accel=kvm,xen-version=0x40002"; } DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init, @@ -888,7 +891,10 @@ static void xenfv_3_1_machine_options(MachineClass *m) m->desc = "Xen Fully-virtualized PC"; m->alias = "xenfv"; m->max_cpus = HVM_MAX_VCPUS; -m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +if (xen_enabled()) +m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +else +m->default_machine_opts = "accel=kvm,xen-version=0x30001"; } DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c95333514e..9b14b18836 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -52,6 +52,9 @@ typedef struct PCMachineState { bool default_bus_bypass_iommu; uint64_t max_fw_size; +/* Xen HVM emulation */ +uint32_t xen_version; + /* ACPI Memory hotplug IO base address */ hwaddr memhp_io_base; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a213209379..0a2069b117 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -31,6 +31,7 @@ #include "sysemu/runstate.h" #include "kvm_i386.h" #include "sev.h" +#include "xen.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -774,6 +775,17 @@ static inline bool freq_within_bounds(int freq, int target_freq) return false; } +static uint32_t kvm_arch_xen_version(MachineState *ms) +{ +uint32_t v = object_property_get_int(OBJECT(ms), "xen-version", NULL); + +/* If it was unset, return zero */ +if (v == (uint32_t) -1) +return 0; + +return
Re: [PATCH v4 3/4] hw/nvme: fix missing endian conversions for doorbell buffers
On 12/12/22 12:44, Klaus Jensen wrote: From: Klaus Jensen The eventidx and doorbell value are not handling endianness correctly. Fix this. Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") Cc: qemu-sta...@nongnu.org Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 25/30] build: move warning flag selection to meson
Hi On Fri, Dec 9, 2022 at 3:48 PM Paolo Bonzini wrote: > > Meson already knows to test with the positive form of the flag, which > simplifies the test. Warnings are now tested explicitly for the C++ > compiler, instead of hardcoding those that are only available for > the C language. > > At this point all compiler flags in QEMU_CFLAGS are global and only > depend on the OS. No feature tests are performed in configure. > > Signed-off-by: Paolo Bonzini Reviewed-by: Marc-André Lureau > --- > configure| 94 > contrib/plugins/Makefile | 3 +- > meson.build | 72 -- > 3 files changed, 51 insertions(+), 118 deletions(-) > > diff --git a/configure b/configure > index 5d31294f316f..6df61f4337e4 100755 > --- a/configure > +++ b/configure > @@ -75,7 +75,6 @@ fi > TMPB="qemu-conf" > TMPC="${TMPDIR1}/${TMPB}.c" > TMPO="${TMPDIR1}/${TMPB}.o" > -TMPM="${TMPDIR1}/${TMPB}.m" > TMPE="${TMPDIR1}/${TMPB}.exe" > > rm -f config.log > @@ -157,15 +156,6 @@ do_cc() { > do_compiler_werror "$cc" $CPU_CFLAGS "$@" > } > > -do_objc() { > -do_compiler_werror "$objcc" $CPU_CFLAGS "$@" > -} > - > -# Append $2 to the variable named $1, with space separation > -add_to() { > -eval $1=\${$1:+\"\$$1 \"}\$2 > -} > - > compile_object() { >local_cflags="$1" >do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags > -c -o $TMPO $TMPC > @@ -1091,89 +1081,6 @@ if ! compile_prog "" "" ; then > error_exit "You need at least GCC v7.4 or Clang v6.0 (or XCode Clang > v10.0)" > fi > > -# Accumulate -Wfoo and -Wno-bar separately. > -# We will list all of the enable flags first, and the disable flags second. > -# Note that we do not add -Werror, because that would enable it for all > -# configure tests. If a configure test failed due to -Werror this would > -# just silently disable some features, so it's too error prone. > - > -warn_flags= > -add_to warn_flags -Wundef > -add_to warn_flags -Wwrite-strings > -add_to warn_flags -Wmissing-prototypes > -add_to warn_flags -Wstrict-prototypes > -add_to warn_flags -Wredundant-decls > -add_to warn_flags -Wold-style-declaration > -add_to warn_flags -Wold-style-definition > -add_to warn_flags -Wtype-limits > -add_to warn_flags -Wformat-security > -add_to warn_flags -Wformat-y2k > -add_to warn_flags -Winit-self > -add_to warn_flags -Wignored-qualifiers > -add_to warn_flags -Wempty-body > -add_to warn_flags -Wnested-externs > -add_to warn_flags -Wendif-labels > -add_to warn_flags -Wexpansion-to-defined > -add_to warn_flags -Wimplicit-fallthrough=2 > - > -nowarn_flags= > -add_to nowarn_flags -Wno-initializer-overrides > -add_to nowarn_flags -Wno-missing-include-dirs > -add_to nowarn_flags -Wno-shift-negative-value > -add_to nowarn_flags -Wno-string-plus-int > -add_to nowarn_flags -Wno-typedef-redefinition > -add_to nowarn_flags -Wno-tautological-type-limit-compare > -add_to nowarn_flags -Wno-psabi > -add_to nowarn_flags -Wno-gnu-variable-sized-type-not-at-end > - > -gcc_flags="$warn_flags $nowarn_flags" > - > -cc_has_warning_flag() { > -write_c_skeleton; > - > -# Use the positive sense of the flag when testing for -Wno-wombat > -# support (gcc will happily accept the -Wno- form of unknown > -# warning options). > -optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')" > -compile_prog "-Werror $optflag" "" > -} > - > -objcc_has_warning_flag() { > -cat > $TMPM < -int main(void) { return 0; } > -EOF > - > -# Use the positive sense of the flag when testing for -Wno-wombat > -# support (gcc will happily accept the -Wno- form of unknown > -# warning options). > -optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')" > -do_objc -Werror $optflag \ > - $OBJCFLAGS $EXTRA_OBJCFLAGS $CONFIGURE_OBJCFLAGS $QEMU_OBJCFLAGS \ > - -o $TMPE $TMPM > -} > - > -for flag in $gcc_flags; do > -if cc_has_warning_flag $flag ; then > -QEMU_CFLAGS="$QEMU_CFLAGS $flag" > -fi > -if objcc_has_warning_flag $flag ; then > -QEMU_OBJCFLAGS="$QEMU_OBJCFLAGS $flag" > -fi > -done > - > -# Disable -Wmissing-braces on older compilers that warn even for > -# the "universal" C zero initializer {0}. > -cat > $TMPC << EOF > -struct { > - int a[2]; > -} x = {0}; > -EOF > -if compile_object "-Werror" "" ; then > - : > -else > - QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces" > -fi > - > # Resolve default for --enable-plugins > if test "$static" = "yes" ; then >if test "$plugins" = "yes"; then > @@ -1928,7 +1835,6 @@ echo "NINJA=$ninja" >> $config_host_mak > echo "PKG_CONFIG=${pkg_config_exe}" >> $config_host_mak > echo "CC=$cc" >> $config_host_mak > echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak > -echo "QEMU_OBJCFLAGS=$QEMU_OBJCFLAGS" >> $config_host_mak > echo "EXESUF=$EXESUF" >> $config_host_mak > > # use included Linux headers > diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile > index 8a316cd76f2f..b2b9
[PATCH 03/15] block: Pull polling out of bdrv_parent_drained_begin_single()
Only one caller of bdrv_parent_drained_begin_single() passes poll=true; move the polling to that one caller. While this requires exposing bdrv_parent_drained_poll_single to outside block/io.c, this is not a big deal because the bdrv_parent_drained_*_single functions are really internal between block.c and block/io.c. So make that clear while we're at it, by moving them to block_int-io.h. Based on a patch by Kevin Wolf . Signed-off-by: Paolo Bonzini --- block.c | 4 +++- block/io.c | 10 +++--- include/block/block-io.h | 15 --- include/block/block_int-io.h | 21 + 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/block.c b/block.c index 2f2123f4a4e5..c542a0a33358 100644 --- a/block.c +++ b/block.c @@ -2830,7 +2830,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, */ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); if (new_bs_quiesce_counter && !child->quiesced_parent) { -bdrv_parent_drained_begin_single(child, true); +bdrv_parent_drained_begin_single(child); +AIO_WAIT_WHILE(bdrv_child_get_parent_aio_context(child), + bdrv_parent_drained_poll_single(child)); } if (old_bs) { diff --git a/block/io.c b/block/io.c index 571ff8c6493a..fbd9 100644 --- a/block/io.c +++ b/block/io.c @@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) if (c == ignore) { continue; } -bdrv_parent_drained_begin_single(c, false); +bdrv_parent_drained_begin_single(c); } } @@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) } } -static bool bdrv_parent_drained_poll_single(BdrvChild *c) +bool bdrv_parent_drained_poll_single(BdrvChild *c) { if (c->klass->drained_poll) { return c->klass->drained_poll(c); @@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, return busy; } -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) +void bdrv_parent_drained_begin_single(BdrvChild *c) { -AioContext *ctx = bdrv_child_get_parent_aio_context(c); IO_OR_GS_CODE(); assert(!c->quiesced_parent); @@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) if (c->klass->drained_begin) { c->klass->drained_begin(c); } -if (poll) { -AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); -} } static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) diff --git a/include/block/block-io.h b/include/block/block-io.h index 0e0cd1249705..10659a3f246c 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -305,21 +305,6 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int co_wrapper_mixed bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -/** - * bdrv_parent_drained_begin_single: - * - * Begin a quiesced section for the parent of @c. If @poll is true, wait for - * any pending activity to cease. - */ -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - -/** - * bdrv_parent_drained_end_single: - * - * End a quiesced section for the parent of @c. - */ -void bdrv_parent_drained_end_single(BdrvChild *c); - /** * bdrv_drain_poll: * diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 8bc061ebb895..0ced9c025acb 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -179,4 +179,25 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, */ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); +/** + * bdrv_parent_drained_begin_single: + * + * Begin a quiesced section for the parent of @c. + */ +void bdrv_parent_drained_begin_single(BdrvChild *c); + +/** + * bdrv_parent_drained_begin_single: + * + * Check whether the parent of @c has quiesced. + */ +bool bdrv_parent_drained_poll_single(BdrvChild *c); + +/** + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. + */ +void bdrv_parent_drained_end_single(BdrvChild *c); + #endif /* BLOCK_INT_IO_H */ -- 2.38.1
[PATCH 06/15] tests/qemu-iotests/030: test_stream_parallel should use auto_finalize=False
From: Emanuele Giuseppe Esposito First, use run_job() instead of the current logic to run the stream job. Then, use auto_finalize=False to be sure that the job is not automatically deleted once it is done. In this way, if the job finishes before we want, it is not finalized yet so the other commands can still execute without failing. run_job() will then take care of calling job-finalize. Signed-off-by: Emanuele Giuseppe Esposito Suggested-by: Kevin Wolf Message-Id: <20220314131854.2202651-11-eespo...@redhat.com> Signed-off-by: Paolo Bonzini --- tests/qemu-iotests/030 | 12 +++- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 98595d47fec3..e5c13cb5fe4c 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -256,7 +256,7 @@ class TestParallelOps(iotests.QMPTestCase): pending_jobs.append(job_id) result = self.vm.qmp('block-stream', device=node_name, job_id=job_id, bottom=f'node{i-1}', - speed=1024) + speed=1024, auto_finalize=False) self.assert_qmp(result, 'return', {}) # Do this in reverse: After unthrottling them, some jobs may finish @@ -272,14 +272,8 @@ class TestParallelOps(iotests.QMPTestCase): result = self.vm.qmp('block-job-set-speed', device=job, speed=0) self.assert_qmp(result, 'return', {}) -# Wait for all jobs to be finished. -while len(pending_jobs) > 0: -for event in self.vm.get_qmp_events(wait=True): -if event['event'] == 'BLOCK_JOB_COMPLETED': -job_id = self.dictpath(event, 'data/device') -self.assertTrue(job_id in pending_jobs) -self.assert_qmp_absent(event, 'data/error') -pending_jobs.remove(job_id) +for job in pending_jobs: +self.vm.run_job(job=job, auto_finalize=False) self.assert_no_active_block_jobs() self.vm.shutdown() -- 2.38.1
[PATCH 05/15] tests/unit/test-bdrv-drain.c: graph setup functions can't run in coroutines
From: Emanuele Giuseppe Esposito Graph initialization functions like blk_new(), bdrv_new() and so on should not run in a coroutine. In fact, they might invoke a drain (for example blk_insert_bs eventually calls bdrv_replace_child_noperm) that in turn can invoke callbacks like bdrv_do_drained_begin_quiesce(), that asserts exactly that we are not in a coroutine. Move the initialization phase of test_drv_cb and test_quiesce_common outside the coroutine logic. Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20220314131854.2202651-9-eespo...@redhat.com> Signed-off-by: Paolo Bonzini --- tests/unit/test-bdrv-drain.c | 110 ++- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 3be214f30186..f677f1d9fc25 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -126,7 +126,8 @@ static void aio_ret_cb(void *opaque, int ret) } typedef struct CallInCoroutineData { -void (*entry)(void); +void (*entry)(void *); +void *arg; bool done; } CallInCoroutineData; @@ -134,15 +135,16 @@ static coroutine_fn void call_in_coroutine_entry(void *opaque) { CallInCoroutineData *data = opaque; -data->entry(); +data->entry(data->arg); data->done = true; } -static void call_in_coroutine(void (*entry)(void)) +static void call_in_coroutine(void (*entry)(void *), void *arg) { Coroutine *co; CallInCoroutineData data = { .entry = entry, +.arg= arg, .done = false, }; @@ -199,26 +201,28 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState * } } -static void test_drv_cb_common(enum drain_type drain_type, bool recursive) -{ +typedef struct TestDriverCBData { +enum drain_type drain_type; +bool recursive; BlockBackend *blk; BlockDriverState *bs, *backing; -BDRVTestState *s, *backing_s; +} TestDriverCBData; + +static void test_drv_cb_common(void *arg) +{ +TestDriverCBData *data = arg; +BlockBackend *blk = data->blk; +BlockDriverState *bs = data->bs; +BlockDriverState *backing = data->backing; +enum drain_type drain_type = data->drain_type; +bool recursive = data->recursive; +BDRVTestState *s = bs->opaque; +BDRVTestState *backing_s = backing->opaque; BlockAIOCB *acb; int aio_ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); -blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, - &error_abort); -s = bs->opaque; -blk_insert_bs(blk, bs, &error_abort); - -backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -backing_s = backing->opaque; -bdrv_set_backing_hd(bs, backing, &error_abort); - /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ g_assert_cmpint(s->drain_count, ==, 0); g_assert_cmpint(backing_s->drain_count, ==, 0); @@ -252,44 +256,67 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) g_assert_cmpint(s->drain_count, ==, 0); g_assert_cmpint(backing_s->drain_count, ==, 0); +} -bdrv_unref(backing); -bdrv_unref(bs); -blk_unref(blk); +static void test_common_cb(enum drain_type drain_type, bool in_coroutine, + void (*cb)(void *)) +{ +TestDriverCBData data; + +data.drain_type = drain_type; +data.recursive = (drain_type != BDRV_DRAIN); + +data.blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +data.bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); +blk_insert_bs(data.blk, data.bs, &error_abort); + +data.backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +bdrv_set_backing_hd(data.bs, data.backing, &error_abort); + +if (in_coroutine) { +call_in_coroutine(cb, &data); +} else { +cb(&data); +} + +bdrv_unref(data.backing); +bdrv_unref(data.bs); +blk_unref(data.blk); +} + +static void test_drv_cb(enum drain_type drain_type, bool in_coroutine) +{ +test_common_cb(drain_type, in_coroutine, test_drv_cb_common); } static void test_drv_cb_drain_all(void) { -test_drv_cb_common(BDRV_DRAIN_ALL, true); +test_drv_cb(BDRV_DRAIN_ALL, false); } static void test_drv_cb_drain(void) { -test_drv_cb_common(BDRV_DRAIN, false); +test_drv_cb(BDRV_DRAIN, false); } static void test_drv_cb_co_drain_all(void) { -call_in_coroutine(test_drv_cb_drain_all); +test_drv_cb(BDRV_DRAIN_ALL, true); } static void test_drv_cb_co_drain(void) { -call_in_coroutine(test_drv_cb_drain); +test_drv_cb(BDRV_DRAIN, true); } -static void test_quiesce_common(enum drain_type drain_type, bool recursive) +static void test_quiesce_common(void *arg) { -BlockBa
[PATCH 09/15] block-backend: make global properties write-once
The three global properties allow_aio_context_change, disable_request_queuing and allow_write_before_eof are always set for the whole life of a BlockBackend. Make this clear by removing the possibility of clearing them, and by marking the corresponding function GLOBAL_STATE_CODE(). Signed-off-by: Paolo Bonzini --- block/block-backend.c | 24 ++-- block/commit.c| 4 ++-- block/export/export.c | 2 +- block/mirror.c| 4 ++-- block/parallels.c | 2 +- block/qcow.c | 2 +- block/qcow2.c | 2 +- block/qed.c | 2 +- block/stream.c| 4 ++-- block/vdi.c | 2 +- block/vhdx.c | 2 +- block/vmdk.c | 4 ++-- block/vpc.c | 2 +- include/sysemu/block-backend-io.h | 6 +++--- nbd/server.c | 3 +-- tests/unit/test-bdrv-drain.c | 4 ++-- tests/unit/test-block-iothread.c | 2 +- 17 files changed, 37 insertions(+), 34 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index c4a884b86c2b..fe42d53d655d 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -73,8 +73,13 @@ struct BlockBackend { uint64_t shared_perm; bool disable_perm; +/* + * Can only become true; should be written before any requests is + * submitted to the BlockBackend. + */ bool allow_aio_context_change; bool allow_write_beyond_eof; +bool disable_request_queuing; /* Protected by BQL */ NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -82,7 +87,6 @@ struct BlockBackend { int quiesce_counter; CoQueue queued_requests; -bool disable_request_queuing; VMChangeStateEntry *vmsh; bool force_allow_inactivate; @@ -1217,22 +1221,22 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) } } -void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) +void blk_allow_write_beyond_eof(BlockBackend *blk) { -IO_CODE(); -blk->allow_write_beyond_eof = allow; +GLOBAL_STATE_CODE(); +blk->allow_write_beyond_eof = true; } -void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) +void blk_allow_aio_context_change(BlockBackend *blk) { -IO_CODE(); -blk->allow_aio_context_change = allow; +GLOBAL_STATE_CODE(); +blk->allow_aio_context_change = true; } -void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) +void blk_disable_request_queuing(BlockBackend *blk) { -IO_CODE(); -blk->disable_request_queuing = disable; +GLOBAL_STATE_CODE(); +blk->disable_request_queuing = true; } static int blk_check_byte_request(BlockBackend *blk, int64_t offset, diff --git a/block/commit.c b/block/commit.c index b34634176797..7a448838301b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -379,7 +379,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (ret < 0) { goto fail; } -blk_set_disable_request_queuing(s->base, true); +blk_disable_request_queuing(s->base); s->base_bs = base; /* Required permissions are already taken with block_job_add_bdrv() */ @@ -388,7 +388,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (ret < 0) { goto fail; } -blk_set_disable_request_queuing(s->top, true); +blk_disable_request_queuing(s->top); s->backing_file_str = g_strdup(backing_file_str); s->on_error = on_error; diff --git a/block/export/export.c b/block/export/export.c index 7cc0c25c1c9f..6abfe5becb88 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -155,7 +155,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) blk = blk_new(ctx, perm, BLK_PERM_ALL); if (!fixed_iothread) { -blk_set_allow_aio_context_change(blk, true); +blk_allow_aio_context_change(blk); } ret = blk_insert_bs(blk, bs, errp); diff --git a/block/mirror.c b/block/mirror.c index 251adc5ae02a..28e76e9dbf55 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1768,8 +1768,8 @@ static BlockJob *mirror_start_job( * ensure that. */ blk_set_force_allow_inactivate(s->target); } -blk_set_allow_aio_context_change(s->target, true); -blk_set_disable_request_queuing(s->target, true); +blk_allow_aio_context_change(s->target); +blk_disable_request_queuing(s->target); s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; diff --git a/block/parallels.c b/block/parallels.c index bbea2f2221ff..44e18a8cc75d 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -576,7 +576,7 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, ret = -EPERM; goto out; } -blk_set_allow_write_beyond_eof(blk, true); +blk_allow_write_beyond_eof(blk);
[PATCH 13/15] block: second argument of bdrv_do_drained_end is always NULL
Remove it and unify the function with bdrv_drained_end. Signed-off-by: Paolo Bonzini --- block/io.c | 21 ++--- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/block/io.c b/block/io.c index 695c8f3f5faa..c2962adf8d2d 100644 --- a/block/io.c +++ b/block/io.c @@ -69,14 +69,11 @@ void bdrv_parent_drained_end_single(BdrvChild *c) } } -static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) +static void bdrv_parent_drained_end(BlockDriverState *bs) { BdrvChild *c; QLIST_FOREACH(c, &bs->parents, next_parent) { -if (c == ignore) { -continue; -} bdrv_parent_drained_end_single(c); } } @@ -262,7 +259,6 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, bool poll); -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -381,10 +377,11 @@ void bdrv_drained_begin(BlockDriverState *bs) * This function does not poll, nor must any of its recursively called * functions. */ -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) +void bdrv_drained_end(BlockDriverState *bs) { int old_quiesce_counter; +IO_OR_GS_CODE(); assert(bs->quiesce_counter > 0); /* Re-enable things in child-to-parent order */ @@ -393,17 +390,11 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) if (bs->drv && bs->drv->bdrv_drain_end) { bs->drv->bdrv_drain_end(bs); } -bdrv_parent_drained_end(bs, parent); +bdrv_parent_drained_end(bs); aio_enable_external(bdrv_get_aio_context(bs)); } } -void bdrv_drained_end(BlockDriverState *bs) -{ -IO_OR_GS_CODE(); -bdrv_do_drained_end(bs, NULL); -} - void bdrv_drain(BlockDriverState *bs) { IO_OR_GS_CODE(); @@ -504,7 +495,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) g_assert(!bs->refcnt); while (bs->quiesce_counter) { -bdrv_do_drained_end(bs, NULL); +bdrv_drained_end(bs); } } @@ -526,7 +517,7 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); -bdrv_do_drained_end(bs, NULL); +bdrv_drained_end(bs); aio_context_release(aio_context); } -- 2.38.1
[PATCH 10/15] block-backend: always wait for drain before starting operation
All I/O operations call blk_wait_while_drained() immediately after blk_inc_in_flight(), except for blk_abort_aio_request() where it does not hurt to add such a call. Merge the two functions into one, and add a note about a disturbing lack of thread-safety that will be fixed shortly. While at it, make the quiesce_counter check a loop. While the check does not have to be "perfect", i.e. it only matters that an endless stream of I/Os is stopped sooner or later, it is more logical to check the counter repeatedly until it is zero. Signed-off-by: Paolo Bonzini --- block/block-backend.c | 27 --- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index fe42d53d655d..627d491d4155 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1270,18 +1270,6 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return 0; } -/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) -{ -assert(blk->in_flight > 0); - -if (blk->quiesce_counter && !blk->disable_request_queuing) { -blk_dec_in_flight(blk); -qemu_co_queue_wait(&blk->queued_requests, NULL); -blk_inc_in_flight(blk); -} -} - /* To be called between exactly one pair of blk_inc/dec_in_flight() */ static int coroutine_fn blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes, @@ -1334,7 +1322,6 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, IO_OR_GS_CODE(); blk_inc_in_flight(blk); -blk_wait_while_drained(blk); ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags); blk_dec_in_flight(blk); @@ -1349,7 +1336,6 @@ int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, IO_OR_GS_CODE(); blk_inc_in_flight(blk); -blk_wait_while_drained(blk); ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags); blk_dec_in_flight(blk); @@ -1401,7 +1387,6 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, IO_OR_GS_CODE(); blk_inc_in_flight(blk); -blk_wait_while_drained(blk); ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); blk_dec_in_flight(blk); @@ -1466,6 +1451,14 @@ void blk_inc_in_flight(BlockBackend *blk) { IO_CODE(); qatomic_inc(&blk->in_flight); +if (!blk->disable_request_queuing) { +/* TODO: this is not thread-safe! */ +while (blk->quiesce_counter) { +qatomic_dec(&blk->in_flight); +qemu_co_queue_wait(&blk->queued_requests, NULL); +qatomic_inc(&blk->in_flight); +} +} } void blk_dec_in_flight(BlockBackend *blk) @@ -1546,7 +1539,6 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, Coroutine *co; blk_inc_in_flight(blk); -blk_wait_while_drained(blk); acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); acb->rwco = (BlkRwCo) { .blk= blk, @@ -1685,7 +1677,6 @@ int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, IO_OR_GS_CODE(); blk_inc_in_flight(blk); -blk_wait_while_drained(blk); ret = blk_co_do_ioctl(blk, req, buf); blk_dec_in_flight(blk); @@ -1749,7 +1740,6 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, IO_OR_GS_CODE(); blk_inc_in_flight(blk); -blk_wait_while_drained(blk); ret = blk_co_do_pdiscard(blk, offset, bytes); blk_dec_in_flight(blk); @@ -1790,7 +1780,6 @@ int coroutine_fn blk_co_flush(BlockBackend *blk) IO_OR_GS_CODE(); blk_inc_in_flight(blk); -blk_wait_while_drained(blk); ret = blk_co_do_flush(blk); blk_dec_in_flight(blk); -- 2.38.1
[PATCH 02/15] Revert "block: Don't poll in bdrv_replace_child_noperm()"
This reverts commit a4e5c80a45b22359cf9c187f0df4f8544812c55c. Signed-off-by: Paolo Bonzini --- block.c | 103 +-- block/io.c | 2 +- include/block/block-io.h | 8 --- tests/unit/test-bdrv-drain.c | 10 4 files changed, 15 insertions(+), 108 deletions(-) diff --git a/block.c b/block.c index 87022f4cd971..2f2123f4a4e5 100644 --- a/block.c +++ b/block.c @@ -2367,20 +2367,6 @@ static void bdrv_replace_child_abort(void *opaque) GLOBAL_STATE_CODE(); /* old_bs reference is transparently moved from @s to @s->child */ -if (!s->child->bs) { -/* - * The parents were undrained when removing old_bs from the child. New - * requests can't have been made, though, because the child was empty. - * - * TODO Make bdrv_replace_child_noperm() transactionable to avoid - * undraining the parent in the first place. Once this is done, having - * new_bs drained when calling bdrv_replace_child_tran() is not a - * requirement any more. - */ -bdrv_parent_drained_begin_single(s->child, false); -assert(!bdrv_parent_drained_poll_single(s->child)); -} -assert(s->child->quiesced_parent); bdrv_replace_child_noperm(s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2396,19 +2382,12 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * Note: real unref of old_bs is done only on commit. * - * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be - * kept drained until the transaction is completed. - * * The function doesn't update permissions, caller is responsible for this. */ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); - -assert(child->quiesced_parent); -assert(!new_bs || new_bs->quiesce_counter); - *s = (BdrvReplaceChildState) { .child = child, .old_bs = child->bs, @@ -2831,14 +2810,6 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -/* - * Replaces the node that a BdrvChild points to without updating permissions. - * - * If @new_bs is non-NULL, the parent of @child must already be drained through - * @child. - * - * This function does not poll. - */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { @@ -2846,28 +2817,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, int new_bs_quiesce_counter; assert(!child->frozen); - -/* - * If we want to change the BdrvChild to point to a drained node as its new - * child->bs, we need to make sure that its new parent is drained, too. In - * other words, either child->quiesce_parent must already be true or we must - * be able to set it and keep the parent's quiesce_counter consistent with - * that, but without polling or starting new requests (this function - * guarantees that it doesn't poll, and starting new requests would be - * against the invariants of drain sections). - * - * To keep things simple, we pick the first option (child->quiesce_parent - * must already be true). We also generalise the rule a bit to make it - * easier to verify in callers and more likely to be covered in test cases: - * The parent must be quiesced through this child even if new_bs isn't - * currently drained. - * - * The only exception is for callers that always pass new_bs == NULL. In - * this case, we obviously never need to consider the case of a drained - * new_bs, so we can keep the callers simpler by allowing them not to drain - * the parent. - */ -assert(!new_bs || child->quiesced_parent); assert(old_bs != new_bs); GLOBAL_STATE_CODE(); @@ -2875,6 +2824,15 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } +/* + * If the new child node is drained but the old one was not, flush + * all outstanding requests to the old child node. + */ +new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); +if (new_bs_quiesce_counter && !child->quiesced_parent) { +bdrv_parent_drained_begin_single(child, true); +} + if (old_bs) { if (child->klass->detach) { child->klass->detach(child); @@ -2894,9 +2852,11 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } /* - * If the parent was drained through this BdrvChild previously, but new_bs - * is not drained, allow requests to come in only after the new node has - * been attached. + * If the old child node was drained but the new one is not, allow + * requests to come in only after the new node has been atta
[PATCH 00/12] More cleanups and fixes for drain
There are a few more lines of code that can be removed around draining code, but especially the logic can be simplified by removing unnecessary parameters. Due to the failure of the block-next branch, the first three patches drop patches 14+15 of Kevin's drain cleanup series, and then redo patch 15 in a slightly less satisfactory way that still enables the remaining cleanups. These reverts are not supposed to be applied; either the offending patches are dropped from the branch, or if the issue is fixed then my first three patches can go away. The next three are taken from Emanuele's old subtree drain attempt at removing the AioContext. The main one is the second, which is needed to avoid testcase failures, but I included all of them for simplicity. Patch 7 fixes another latent bug exposed by the later cleanups, and while looking for a fix I noticed a general lack of thread-safety in BlockBackend's drain code. There are some global properties that only need to be documented and enforced to be set only at creation time (patches 8/9), but also queued_requests is not protected by any mutex, which is fixed in patch 10. Finally, patches 11-15 are the actual simplification. Applies on top of block-next. Paolo Emanuele Giuseppe Esposito (3): test-bdrv-drain.c: remove test_detach_by_parent_cb() tests/unit/test-bdrv-drain.c: graph setup functions can't run in coroutines tests/qemu-iotests/030: test_stream_parallel should use auto_finalize=False Paolo Bonzini (12): Revert "block: Remove poll parameter from bdrv_parent_drained_begin_single()" Revert "block: Don't poll in bdrv_replace_child_noperm()" block: Pull polling out of bdrv_parent_drained_begin_single() block-backend: enter aio coroutine only after drain nbd: a BlockExport always has a BlockBackend block-backend: make global properties write-once block-backend: always wait for drain before starting operation block-backend: make queued_requests thread-safe block: limit bdrv_co_yield_to_drain to drain_begin block: second argument of bdrv_do_drained_end is always NULL block: second argument of bdrv_do_drained_begin and bdrv_drain_poll is always NULL block: only get out of coroutine context for polling block.c | 109 block/block-backend.c | 91 +--- block/commit.c| 4 +- block/export/export.c | 2 +- block/io.c| 136 +--- block/mirror.c| 4 +- block/parallels.c | 2 +- block/qcow.c | 2 +- block/qcow2.c | 2 +- block/qed.c | 2 +- block/stream.c| 4 +- block/vdi.c | 2 +- block/vhdx.c | 2 +- block/vmdk.c | 4 +- block/vpc.c | 2 +- include/block/block-io.h | 29 +- include/block/block_int-io.h | 21 include/sysemu/block-backend-io.h | 6 +- nbd/server.c | 15 ++- tests/qemu-iotests/030| 12 +-- tests/unit/test-bdrv-drain.c | 166 +++--- tests/unit/test-block-iothread.c | 2 +- 22 files changed, 258 insertions(+), 361 deletions(-) -- 2.38.1
[PATCH 11/15] block-backend: make queued_requests thread-safe
Protect quiesce_counter and queued_requests with a QemuMutex, so that they are protected from concurrent access in the main thread (for example blk_root_drained_end() reached from bdrv_drain_all()) and in the iothread (where any I/O operation will call blk_inc_in_flight()). Signed-off-by: Paolo Bonzini --- block/block-backend.c | 44 +++ 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 627d491d4155..fdf82f1f1599 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -23,6 +23,7 @@ #include "qapi/error.h" #include "qapi/qapi-events-block.h" #include "qemu/id.h" +#include "qemu/thread.h" #include "qemu/main-loop.h" #include "qemu/option.h" #include "trace.h" @@ -85,6 +86,8 @@ struct BlockBackend { NotifierList remove_bs_notifiers, insert_bs_notifiers; QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; +/* Protected by quiesce_lock. */ +QemuMutex quiesce_lock; int quiesce_counter; CoQueue queued_requests; @@ -372,6 +375,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) block_acct_init(&blk->stats); +qemu_mutex_init(&blk->quiesce_lock); qemu_co_queue_init(&blk->queued_requests); notifier_list_init(&blk->remove_bs_notifiers); notifier_list_init(&blk->insert_bs_notifiers); @@ -490,6 +494,7 @@ static void blk_delete(BlockBackend *blk) assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); assert(QLIST_EMPTY(&blk->aio_notifiers)); QTAILQ_REMOVE(&block_backends, blk, link); +qemu_mutex_destroy(&blk->quiesce_lock); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); g_free(blk); @@ -1451,11 +1456,25 @@ void blk_inc_in_flight(BlockBackend *blk) { IO_CODE(); qatomic_inc(&blk->in_flight); -if (!blk->disable_request_queuing) { -/* TODO: this is not thread-safe! */ + +/* + * Avoid a continuous stream of requests from AIO callbacks, which + * call a user-provided callback while blk->in_flight is elevated, + * if the BlockBackend is being quiesced. + * + * This initial test does not have to be perfect: a race might + * cause an extra I/O to be queued, but sooner or later a nonzero + * quiesce_counter will be observed. + */ +if (!blk->disable_request_queuing && qatomic_read(&blk->quiesce_counter)) { +/* + * ... on the other hand, it is important that the final check and +* the wait are done under the lock, so that no wakeups are missed. + */ +QEMU_LOCK_GUARD(&blk->quiesce_lock); while (blk->quiesce_counter) { qatomic_dec(&blk->in_flight); -qemu_co_queue_wait(&blk->queued_requests, NULL); +qemu_co_queue_wait(&blk->queued_requests, &blk->quiesce_lock); qatomic_inc(&blk->in_flight); } } @@ -2542,7 +2561,8 @@ static void blk_root_drained_begin(BdrvChild *child) BlockBackend *blk = child->opaque; ThrottleGroupMember *tgm = &blk->public.throttle_group_member; -if (++blk->quiesce_counter == 1) { +qatomic_set(&blk->quiesce_counter, blk->quiesce_counter + 1); +if (blk->quiesce_counter == 1) { if (blk->dev_ops && blk->dev_ops->drained_begin) { blk->dev_ops->drained_begin(blk->dev_opaque); } @@ -2560,6 +2580,7 @@ static bool blk_root_drained_poll(BdrvChild *child) { BlockBackend *blk = child->opaque; bool busy = false; + assert(blk->quiesce_counter); if (blk->dev_ops && blk->dev_ops->drained_poll) { @@ -2576,14 +2597,21 @@ static void blk_root_drained_end(BdrvChild *child) assert(blk->public.throttle_group_member.io_limits_disabled); qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled); -if (--blk->quiesce_counter == 0) { +qemu_mutex_lock(&blk->quiesce_lock); +qatomic_set(&blk->quiesce_counter, blk->quiesce_counter - 1); +if (blk->quiesce_counter == 0) { +/* After this point, new I/O will not sleep on queued_requests. */ +qemu_mutex_unlock(&blk->quiesce_lock); + if (blk->dev_ops && blk->dev_ops->drained_end) { blk->dev_ops->drained_end(blk->dev_opaque); } -while (qemu_co_enter_next(&blk->queued_requests, NULL)) { -/* Resume all queued requests */ -} + +/* Now resume all queued requests */ +qemu_mutex_lock(&blk->quiesce_lock); +qemu_co_enter_all(&blk->queued_requests, &blk->quiesce_lock); } +qemu_mutex_unlock(&blk->quiesce_lock); } bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp) -- 2.38.1
[PATCH 14/15] block: second argument of bdrv_do_drained_begin and bdrv_drain_poll is always NULL
Remove it from the functions, from callers/callees such as bdrv_do_drained_begin_quiesce() and bdrv_drain_poll(), and from bdrv_co_yield_to_drain() and BdrvCoDrainData. Signed-off-by: Paolo Bonzini --- block.c | 4 ++-- block/io.c | 49 include/block/block-io.h | 7 +++--- 3 files changed, 24 insertions(+), 36 deletions(-) diff --git a/block.c b/block.c index c542a0a33358..676bbe0529b0 100644 --- a/block.c +++ b/block.c @@ -1186,13 +1186,13 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; -bdrv_do_drained_begin_quiesce(bs, NULL); +bdrv_do_drained_begin_quiesce(bs); } static bool bdrv_child_cb_drained_poll(BdrvChild *child) { BlockDriverState *bs = child->opaque; -return bdrv_drain_poll(bs, NULL, false); +return bdrv_drain_poll(bs, false); } static void bdrv_child_cb_drained_end(BdrvChild *child) diff --git a/block/io.c b/block/io.c index c2962adf8d2d..a75f42ee13cb 100644 --- a/block/io.c +++ b/block/io.c @@ -45,14 +45,11 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags); -static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) +static void bdrv_parent_drained_begin(BlockDriverState *bs) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { -if (c == ignore) { -continue; -} bdrv_parent_drained_begin_single(c); } } @@ -86,14 +83,13 @@ bool bdrv_parent_drained_poll_single(BdrvChild *c) return false; } -static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents) +static bool bdrv_parent_drained_poll(BlockDriverState *bs, bool ignore_bds_parents) { BdrvChild *c, *next; bool busy = false; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { -if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { +if (ignore_bds_parents && c->klass->parent_is_bds) { continue; } busy |= bdrv_parent_drained_poll_single(c); @@ -231,16 +227,14 @@ typedef struct { BlockDriverState *bs; bool done; bool poll; -BdrvChild *parent; } BdrvCoDrainData; /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, - bool ignore_bds_parents) +bool bdrv_drain_poll(BlockDriverState *bs, bool ignore_bds_parents) { IO_OR_GS_CODE(); -if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { +if (bdrv_parent_drained_poll(bs, ignore_bds_parents)) { return true; } @@ -251,14 +245,12 @@ bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, return false; } -static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - BdrvChild *ignore_parent) +static bool bdrv_drain_poll_top_level(BlockDriverState *bs) { -return bdrv_drain_poll(bs, ignore_parent, false); +return bdrv_drain_poll(bs, false); } -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool poll); +static void bdrv_do_drained_begin(BlockDriverState *bs, bool poll); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -270,7 +262,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); bdrv_dec_in_flight(bs); -bdrv_do_drained_begin(bs, data->parent, data->poll); +bdrv_do_drained_begin(bs, data->poll); aio_context_release(ctx); } else { bdrv_drain_all_begin(); @@ -281,7 +273,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) } static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, -BdrvChild *parent, bool poll) { BdrvCoDrainData data; @@ -297,7 +288,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .co = self, .bs = bs, .done = false, -.parent = parent, .poll = poll, }; @@ -329,20 +319,19 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, } } -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool poll) +static void bdrv_do_drained_begin(BlockDriverState *bs, bool poll) { IO_OR_GS_CODE(); if (qemu_in_coroutine()) { -bdrv_co_yield_to_drain(bs, parent, poll); +bdrv_co_yield_to_drain(bs, poll); return; } /*
[PATCH 04/15] test-bdrv-drain.c: remove test_detach_by_parent_cb()
From: Emanuele Giuseppe Esposito This test uses a callback of an I/O function (blk_aio_preadv) to modify the graph, using bdrv_attach_child. This is simply not allowed anymore. I/O cannot change the graph. Before "block/io.c: make bdrv_do_drained_begin_quiesce static and introduce bdrv_drained_begin_no_poll", the test would simply be at risk of failure, because if bdrv_replace_child_noperm() (called to modify the graph) would call a drain, then one callback of .drained_begin() is bdrv_do_drained_begin_quiesce, that specifically asserts that we are not in a coroutine. Now that we fixed the behavior, the drain will invoke a bh in the main loop, so we don't have such problem. However, this test is still illegal and fails because we forbid graph changes from I/O paths. Once we add the required subtree_drains to protect bdrv_replace_child_noperm(), the key problem in this test is in: acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL); /* Drain and check the expected result */ bdrv_subtree_drained_begin(parent_b); because the detach_by_parent_aio_cb calls detach_indirect_bh(), that modifies the graph and is invoked during bdrv_subtree_drained_begin(). The call stack is the following: 1. blk_aio_preadv() creates a coroutine, increments in_flight counter and enters the coroutine running blk_aio_read_entry() 2. blk_aio_read_entry() performs the read and then schedules a bh to complete (blk_aio_complete) 3. at this point, subtree_drained_begin() kicks in and waits for all in_flight requests, polling 4. polling allows the bh to be scheduled, so blk_aio_complete runs 5. blk_aio_complete *first* invokes the callback (detach_by_parent_aio_cb) and then decrements the in_flight counter 6. Here we have the problem: detach_by_parent_aio_cb modifies the graph, so both bdrv_unref_child() and bdrv_attach_child() will have subtree_drains inside. And this causes a deadlock, because the nested drain will wait for in_flight counter to go to zero, which is only happening once the drain itself finishes. Different story is test_detach_by_driver_cb(): in this case, detach_by_parent_aio_cb() does not call detach_indirect_bh(), but it is instead called as a bh running in the main loop by detach_by_driver_cb_drained_begin(), the callback for .drained_begin(). This test was added in 231281ab42 and part of the series "Drain fixes and cleanups, part 3" https://lists.nongnu.org/archive/html/qemu-block/2018-05/msg01132.html but as explained above I believe that it is not valid anymore, and can be discarded. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi Message-Id: <20220314131854.2202651-8-eespo...@redhat.com> Signed-off-by: Paolo Bonzini --- tests/unit/test-bdrv-drain.c | 42 +--- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 172bc6debc23..3be214f30186 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1106,7 +1106,6 @@ struct detach_by_parent_data { BdrvChild *child_b; BlockDriverState *c; BdrvChild *child_c; -bool by_parent_cb; }; static struct detach_by_parent_data detach_by_parent_data; @@ -1124,12 +1123,7 @@ static void detach_indirect_bh(void *opaque) static void detach_by_parent_aio_cb(void *opaque, int ret) { -struct detach_by_parent_data *data = &detach_by_parent_data; - g_assert_cmpint(ret, ==, 0); -if (data->by_parent_cb) { -detach_indirect_bh(data); -} } static void detach_by_driver_cb_drained_begin(BdrvChild *child) @@ -1148,33 +1142,26 @@ static BdrvChildClass detach_by_driver_cb_class; *\ / \ * A B C * - * by_parent_cb == true: Test that parent callbacks don't poll - * - * PA has a pending write request whose callback changes the child nodes of - * PB: It removes B and adds C instead. The subtree of PB is drained, which - * will indirectly drain the write request, too. - * - * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll + * Test that bdrv_drain_invoke() doesn't poll * * PA's BdrvChildClass has a .drained_begin callback that schedules a BH * that does the same graph change. If bdrv_drain_invoke() calls it, the * state is messed up, but if it is only polled in the single * BDRV_POLL_WHILE() at the end of the drain, this should work fine. */ -static void test_detach_indirect(bool by_parent_cb) +static void test_detach_indirect(void) { BlockBackend *blk; BlockDriverState *parent_a, *parent_b, *a, *b, *c; BdrvChild *child_a, *child_b; BlockAIOCB *acb; +BDRVTestState *s; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); -if (!by_parent_cb) { -detach_by_driver_cb_class = child_of_bds; -detach_by_driver_cb_class.drained_begin = -detach_by_driver_cb_drained_begin; -} +detach_by_driver_cb_class =
[PATCH 01/15] Revert "block: Remove poll parameter from bdrv_parent_drained_begin_single()"
This reverts commit dcc5d4bc2abed4268bf31908193c4369e4c9d005. Signed-off-by: Paolo Bonzini --- block.c | 4 ++-- block/io.c | 8 ++-- include/block/block-io.h | 5 +++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 6191ac1f440c..87022f4cd971 100644 --- a/block.c +++ b/block.c @@ -2377,7 +2377,7 @@ static void bdrv_replace_child_abort(void *opaque) * new_bs drained when calling bdrv_replace_child_tran() is not a * requirement any more. */ -bdrv_parent_drained_begin_single(s->child); +bdrv_parent_drained_begin_single(s->child, false); assert(!bdrv_parent_drained_poll_single(s->child)); } assert(s->child->quiesced_parent); @@ -3050,7 +3050,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, * a problem, we already did this), but it will still poll until the parent * is fully quiesced, so it will not be negatively affected either. */ -bdrv_parent_drained_begin_single(new_child); +bdrv_parent_drained_begin_single(new_child, false); bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); diff --git a/block/io.c b/block/io.c index fbd9..aee6e70c1496 100644 --- a/block/io.c +++ b/block/io.c @@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) if (c == ignore) { continue; } -bdrv_parent_drained_begin_single(c); +bdrv_parent_drained_begin_single(c, false); } } @@ -105,8 +105,9 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, return busy; } -void bdrv_parent_drained_begin_single(BdrvChild *c) +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) { +AioContext *ctx = bdrv_child_get_parent_aio_context(c); IO_OR_GS_CODE(); assert(!c->quiesced_parent); @@ -115,6 +116,9 @@ void bdrv_parent_drained_begin_single(BdrvChild *c) if (c->klass->drained_begin) { c->klass->drained_begin(c); } +if (poll) { +AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); +} } static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) diff --git a/include/block/block-io.h b/include/block/block-io.h index 52869ea08eb5..75d043204355 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -308,9 +308,10 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** * bdrv_parent_drained_begin_single: * - * Begin a quiesced section for the parent of @c. + * Begin a quiesced section for the parent of @c. If @poll is true, wait for + * any pending activity to cease. */ -void bdrv_parent_drained_begin_single(BdrvChild *c); +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); /** * bdrv_parent_drained_poll_single: -- 2.38.1
[PATCH 08/15] nbd: a BlockExport always has a BlockBackend
exp->common.blk cannot be NULL, nbd_export_delete() is only called from blk_exp_unref() and in turn that can only happen after blk_exp_add() has asserted exp->blk != NULL. Signed-off-by: Paolo Bonzini --- nbd/server.c | 14 ++ 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index c53c39560e50..462ddb0e4adb 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1848,15 +1848,13 @@ static void nbd_export_delete(BlockExport *blk_exp) g_free(exp->description); exp->description = NULL; -if (exp->common.blk) { -if (exp->eject_notifier_blk) { -notifier_remove(&exp->eject_notifier); -blk_unref(exp->eject_notifier_blk); -} -blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, -blk_aio_detach, exp); -blk_set_disable_request_queuing(exp->common.blk, false); +if (exp->eject_notifier_blk) { +notifier_remove(&exp->eject_notifier); +blk_unref(exp->eject_notifier_blk); } +blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, +blk_aio_detach, exp); +blk_set_disable_request_queuing(exp->common.blk, false); for (i = 0; i < exp->nr_export_bitmaps; i++) { bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false); -- 2.38.1
[Bug 1523246] Re: Virtio-blk does not support TRIM
I've checked the size of disk file with ls -alh and du --apparent-size commands I downloaded the 1G size file 5 times to make sure about sparse blocks issue, but the problem still exists -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1523246 Title: Virtio-blk does not support TRIM Status in QEMU: Fix Released Bug description: When model=virtio is used, TRIM is not supported. # mount -o discard /dev/vda4 /mnt # mount | tail -1 /dev/vda4 on /mnt type fuseblk (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other,blksize=4096) # fstrim /mnt/ fstrim: /mnt/: the discard operation is not supported Booting without model=virtio allows using TRIM (in Windows as well). Full QEMU line: qemu-system-x86_64 -enable-kvm -cpu host -bios /usr/share/ovmf/ovmf_x64.bin -smp 2 -m 7G -vga qxl -usbdevice tablet -net nic,model=virtio -net user -drive discard=unmap,detect- zeroes=unmap,cache=none,file=vms/win10.hd.img.vmdk,format=vmdk,if=virtio To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1523246/+subscriptions