[PULL v2 34/36] ui/dbus: add chardev backend & interface
From: Marc-André Lureau Add a new chardev backend which allows D-Bus client to handle the chardev stream & events. Signed-off-by: Marc-André Lureau Acked-by: Gerd Hoffmann --- qapi/char.json| 27 include/chardev/char-socket.h | 2 + include/qemu/dbus.h | 5 + ui/dbus.h | 44 + ui/dbus-chardev.c | 296 ++ ui/dbus.c | 26 +++ ui/dbus-display1.xml | 75 + ui/meson.build| 1 + 8 files changed, 476 insertions(+) create mode 100644 ui/dbus-chardev.c diff --git a/qapi/char.json b/qapi/char.json index f5133a5eeb37..7b421515751b 100644 --- a/qapi/char.json +++ b/qapi/char.json @@ -358,6 +358,20 @@ 'base': 'ChardevCommon', 'if': 'CONFIG_SPICE' } +## +# @ChardevDBus: +# +# Configuration info for DBus chardevs. +# +# @name: name of the channel (following docs/spice-port-fqdn.txt) +# +# Since: 7.0 +## +{ 'struct': 'ChardevDBus', + 'data': { 'name': 'str' }, + 'base': 'ChardevCommon', + 'if': 'CONFIG_DBUS_DISPLAY' } + ## # @ChardevVC: # @@ -422,6 +436,7 @@ # @spicevmc: Since 1.5 # @spiceport: Since 1.5 # @qemu-vdagent: Since 6.1 +# @dbus: Since 7.0 # @vc: v1.5 # @ringbuf: Since 1.6 # @memory: Since 1.5 @@ -447,6 +462,7 @@ { 'name': 'spicevmc', 'if': 'CONFIG_SPICE' }, { 'name': 'spiceport', 'if': 'CONFIG_SPICE' }, { 'name': 'qemu-vdagent', 'if': 'CONFIG_SPICE_PROTOCOL' }, +{ 'name': 'dbus', 'if': 'CONFIG_DBUS_DISPLAY' }, 'vc', 'ringbuf', # next one is just for compatibility @@ -535,6 +551,15 @@ 'data': { 'data': 'ChardevQemuVDAgent' }, 'if': 'CONFIG_SPICE_PROTOCOL' } +## +# @ChardevDBusWrapper: +# +# Since: 7.0 +## +{ 'struct': 'ChardevDBusWrapper', + 'data': { 'data': 'ChardevDBus' }, + 'if': 'CONFIG_DBUS_DISPLAY' } + ## # @ChardevVCWrapper: # @@ -582,6 +607,8 @@ 'if': 'CONFIG_SPICE' }, 'qemu-vdagent': { 'type': 'ChardevQemuVDAgentWrapper', 'if': 'CONFIG_SPICE_PROTOCOL' }, +'dbus': { 'type': 'ChardevDBusWrapper', + 'if': 'CONFIG_DBUS_DISPLAY' }, 'vc': 'ChardevVCWrapper', 'ringbuf': 'ChardevRingbufWrapper', # next one is just for compatibility diff --git a/include/chardev/char-socket.h b/include/chardev/char-socket.h index 1a9274f2e3ac..6b6e2ceba1d7 100644 --- a/include/chardev/char-socket.h +++ b/include/chardev/char-socket.h @@ -43,6 +43,8 @@ typedef enum { TCP_CHARDEV_STATE_CONNECTED, } TCPChardevState; +typedef ChardevClass SocketChardevClass; + struct SocketChardev { Chardev parent; QIOChannel *ioc; /* Client I/O channel */ diff --git a/include/qemu/dbus.h b/include/qemu/dbus.h index c0cbb1ca44d3..08f00dfd5342 100644 --- a/include/qemu/dbus.h +++ b/include/qemu/dbus.h @@ -12,6 +12,11 @@ #include +#include "qom/object.h" +#include "chardev/char.h" +#include "qemu/notify.h" +#include "qemu/typedefs.h" + /* glib/gio 2.68 */ #define DBUS_METHOD_INVOCATION_HANDLED TRUE #define DBUS_METHOD_INVOCATION_UNHANDLED FALSE diff --git a/ui/dbus.h b/ui/dbus.h index 3e89eafcab6e..64c77cab4441 100644 --- a/ui/dbus.h +++ b/ui/dbus.h @@ -24,6 +24,7 @@ #ifndef UI_DBUS_H_ #define UI_DBUS_H_ +#include "chardev/char-socket.h" #include "qemu/dbus.h" #include "qom/object.h" #include "ui/console.h" @@ -56,11 +57,15 @@ struct DBusDisplay { QemuDBusDisplay1Clipboard *clipboard; QemuDBusDisplay1Clipboard *clipboard_proxy; DBusClipboardRequest clipboard_request[QEMU_CLIPBOARD_SELECTION__COUNT]; + +Notifier notifier; }; #define TYPE_DBUS_DISPLAY "dbus-display" OBJECT_DECLARE_SIMPLE_TYPE(DBusDisplay, DBUS_DISPLAY) +void dbus_display_notifier_add(Notifier *notifier); + #define DBUS_DISPLAY_TYPE_CONSOLE dbus_display_console_get_type() G_DECLARE_FINAL_TYPE(DBusDisplayConsole, dbus_display_console, @@ -95,6 +100,45 @@ dbus_display_listener_get_bus_name(DBusDisplayListener *ddl); extern const DisplayChangeListenerOps dbus_gl_dcl_ops; extern const DisplayChangeListenerOps dbus_dcl_ops; +#define TYPE_CHARDEV_DBUS "chardev-dbus" + +typedef struct DBusChardevClass { +SocketChardevClass parent_class; + +void (*parent_chr_be_event)(Chardev *s, QEMUChrEvent event); +} DBusChardevClass; + +DECLARE_CLASS_CHECKERS(DBusChardevClass, DBUS_CHARDEV, + TYPE_CHARDEV_DBUS) + +typedef struct DBusChardev { +SocketChardev parent; + +bool exported; +QemuDBusDisplay1Chardev *iface; +} DBusChardev; + +DECLARE_INSTANCE_CHECKER(DBusChardev, DBUS_CHARDEV, TYPE_CHARDEV_DBUS) + +#define CHARDEV_IS_DBUS(chr) \ +object_dynamic_cast(OBJECT(chr), TYPE_CHARDEV_DBUS) + +typedef enum { +DBUS_DISPLAY_CHARDEV_OPEN, +DBUS_DISPLAY_CHARDEV_CLOSE, +} DBusDisplayEventType; + +typedef struct DBusDisplayEvent { +DBusDispl
Re: [PATCH 1/2] ui/vnc: refactor arrays of addresses to SocketAddressList
Hi On Mon, Dec 20, 2021 at 10:21 PM Vladimir Sementsov-Ogievskiy < vsement...@virtuozzo.com> wrote: > Let's use SocketAddressList instead of dynamic arrays. > Benefits: > - Automatic cleanup: don't need specific freeing function and drop >some gotos. > - Less indirection: no triple asterix anymore! > - Prepare for the following commit, which will reuse new interface of >vnc_display_listen(). > > Signed-off-by: Vladimir Sementsov-Ogievskiy > Nice Reviewed-by: Marc-André Lureau --- > ui/vnc.c | 129 ++- > 1 file changed, 51 insertions(+), 78 deletions(-) > > diff --git a/ui/vnc.c b/ui/vnc.c > index af02522e84..c9e26c70df 100644 > --- a/ui/vnc.c > +++ b/ui/vnc.c > @@ -3812,30 +3812,19 @@ static int vnc_display_get_address(const char > *addrstr, > return ret; > } > > -static void vnc_free_addresses(SocketAddress ***retsaddr, > - size_t *retnsaddr) > -{ > -size_t i; > - > -for (i = 0; i < *retnsaddr; i++) { > -qapi_free_SocketAddress((*retsaddr)[i]); > -} > -g_free(*retsaddr); > - > -*retsaddr = NULL; > -*retnsaddr = 0; > -} > - > static int vnc_display_get_addresses(QemuOpts *opts, > bool reverse, > - SocketAddress ***retsaddr, > - size_t *retnsaddr, > - SocketAddress ***retwsaddr, > - size_t *retnwsaddr, > + SocketAddressList **saddr_list_ret, > + SocketAddressList **wsaddr_list_ret, > Error **errp) > { > SocketAddress *saddr = NULL; > SocketAddress *wsaddr = NULL; > +g_autoptr(SocketAddressList) saddr_list = NULL; > +SocketAddressList **saddr_tail = &saddr_list; > +SocketAddress *single_saddr = NULL; > +g_autoptr(SocketAddressList) wsaddr_list = NULL; > +SocketAddressList **wsaddr_tail = &wsaddr_list; > QemuOptsIter addriter; > const char *addr; > int to = qemu_opt_get_number(opts, "to", 0); > @@ -3844,23 +3833,16 @@ static int vnc_display_get_addresses(QemuOpts > *opts, > bool ipv4 = qemu_opt_get_bool(opts, "ipv4", false); > bool ipv6 = qemu_opt_get_bool(opts, "ipv6", false); > int displaynum = -1; > -int ret = -1; > - > -*retsaddr = NULL; > -*retnsaddr = 0; > -*retwsaddr = NULL; > -*retnwsaddr = 0; > > addr = qemu_opt_get(opts, "vnc"); > if (addr == NULL || g_str_equal(addr, "none")) { > -ret = 0; > -goto cleanup; > +return 0; > } > if (qemu_opt_get(opts, "websocket") && > !qcrypto_hash_supports(QCRYPTO_HASH_ALG_SHA1)) { > error_setg(errp, > "SHA1 hash support is required for websockets"); > -goto cleanup; > +return -1; > } > > qemu_opt_iter_init(&addriter, opts, "vnc"); > @@ -3871,7 +3853,7 @@ static int vnc_display_get_addresses(QemuOpts *opts, > ipv4, ipv6, > &saddr, errp); > if (rv < 0) { > -goto cleanup; > +return -1; > } > /* Historical compat - first listen address can be used > * to set the default websocket port > @@ -3879,13 +3861,16 @@ static int vnc_display_get_addresses(QemuOpts > *opts, > if (displaynum == -1) { > displaynum = rv; > } > -*retsaddr = g_renew(SocketAddress *, *retsaddr, *retnsaddr + 1); > -(*retsaddr)[(*retnsaddr)++] = saddr; > +QAPI_LIST_APPEND(saddr_tail, saddr); > } > > -/* If we had multiple primary displays, we don't do defaults > - * for websocket, and require explicit config instead. */ > -if (*retnsaddr > 1) { > +if (saddr_list && !saddr_list->next) { > +single_saddr = saddr_list->value; > +} else { > +/* > + * If we had multiple primary displays, we don't do defaults > + * for websocket, and require explicit config instead. > + */ > displaynum = -1; > } > > @@ -3895,57 +3880,50 @@ static int vnc_display_get_addresses(QemuOpts > *opts, > has_ipv4, has_ipv6, > ipv4, ipv6, > &wsaddr, errp) < 0) { > -goto cleanup; > +return -1; > } > > /* Historical compat - if only a single listen address was > * provided, then this is used to set the default listen > * address for websocket too > */ > -if (*retnsaddr == 1 && > -(*retsaddr)[0]->type == SOCKET_ADDRESS_TYPE_INET && > +if (single_saddr && > +single_saddr->type == SOCKET_ADDRESS_TYPE_INET && > wsaddr->type == SOCKET_ADDR
Re: [PATCH v2 1/9] hw/intc: sifive_plic: Add a reset function
On Thu, Dec 16, 2021 at 12:54 PM Alistair Francis wrote: > > From: Alistair Francis > > Signed-off-by: Alistair Francis > --- > hw/intc/sifive_plic.c | 18 ++ > 1 file changed, 18 insertions(+) > Reviewed-by: Bin Meng
[PATCH] target/riscv/pmp: fix no pmp illegal intrs
From: Nikita Shubin As per the privilege specification, any access from S/U mode should fail if no pmp region is configured and pmp is present, othwerwise access should succeed. Fixes: d102f19a208 (target/riscv/pmp: Raise exception if no PMP entry is configured) Resolves: https://gitlab.com/qemu-project/qemu/-/issues/585 Signed-off-by: Nikita Shubin Reviewed-by: Alistair Francis --- target/riscv/op_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index ee7c24efe7..58d992e98a 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -146,7 +146,8 @@ target_ulong helper_mret(CPURISCVState *env, target_ulong cpu_pc_deb) uint64_t mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); -if (!pmp_get_num_rules(env) && (prev_priv != PRV_M)) { +if (riscv_feature(env, RISCV_FEATURE_PMP) && +!pmp_get_num_rules(env) && (prev_priv != PRV_M)) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } -- 2.31.1
Re: [PATCH 4/4] ui: Fix gtk/gl when the scaled virtual console does not fit the window
On 12/21/21 02:48, Marc-André Lureau wrote: Hi On Sun, Dec 19, 2021 at 6:32 AM Alexander Orzechowski wrote: gtk/gl was incorrectly always rendering as if the 'Zoom to Fit' was always checked even if it wasn't. This is now using logic closer to what is being used for the existing cairo code paths. Signed-off-by: Alexander Orzechowski This doesn't work as expected, the display is not being centered correctly. --- ui/gtk-gl-area.c | 34 +- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index 01e4e74ee3..ea72f1817b 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -41,16 +41,40 @@ void gd_gl_area_draw(VirtualConsole *vc) #ifdef CONFIG_GBM QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; #endif + GtkDisplayState *s = vc->s; int ww, wh, ws, y1, y2; + int mx, my; + int fbh, fbw; if (!vc->gfx.gls) { return; } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); + + fbw = surface_width(vc->gfx.ds); + fbh = surface_height(vc->gfx.ds); + ws = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); - ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area) * ws; - wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area) * ws; + ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area); + wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area); + + if (s->full_screen) { + vc->gfx.scale_x = (double)ww / fbw; + vc->gfx.scale_y = (double)wh / fbh; + } else if (s->free_scale) { + double sx, sy; + + sx = (double)ww / fbw; + sy = (double)wh / fbh; + + vc->gfx.scale_x = vc->gfx.scale_y = MIN(sx, sy); + } + + fbw *= vc->gfx.scale_x * ws; + fbh *= vc->gfx.scale_y * ws; + mx = (ww * ws - fbw) / 2; + my = (wh * ws - fbh) / 2; if (vc->gfx.scanout_mode) { if (!vc->gfx.guest_fb.framebuffer) { @@ -70,11 +94,11 @@ void gd_gl_area_draw(VirtualConsole *vc) glBindFramebuffer(GL_READ_FRAMEBUFFER, vc->gfx.guest_fb.framebuffer); /* GtkGLArea sets GL_DRAW_FRAMEBUFFER for us */ - glViewport(0, 0, ww, wh); + glViewport(mx, my, fbw, fbh); y1 = vc->gfx.y0_top ? 0 : vc->gfx.h; y2 = vc->gfx.y0_top ? vc->gfx.h : 0; glBlitFramebuffer(0, y1, vc->gfx.w, y2, - 0, 0, ww, wh, + mx, my, fbw, fbh, GL_COLOR_BUFFER_BIT, GL_NEAREST); #ifdef CONFIG_GBM if (dmabuf) { @@ -98,7 +122,7 @@ void gd_gl_area_draw(VirtualConsole *vc) } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); - surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh); + glViewport(mx, my, fbw, fbh); surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds); } -- 2.34.1 -- Marc-André Lureau It seems glBlitFramebuffer takes two coordinates instead of a width and height. I have corrected this locally and I will have v2 up tomorrow. Thanks for the thorough testing! -- Alexander Orzechowski
Re: [PATCH] tests/qtest: Make the filter tests independent from a specific NIC
On 21/12/2021 07.38, Zhang, Chen wrote: -Original Message- From: Qemu-devel On Behalf Of Thomas Huth Sent: Monday, December 20, 2021 6:30 PM To: qemu-devel@nongnu.org; Laurent Vivier Cc: Paolo Bonzini ; Yang Hongyang ; Zhang Chen Subject: [PATCH] tests/qtest: Make the filter tests independent from a specific NIC These filter tests need a NIC, no matter which one, so they use a common NIC by default. However, these common NIC models might not always have been compiled into the QEMU target binary, so assuming that a certain NIC is available is a bad idea. Since the exact type of NIC does not really matter for these tests, let's switch to "-nic" instead of "-netdev" so that QEMU can simply pick a default NIC for us. This way we can now run the tests on other targets that have a default machine with an on-board/default NIC, too. Oh, It's my and Hongyang's abandoned mailbox. Sorry, I only looked at the top of the *.c files and copied the e-mail address from there. Looks good to me. Thanks for the review! By the way, should I add the test/qtest/test-filter* to the MAINTAINER file? That might be helpful indeed to get you CC:-ed correctly next time. Thomas
Re: [PATCH 2/2] qapi/ui: introduce change-vnc-listen
Hi On Mon, Dec 20, 2021 at 10:24 PM Vladimir Sementsov-Ogievskiy < vsement...@virtuozzo.com> wrote: > Add command that can change addresses where VNC server listens for new > connections. Prior to 6.0 this functionality was available through > 'change' qmp command which was deleted. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > Looks good to me, Reviewed-by: Marc-André Lureau Could you write an avocado test for it? (tests/avocado/vnc.py) --- > docs/about/removed-features.rst | 3 ++- > qapi/ui.json| 12 > ui/vnc.c| 26 ++ > 3 files changed, 40 insertions(+), 1 deletion(-) > > diff --git a/docs/about/removed-features.rst > b/docs/about/removed-features.rst > index d42c3341de..20e6901a82 100644 > --- a/docs/about/removed-features.rst > +++ b/docs/about/removed-features.rst > @@ -348,7 +348,8 @@ documentation of ``query-hotpluggable-cpus`` for > additional details. > ``change`` (removed in 6.0) > ''' > > -Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. > +Use ``blockdev-change-medium`` or ``change-vnc-password`` or > +``change-vnc-listen`` instead. > > ``query-events`` (removed in 6.0) > ' > diff --git a/qapi/ui.json b/qapi/ui.json > index d7567ac866..14e6fe0b4c 100644 > --- a/qapi/ui.json > +++ b/qapi/ui.json > @@ -1304,3 +1304,15 @@ > { 'command': 'display-reload', >'data': 'DisplayReloadOptions', >'boxed' : true } > + > +## > +# @change-vnc-listen: > +# > +# Change set of addresses to listen for connections. > +# > +# Since: 7.0 > +# > +## > +{ 'command': 'change-vnc-listen', > + 'data': { 'id': 'str', 'addresses': ['SocketAddress'], > +'*websockets': ['SocketAddress'] } } > diff --git a/ui/vnc.c b/ui/vnc.c > index c9e26c70df..69bbf3b6f6 100644 > --- a/ui/vnc.c > +++ b/ui/vnc.c > @@ -4212,6 +4212,32 @@ fail: > vnc_display_close(vd); > } > > +void qmp_change_vnc_listen(const char *id, SocketAddressList *addresses, > + bool has_websockets, SocketAddressList > *websockets, > + Error **errp) > +{ > +VncDisplay *vd = vnc_display_find(id); > + > +if (!vd) { > +error_setg(errp, "VNC display '%s' not active", id); > +return; > +} > + > +if (vd->listener) { > +qio_net_listener_disconnect(vd->listener); > +object_unref(OBJECT(vd->listener)); > +} > +vd->listener = NULL; > + > +if (vd->wslistener) { > +qio_net_listener_disconnect(vd->wslistener); > +object_unref(OBJECT(vd->wslistener)); > +} > +vd->wslistener = NULL; > + > +vnc_display_listen(vd, addresses, websockets, errp); > +} > + > void vnc_display_add_client(const char *id, int csock, bool skipauth) > { > VncDisplay *vd = vnc_display_find(id); > -- > 2.31.1 > > > -- Marc-André Lureau
[PATCH] MAINTAINERS: Update COLO Proxy section
Signed-off-by: Zhang Chen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1de6ce6e44..5479b9376e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2983,6 +2983,7 @@ F: docs/colo-proxy.txt F: net/colo* F: net/filter-rewriter.c F: net/filter-mirror.c +F: tests/qtest/test-filter* Record/replay M: Pavel Dovgalyuk -- 2.25.1
RE: [PATCH] tests/qtest: Make the filter tests independent from a specific NIC
> -Original Message- > From: Thomas Huth > Sent: Tuesday, December 21, 2021 3:35 PM > To: Zhang, Chen ; qemu-devel@nongnu.org; > Laurent Vivier > Cc: Paolo Bonzini > Subject: Re: [PATCH] tests/qtest: Make the filter tests independent from a > specific NIC > > On 21/12/2021 07.38, Zhang, Chen wrote: > > > > > >> -Original Message- > >> From: Qemu-devel >> bounces+chen.zhang=intel@nongnu.org> On Behalf Of Thomas > Huth > >> Sent: Monday, December 20, 2021 6:30 PM > >> To: qemu-devel@nongnu.org; Laurent Vivier > >> Cc: Paolo Bonzini ; Yang Hongyang > >> ; Zhang Chen > >> Subject: [PATCH] tests/qtest: Make the filter tests independent from > >> a specific NIC > >> > >> These filter tests need a NIC, no matter which one, so they use a > >> common NIC by default. However, these common NIC models might not > >> always have been compiled into the QEMU target binary, so assuming > >> that a certain NIC is available is a bad idea. Since the exact type > >> of NIC does not really matter for these tests, let's switch to "-nic" > >> instead of "-netdev" so that QEMU can simply pick a default NIC for us. > >> This way we can now run the tests on other targets that have a > >> default machine with an on-board/default NIC, too. > >> > > > > Oh, It's my and Hongyang's abandoned mailbox. > > Sorry, I only looked at the top of the *.c files and copied the e-mail address > from there. > > > Looks good to me. > > Thanks for the review! > > > By the way, should I add the test/qtest/test-filter* to the MAINTAINER file? > > That might be helpful indeed to get you CC:-ed correctly next time. Already send a patch to update it. Thanks Chen > > Thomas
Re: powernv gitlab ci regression
On 12/21/21 03:37, Daniel Henrique Barboza wrote: Hey, On 12/20/21 18:35, Richard Henderson wrote: Hi guys, Somewhere within Merge tag 'pull-ppc-20211217' of https://github.com/legoater/qemu into staging ppc 7.0 queue: * General cleanup for Mac machines (Peter) * Fixes for FPU exceptions (Lucas) * Support for new ISA31 instructions (Matheus) * Fixes for ivshmem (Daniel) * Cleanups for PowerNV PHB (Christophe and Cedric) * Updates of PowerNV and pSeries documentation (Leonardo and Daniel) * Fixes for PowerNV (Daniel) * Large cleanup of FPU implementation (Richard) * Removal of SoftTLBs support for PPC74x CPUs (Fabiano) * Fixes for exception models in MPCx and 60x CPUs (Fabiano) * Removal of 401/403 CPUs (Cedric) * Deprecation of taihu machine (Thomas) * Large rework of PPC405 machine (Cedric) * Fixes for VSX instructions (Victor and Matheus) * Fix for e6500 CPU (Fabiano) * Initial support for PMU (Daniel) is something that has caused a timeout regression in avocado-system-centos: (047/171) tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv8: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n{'name': '047-tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv8', 'logdir': '/builds/qemu-project/qemu/build/tests/results/job-2021-12-17T19.23-... (90.46 s) (048/171) tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv9: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n{'name': '048-tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv9', 'logdir': '/builds/qemu-project/qemu/build/tests/results/job-2021-12-17T19.23-... (90.55 s) See e.g. https://gitlab.com/qemu-project/qemu/-/jobs/1898304074 Thanks for letting us know. I bisected it and the culprit is this patch: commit 4db3907a40a087e2cc1839d19a3642539d36610b Author: Daniel Henrique Barboza Date: Fri Dec 17 17:57:18 2021 +0100 target/ppc: enable PMU instruction count This is a patch where I added instruction count in the ppc64 PMU. After this patch the performance of these 2 tests are degraded to the point where we're hitting timeouts in gitlab (didn't hit timeouts in my machine but the performance is noticeable worse). I'll need to see the serial console of the VM booting up to evaluate if there's some kernel module during boot time that is using the PMU and causing the delay. I'll also take a look into improving the performance as well (e.g. using more TCG code and avoid calling helpers). Run with : build/tests/venv/bin/avocado --show=app,console run -t machine:powernv9 build/tests/avocado/boot_linux_console.py * 6.2 ... console: [1.559904] PCI: CLS 0 bytes, default 128 /console: [8.830245] Initialise system trusted keyrings console: [8.832347] Key type blacklist registered console: [8.834558] workingset: timestamp_bits=54 max_order=14 bucket_order=0 console: [9.073051] integrity: Platform Keyring initialized console: [9.073586] Key type asymmetric registered console: [9.074025] Asymmetric key parser 'x509' registered console: [9.075359] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 251) console: [9.095115] IPMI message handler: version 39.2 console: [9.096161] ipmi device interface console: [9.514308] ipmi-powernv ibm,opal:ipmi: IPMI message handler: Found new BMC (man_id: 0x00, prod_id: 0x, dev_id: 0x20) -console: [ 10.171273] IPMI Watchdog: driver initialized \console: [ 10.974462] hvc0: raw protocol on /ibm,opal/consoles/serial@0 (boot console) console: [ 10.975059] hvc0: No interrupts property, using OPAL event console: [ 10.989699] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled console: [ 11.156033] brd: module loaded console: [ 11.235965] loop: module loaded console: [ 11.249922] libphy: Fixed MDIO Bus: probed console: [ 11.254128] i2c /dev entries driver console: [ 11.255782] powernv-cpufreq: ibm,pstate-min node not found console: [ 11.256134] powernv-cpufreq: Platform driver disabled. System does not support PState control console: [ 11.273326] ipip: IPv4 and MPLS over IPv4 tunneling driver console: [ 11.303989] NET: Registered protocol family 10 console: [ 11.323651] Segment Routing with IPv6 console: [ 11.325267] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver console: [ 11.335866] NET: Registered protocol family 17 console: [ 11.336900] Key type dns_resolver registered console: [ 11.337358] secvar-sysfs: secvar: failed to retrieve secvar operations. console: [ 11.337877] drmem: No dynamic reconfiguration memory found console: [ 11.341767] Loading compiled-in X.509 certificates console: [ 11.362272] Loaded X.509 cert 'Build time autogenerated kernel key: 987b64c96d830fe42d02bbf502e028ebe85c2b4e' console: [ 11.667162] Key type encrypted registered console: [ 11.674616] ima: No TP
Re: [PATCH v2 4/9] hw/intc: sifive_plic: Cleanup remaining functions
On Thu, Dec 16, 2021 at 12:55 PM Alistair Francis wrote: > > From: Alistair Francis > > We can remove the original sifive_plic_irqs_pending() function and > instead just use the sifive_plic_claim() function (renamed to > sifive_plic_claimed()) to determine if any interrupts are pending. > > This requires move the side effects outside of sifive_plic_claimed(), > but as they are only invoked once that isn't a problem. > > We have also removed all of the old #ifdef debugging logs, so let's > cleanup the last remaining debug function while we are here. > > Signed-off-by: Alistair Francis > --- > hw/intc/sifive_plic.c | 109 +- > 1 file changed, 22 insertions(+), 87 deletions(-) > Reviewed-by: Bin Meng
Re: [PATCH] MAINTAINERS: Update COLO Proxy section
Cc'ing qemu-trivial@ On 12/21/21 09:04, Zhang Chen wrote: > Signed-off-by: Zhang Chen > --- > MAINTAINERS | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/MAINTAINERS b/MAINTAINERS > index 1de6ce6e44..5479b9376e 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -2983,6 +2983,7 @@ F: docs/colo-proxy.txt > F: net/colo* > F: net/filter-rewriter.c > F: net/filter-mirror.c > +F: tests/qtest/test-filter* Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH] vl: Add opts to device opts list when using JSON syntax for -device
Cc'ing Markus. On 12/20/21 09:45, MkfsSion wrote: > When using JSON syntax for -device, -set option can not find device > specified in JSON by id field. The following commandline is an example: > > $ qemu-system-x86_64 -device '{"id":"foo"}' -set device.foo.bar=1 > qemu-system-x86_64: -set device.foo.bar=1: there is no device "foo" defined > > The patch adds device opts to device opts list when a device opts get > parsed. > > Signed-off-by: MkfsSion BTW per: https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line "Please use your real name to sign a patch (not an alias or acronym)." > --- > softmmu/vl.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/softmmu/vl.c b/softmmu/vl.c > index 620a1f1367..0dd5acbc1a 100644 > --- a/softmmu/vl.c > +++ b/softmmu/vl.c > @@ -3400,6 +3400,8 @@ void qemu_init(int argc, char **argv, char **envp) > loc_save(&opt->loc); > assert(opt->opts != NULL); > QTAILQ_INSERT_TAIL(&device_opts, opt, next); > +qemu_opts_from_qdict(qemu_find_opts_err("device", > &error_fatal), > + opt->opts, &error_fatal); > } else { > if (!qemu_opts_parse_noisily(qemu_find_opts("device"), > optarg, true)) {
Re: [PATCH 2/4] ui: Remove unnecessary checks
On 12/21/21 02:40, Marc-André Lureau wrote: Hi On Sun, Dec 19, 2021 at 6:32 AM Alexander Orzechowski wrote: These conditionals should never be false as scale_x and scale_y should scale the fbw and fbh variables such that the ww and wh variables always have a greater magnitude. Signed-off-by: Alexander Orzechowski I don't understand how you reached that conclusion. scale_x/scale_y can have various values, from 0.25 manually, or pretty much anything in freescale. Just adding a breakpoint/debug there and you can see they can be false. --- ui/gtk.c | 27 ++- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/ui/gtk.c b/ui/gtk.c index 824334ff3d..f2d74b253d 100644 --- a/ui/gtk.c +++ b/ui/gtk.c @@ -416,13 +416,8 @@ static void gd_update(DisplayChangeListener *dcl, ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area); wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area); - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; - } - if (wh > fbh) { - my = (wh - fbh) / 2; - } + mx = (ww - fbw) / 2; + my = (wh - fbh) / 2; gtk_widget_queue_draw_area(vc->gfx.drawing_area, mx + x1, my + y1, (x2 - x1), (y2 - y1)); @@ -801,13 +796,8 @@ static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) fbw *= vc->gfx.scale_x; fbh *= vc->gfx.scale_y; - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; - } - if (wh > fbh) { - my = (wh - fbh) / 2; - } + mx = (ww - fbw) / 2; + my = (wh - fbh) / 2; cairo_rectangle(cr, 0, 0, ww, wh); @@ -850,13 +840,8 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, ws = gdk_window_get_scale_factor( gtk_widget_get_window(vc->gfx.drawing_area)); - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; - } - if (wh > fbh) { - my = (wh - fbh) / 2; - } + mx = (ww - fbw) / 2; + my = (wh - fbh) / 2; x = (motion->x - mx) / vc->gfx.scale_x * ws; y = (motion->y - my) / vc->gfx.scale_y * ws; -- 2.34.1 -- Marc-André Lureau Thanks for the thorough review. I didn't realize you could set the scale manually, but it was under my impression that qemu would set the GDK_HINT_MIN_SIZE property[1] to try to disallow the user from resizing the window any smaller than the size of the virtual console. Qemu provides no mechanism to change the translation of the virtual console within the window in this case where the window is smaller than the virtual console would normally allow, I consider this invalid state. The mx and my variables are only used to translate the virtual console within its render surface. Thus, if qemu were to enter this "invalid" state as I've called it, the view would show the middle of the virtual console and obscure the edges. Without this patch it would show the top left, obscuring the bottom right. I am happy to drop this patch for v2. Please let me know your thoughts. [1]: See ui/gtk.c line 279 with the patches applied. -- Alexander Orzechowski
Re: [RFC v2 2/2] migration: Tally pre-copy, downtime and post-copy bytes independently
On 12/20/21 10:34, David Edmondson wrote: > Provide information on the number of bytes copied in the pre-copy, > downtime and post-copy phases of migration. > > Signed-off-by: David Edmondson > --- > migration/migration.c | 3 +++ > migration/ram.c | 7 +++ > monitor/hmp-cmds.c| 12 > qapi/migration.json | 13 - > 4 files changed, 34 insertions(+), 1 deletion(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [RFC v2 1/2] migration: Introduce ram_transferred_add()
On 12/20/21 10:34, David Edmondson wrote: > ...and use it. FYI Not all mails readers / git tools display subject along with content, so it is more helpful to rewrite the subject. > Signed-off-by: David Edmondson > --- > migration/ram.c | 23 ++- > 1 file changed, 14 insertions(+), 9 deletions(-) > > diff --git a/migration/ram.c b/migration/ram.c > index 57efa67f20..bd53e50a7f 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -386,6 +386,11 @@ uint64_t ram_bytes_remaining(void) > > MigrationStats ram_counters; > > +static void ram_transferred_add(uint64_t bytes) > +{ > +ram_counters.transferred += bytes; > +} Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH v11 00/31] LoongArch64 port of QEMU TCG
On 12/21/21 06:40, WANG Xuerui wrote: > Hi all, > > This is a port of QEMU TCG to the brand-new CPU architecture LoongArch, > introduced by Loongson with their 3A5000 chips. > > Everything is tested on real 3A5000 board (system emulation, linux-user, > make check) and GitLab (CI jobs), and rebased to latest master branch. > ## How to build and test this > As for the hardware availability, the boards can already be bought in > China on Taobao, and I think some people at Loongson might be able to > arrange for testing environments, if testing on real hardware other than > mine is required before merging; they have their in-house Debian spin-off > from the early days of this architecture. Their kernel is > ABI-incompatible with the version being upstreamed and used by me, but > QEMU should work there regardless. I took few hours to translate and read all Taobao contracts before registering, then got blacklisted at my first login... Maybe others will get more luck. Having someone at Loongson helping with hardware is certainly easier for the community. > Lastly, I'm new to QEMU development and this is my first patch series > here; apologizes if I get anything wrong, and any help or suggestion is > certainly appreciated! For a first patch series, this is an impressive one... Regards, Phil.
Re: [PATCH] MAINTAINERS: Update COLO Proxy section
On 21/12/2021 09.04, Zhang Chen wrote: Signed-off-by: Zhang Chen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1de6ce6e44..5479b9376e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2983,6 +2983,7 @@ F: docs/colo-proxy.txt F: net/colo* F: net/filter-rewriter.c F: net/filter-mirror.c +F: tests/qtest/test-filter* Record/replay M: Pavel Dovgalyuk Reviewed-by: Thomas Huth
Re: [PATCH] ps2: Initial horizontal scroll support
Hi On Tue, Dec 21, 2021 at 4:10 AM Dmitry Petrov wrote: > This patch introduces horizontal scroll support for the ps/2 mouse. > It includes changes in the ps/2 device driver as well as support > for three display options - cocoa, gtk and sdl, tested and working > on all of them against guest ubuntu system. > > The patch is based on the previous work by Brad Jorsch done in 2010 > but never merge, see > https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=579968 You should split the patch for the different subsystems/ui etc Looks good to me, although I didn't test it yet. Some comments below > > Signed-off-by: Dmitry Petrov > --- > hw/input/ps2.c| 54 --- > qapi/ui.json | 2 +- > ui/cocoa.m| 18 ++-- > ui/gtk.c | 54 --- > ui/input-legacy.c | 16 ++ > ui/sdl2.c | 5 + > 6 files changed, 122 insertions(+), 27 deletions(-) > > diff --git a/hw/input/ps2.c b/hw/input/ps2.c > index 9376a8f4ce..9e42284cd9 100644 > --- a/hw/input/ps2.c > +++ b/hw/input/ps2.c > @@ -123,6 +123,7 @@ typedef struct { > int mouse_dx; /* current values, needed for 'poll' mode */ > int mouse_dy; > int mouse_dz; > +int mouse_dw; > uint8_t mouse_buttons; > } PS2MouseState; > > @@ -715,7 +716,7 @@ static int ps2_mouse_send_packet(PS2MouseState *s) > /* IMPS/2 and IMEX send 4 bytes, PS2 sends 3 bytes */ > const int needed = s->mouse_type ? 4 : 3; > unsigned int b; > -int dx1, dy1, dz1; > +int dx1, dy1, dz1, dw1; > > if (PS2_QUEUE_SIZE - s->common.queue.count < needed) { > return 0; > @@ -724,6 +725,7 @@ static int ps2_mouse_send_packet(PS2MouseState *s) > dx1 = s->mouse_dx; > dy1 = s->mouse_dy; > dz1 = s->mouse_dz; > +dw1 = s->mouse_dw; > /* XXX: increase range to 8 bits ? */ > if (dx1 > 127) > dx1 = 127; > @@ -740,6 +742,9 @@ static int ps2_mouse_send_packet(PS2MouseState *s) > /* extra byte for IMPS/2 or IMEX */ > switch(s->mouse_type) { > default: > +/* Just ignore the wheels if not supported */ > +s->mouse_dz = 0; > +s->mouse_dw = 0; > break; > case 3: > if (dz1 > 127) > @@ -747,13 +752,38 @@ static int ps2_mouse_send_packet(PS2MouseState *s) > else if (dz1 < -127) > dz1 = -127; > ps2_queue_noirq(&s->common, dz1 & 0xff); > +s->mouse_dz -= dz1; > +s->mouse_dw = 0; > break; > case 4: > -if (dz1 > 7) > -dz1 = 7; > -else if (dz1 < -7) > -dz1 = -7; > -b = (dz1 & 0x0f) | ((s->mouse_buttons & 0x18) << 1); > +/* > + * This matches what the Linux kernel expects for exps/2 in > + * drivers/input/mouse/psmouse-base.c. Note, if you happen to > + * press/release the 4th or 5th buttons at the same moment as a > + * horizontal wheel scroll, those button presses will get lost. > I'm not > + * sure what to do about that, since by this point we don't know > + * whether those buttons actually changed state. > + */ > Reading the kernel code helped me guess what is going on, but it would be nice to have more doc or link to specifications instead. > +if (dw1 != 0) { > +if (dw1 > 15) { > +dw1 = 15; > +} else if (dw1 < -15) { > +dw1 = -15; > +} > + > +/* 0x3f was found by trial and error vs ubuntu instance */ > +b = (dw1 & 0x3f) | 0x40; > Ok, clamp at 15 (I think you could go at 31 actually, since 5 bits seem to be used) and go to the kernel: case 0x40: /* horizontal scroll on IntelliMouse Explorer 4.0 */ This case doesn't handle buttons simultaneously indeed. I think 0x3f comes from 5 bits + 1 sign bit. +s->mouse_dw -= dw1; > +} else { > +if (dz1 > 7) { > +dz1 = 7; > +} else if (dz1 < -7) { > +dz1 = -7; > +} > + > +b = (dz1 & 0x0f) | ((s->mouse_buttons & 0x18) << 1); > +s->mouse_dz -= dz1; > Here clamp at 7, since we should fall in the kernel case 0x00: and only 3 bits seem to be used (thus & 0x0f for 3+1 sign). This case handles buttons simultaneously, but only vertical scroll (unless a4tech_workaround is set and triggered) > +} > ps2_queue_noirq(&s->common, b); > break; > } > @@ -764,7 +794,6 @@ static int ps2_mouse_send_packet(PS2MouseState *s) > /* update deltas */ > s->mouse_dx -= dx1; > s->mouse_dy -= dy1; > -s->mouse_dz -= dz1; > > return 1; > } > @@ -806,6 +835,12 @@ static void ps2_mouse_event(DeviceState *dev, > QemuConsole *src, > } else if (btn->button == INPUT_BUTTON_WHEEL_DOWN) { > s->mouse_dz++; > } > + > +if (btn->button == I
[PATCH] KVM: x86: ignore interrupt_bitmap field of KVM_GET/SET_SREGS
This is unnecessary, because the interrupt would be retrieved and queued anyway by KVM_GET_VCPU_EVENTS and KVM_SET_VCPU_EVENTS respectively, and it makes the flow more similar to the one for KVM_GET/SET_SREGS2. Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 22 -- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index d81745620b..b42bcbc363 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2607,11 +2607,11 @@ static int kvm_put_sregs(X86CPU *cpu) CPUX86State *env = &cpu->env; struct kvm_sregs sregs; +/* + * The interrupt_bitmap is ignored because KVM_SET_SREGS is + * always followed by KVM_SET_VCPU_EVENTS. + */ memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); -if (env->interrupt_injected >= 0) { -sregs.interrupt_bitmap[env->interrupt_injected / 64] |= -(uint64_t)1 << (env->interrupt_injected % 64); -} if ((env->eflags & VM_MASK)) { set_v8086_seg(&sregs.cs, &env->segs[R_CS]); @@ -3348,16 +3348,10 @@ static int kvm_get_sregs(X86CPU *cpu) return ret; } -/* There can only be one pending IRQ set in the bitmap at a time, so try - to find it and save its number instead (-1 for none). */ -env->interrupt_injected = -1; -for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) { -if (sregs.interrupt_bitmap[i]) { -bit = ctz64(sregs.interrupt_bitmap[i]); -env->interrupt_injected = i * 64 + bit; -break; -} -} +/* + * The interrupt_bitmap is ignored because KVM_GET_SREGS is + * always preceded by KVM_GET_VCPU_EVENTS. + */ get_seg(&env->segs[R_CS], &sregs.cs); get_seg(&env->segs[R_DS], &sregs.ds); -- 2.33.1
[PATCH v3 1/2] migration: Introduce ram_transferred_add()
Replace direct manipulation of ram_counters.transferred with a function. Signed-off-by: David Edmondson Reviewed-by: Philippe Mathieu-Daudé --- migration/ram.c | 23 ++- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 57efa67f20..bd53e50a7f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -386,6 +386,11 @@ uint64_t ram_bytes_remaining(void) MigrationStats ram_counters; +static void ram_transferred_add(uint64_t bytes) +{ +ram_counters.transferred += bytes; +} + /* used by the search for pages to send */ struct PageSearchStatus { /* Current block being searched */ @@ -767,7 +772,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, * RAM_SAVE_FLAG_CONTINUE. */ xbzrle_counters.bytes += bytes_xbzrle - 8; -ram_counters.transferred += bytes_xbzrle; +ram_transferred_add(bytes_xbzrle); return 1; } @@ -1198,7 +1203,7 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) if (len) { ram_counters.duplicate++; -ram_counters.transferred += len; +ram_transferred_add(len); return 1; } return -1; @@ -1234,7 +1239,7 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, } if (bytes_xmit) { -ram_counters.transferred += bytes_xmit; +ram_transferred_add(bytes_xmit); *pages = 1; } @@ -1265,8 +1270,8 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, uint8_t *buf, bool async) { -ram_counters.transferred += save_page_header(rs, rs->f, block, - offset | RAM_SAVE_FLAG_PAGE); +ram_transferred_add(save_page_header(rs, rs->f, block, + offset | RAM_SAVE_FLAG_PAGE)); if (async) { qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, migrate_release_ram() & @@ -1274,7 +1279,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, } else { qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); } -ram_counters.transferred += TARGET_PAGE_SIZE; +ram_transferred_add(TARGET_PAGE_SIZE); ram_counters.normal++; return 1; } @@ -1373,7 +1378,7 @@ exit: static void update_compress_thread_counts(const CompressParam *param, int bytes_xmit) { -ram_counters.transferred += bytes_xmit; +ram_transferred_add(bytes_xmit); if (param->zero_page) { ram_counters.duplicate++; @@ -2298,7 +2303,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) ram_counters.duplicate += pages; } else { ram_counters.normal += pages; -ram_counters.transferred += size; +ram_transferred_add(size); qemu_update_position(f, size); } } @@ -3133,7 +3138,7 @@ out: multifd_send_sync_main(rs->f); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); -ram_counters.transferred += 8; +ram_transferred_add(8); ret = qemu_file_get_error(f); } -- 2.33.0
[PATCH v3 0/2] migration: Tally pre-copy, downtime and post-copy bytes independently
When examining a report of poor migration behaviour, it would often be useful to understand how much data was transferred in different phases of the migration process. For example, if the downtime limit is exceeded, to know how much data was transferred during the downtime. RFC because the name "ram_transferred_add" doesn't seem great, and I'm unsure whether the tests to determine the phase in the second patch are the most appropriate. v3: - Add r-by (Philippe) - Improve a commit message (Philippe) v2: - ram_transferred_add() should be static (Philippe) - Document the new MigrationStats fields (dme) David Edmondson (2): migration: Introduce ram_transferred_add() migration: Tally pre-copy, downtime and post-copy bytes independently migration/migration.c | 3 +++ migration/ram.c | 30 +- monitor/hmp-cmds.c| 12 qapi/migration.json | 13 - 4 files changed, 48 insertions(+), 10 deletions(-) -- 2.33.0
Re: [PATCH 3/5] migration: ram_release_pages() always receive 1 page as argument
Philippe Mathieu-Daudé wrote: > On 12/16/21 10:13, Juan Quintela wrote: >> Remove the pages argument. And s/pages/page/ >> >> Signed-off-by: Juan Quintela >> --- >> migration/ram.c | 8 >> 1 file changed, 4 insertions(+), 4 deletions(-) > >> -static void ram_release_pages(const char *rbname, uint64_t offset, int >> pages) >> +static void ram_release_page(const char *rbname, uint64_t offset) >> { >> if (!migrate_release_ram() || !migration_in_postcopy()) { >> return; >> } >> >> -ram_discard_range(rbname, offset, ((ram_addr_t)pages) << >> TARGET_PAGE_BITS); >> +ram_discard_range(rbname, offset, ((ram_addr_t)1) << TARGET_PAGE_BITS); > > 1ULL? I am changing it, but the argument to ram_discard_range is a size_t, and that is different in 32 bits arch. Once told that, it is not worse that what we have here, as ram_addr_t type depends on the phase of the moon. /* address in the RAM (different from a physical address) */ #if defined(CONFIG_XEN_BACKEND) typedef uint64_t ram_addr_t; # define RAM_ADDR_MAX UINT64_MAX # define RAM_ADDR_FMT "%" PRIx64 #else typedef uintptr_t ram_addr_t; # define RAM_ADDR_MAX UINTPTR_MAX # define RAM_ADDR_FMT "%" PRIxPTR #endif Later, Juan. PD. No, I don't know either why it is not casted to size_t. PD2. And yes, I still think that pure int operations should be ok. The value TARGET_PAGE_BITS more typical is 10, and here pages is only used with value 1. C promotion rules should make everything ok (famous last words).
[PATCH v3 2/2] migration: Tally pre-copy, downtime and post-copy bytes independently
Provide information on the number of bytes copied in the pre-copy, downtime and post-copy phases of migration. Signed-off-by: David Edmondson Reviewed-by: Philippe Mathieu-Daudé --- migration/migration.c | 3 +++ migration/ram.c | 7 +++ monitor/hmp-cmds.c| 12 qapi/migration.json | 13 - 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 3de11ae921..3950510be7 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1013,6 +1013,9 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->ram->page_size = page_size; info->ram->multifd_bytes = ram_counters.multifd_bytes; info->ram->pages_per_second = s->pages_per_second; +info->ram->precopy_bytes = ram_counters.precopy_bytes; +info->ram->downtime_bytes = ram_counters.downtime_bytes; +info->ram->postcopy_bytes = ram_counters.postcopy_bytes; if (migrate_use_xbzrle()) { info->has_xbzrle_cache = true; diff --git a/migration/ram.c b/migration/ram.c index bd53e50a7f..389868c988 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -388,6 +388,13 @@ MigrationStats ram_counters; static void ram_transferred_add(uint64_t bytes) { +if (runstate_is_running()) { +ram_counters.precopy_bytes += bytes; +} else if (migration_in_postcopy()) { +ram_counters.postcopy_bytes += bytes; +} else { +ram_counters.downtime_bytes += bytes; +} ram_counters.transferred += bytes; } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 9c91bf93e9..6049772178 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -293,6 +293,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", info->ram->postcopy_requests); } +if (info->ram->precopy_bytes) { +monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n", + info->ram->precopy_bytes >> 10); +} +if (info->ram->downtime_bytes) { +monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n", + info->ram->downtime_bytes >> 10); +} +if (info->ram->postcopy_bytes) { +monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", + info->ram->postcopy_bytes >> 10); +} } if (info->has_disk) { diff --git a/qapi/migration.json b/qapi/migration.json index bbfd48cf0b..5975a0e104 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -46,6 +46,15 @@ # @pages-per-second: the number of memory pages transferred per second #(Since 4.0) # +# @precopy-bytes: The number of bytes sent in the pre-copy phase +# (since 7.0). +# +# @downtime-bytes: The number of bytes sent while the guest is paused +# (since 7.0). +# +# @postcopy-bytes: The number of bytes sent during the post-copy phase +# (since 7.0). +# # Since: 0.14 ## { 'struct': 'MigrationStats', @@ -54,7 +63,9 @@ 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', 'mbps' : 'number', 'dirty-sync-count' : 'int', 'postcopy-requests' : 'int', 'page-size' : 'int', - 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } } + 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', + 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', + 'postcopy-bytes' : 'uint64' } } ## # @XBZRLECacheStats: -- 2.33.0
Re: [PATCH v11 00/31] LoongArch64 port of QEMU TCG
Hi, On 2021/12/21 下午4:44, Philippe Mathieu-Daudé wrote: I took few hours to translate and read all Taobao contracts before registering, then got blacklisted at my first login... Maybe others will get more luck. Having someone at Loongson helping with hardware is certainly easier for the community. Loongson company can donate 3a5000 computers or provide an IP access to 3a5000 hardware environment. Which way is better? Thanks. Song Gao
Re: powernv gitlab ci regression
On 12/21/21 05:20, Cédric Le Goater wrote: On 12/21/21 03:37, Daniel Henrique Barboza wrote: Hey, On 12/20/21 18:35, Richard Henderson wrote: Hi guys, Somewhere within Merge tag 'pull-ppc-20211217' of https://github.com/legoater/qemu into staging ppc 7.0 queue: * General cleanup for Mac machines (Peter) * Fixes for FPU exceptions (Lucas) * Support for new ISA31 instructions (Matheus) * Fixes for ivshmem (Daniel) * Cleanups for PowerNV PHB (Christophe and Cedric) * Updates of PowerNV and pSeries documentation (Leonardo and Daniel) * Fixes for PowerNV (Daniel) * Large cleanup of FPU implementation (Richard) * Removal of SoftTLBs support for PPC74x CPUs (Fabiano) * Fixes for exception models in MPCx and 60x CPUs (Fabiano) * Removal of 401/403 CPUs (Cedric) * Deprecation of taihu machine (Thomas) * Large rework of PPC405 machine (Cedric) * Fixes for VSX instructions (Victor and Matheus) * Fix for e6500 CPU (Fabiano) * Initial support for PMU (Daniel) is something that has caused a timeout regression in avocado-system-centos: (047/171) tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv8: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n{'name': '047-tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv8', 'logdir': '/builds/qemu-project/qemu/build/tests/results/job-2021-12-17T19.23-... (90.46 s) (048/171) tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv9: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n{'name': '048-tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv9', 'logdir': '/builds/qemu-project/qemu/build/tests/results/job-2021-12-17T19.23-... (90.55 s) See e.g. https://gitlab.com/qemu-project/qemu/-/jobs/1898304074 Thanks for letting us know. I bisected it and the culprit is this patch: commit 4db3907a40a087e2cc1839d19a3642539d36610b Author: Daniel Henrique Barboza Date: Fri Dec 17 17:57:18 2021 +0100 target/ppc: enable PMU instruction count This is a patch where I added instruction count in the ppc64 PMU. After this patch the performance of these 2 tests are degraded to the point where we're hitting timeouts in gitlab (didn't hit timeouts in my machine but the performance is noticeable worse). I'll need to see the serial console of the VM booting up to evaluate if there's some kernel module during boot time that is using the PMU and causing the delay. I'll also take a look into improving the performance as well (e.g. using more TCG code and avoid calling helpers). Run with : build/tests/venv/bin/avocado --show=app,console run -t machine:powernv9 build/tests/avocado/boot_linux_console.py * 6.2 ... console: [ 1.559904] PCI: CLS 0 bytes, default 128 /console: [ 8.830245] Initialise system trusted keyrings console: [ 8.832347] Key type blacklist registered console: [ 8.834558] workingset: timestamp_bits=54 max_order=14 bucket_order=0 console: [ 9.073051] integrity: Platform Keyring initialized console: [ 9.073586] Key type asymmetric registered console: [ 9.074025] Asymmetric key parser 'x509' registered console: [ 9.075359] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 251) console: [ 9.095115] IPMI message handler: version 39.2 console: [ 9.096161] ipmi device interface console: [ 9.514308] ipmi-powernv ibm,opal:ipmi: IPMI message handler: Found new BMC (man_id: 0x00, prod_id: 0x, dev_id: 0x20) -console: [ 10.171273] IPMI Watchdog: driver initialized \console: [ 10.974462] hvc0: raw protocol on /ibm,opal/consoles/serial@0 (boot console) console: [ 10.975059] hvc0: No interrupts property, using OPAL event console: [ 10.989699] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled console: [ 11.156033] brd: module loaded console: [ 11.235965] loop: module loaded console: [ 11.249922] libphy: Fixed MDIO Bus: probed console: [ 11.254128] i2c /dev entries driver console: [ 11.255782] powernv-cpufreq: ibm,pstate-min node not found console: [ 11.256134] powernv-cpufreq: Platform driver disabled. System does not support PState control console: [ 11.273326] ipip: IPv4 and MPLS over IPv4 tunneling driver console: [ 11.303989] NET: Registered protocol family 10 console: [ 11.323651] Segment Routing with IPv6 console: [ 11.325267] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver console: [ 11.335866] NET: Registered protocol family 17 console: [ 11.336900] Key type dns_resolver registered console: [ 11.337358] secvar-sysfs: secvar: failed to retrieve secvar operations. console: [ 11.337877] drmem: No dynamic reconfiguration memory found console: [ 11.341767] Loading compiled-in X.509 certificates console: [ 11.362272] Loaded X.509 cert 'Build time autogenerated kernel key: 987b64c96d830fe42d02bbf502e028ebe85c2b4e' console: [ 11.667162] Key type encrypt
Re: [PATCH RFC] MAINTAINERS: split out s390x sections
Am 20.12.21 um 12:54 schrieb Cornelia Huck: Split out some more specialized devices etc., so that we can build smarter lists of people to be put on cc: in the future. Signed-off-by: Cornelia Huck Acked-by: Christian Borntraeger That should help to get additional maintainers (in add-on patches) added. Letsa go with this split - we can fix and improve things anytime. --- As discussed offlist. Some notes: - The new sections have inherited the maintainers of the sections they have been split out of (except where people had already volunteered). That's easy to change, obviously, and I hope that the cc: list already contains people who might have interest in volunteering for some sections. - I may not have gotten the F: patterns correct, please double check. - I'm also not sure about where in the MAINTAINERS file the new sections should go; if you have a better idea, please speak up. - Also, if you have better ideas regarding the sections, please speak up as well :) - Pull requests will probably continue the same way as now (i.e. patches picked up at the top level and then sent, except for some things like tcg which may go separately.) Not sure if it would make sense to try out the submaintainer pull request model again, I don't think it made life easier in the past, and now we have the b4 tool to pick patches easily anyway. It might be a good idea to check which of the tree locations should stay, or if we want to have new ones. --- MAINTAINERS | 86 ++--- 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9a8d1bdf727d..d1916f075386 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -297,7 +297,6 @@ M: David Hildenbrand S: Maintained F: target/s390x/ F: target/s390x/tcg -F: target/s390x/cpu_models_*.[ch] F: hw/s390x/ F: disas/s390.c F: tests/tcg/s390x/ @@ -396,16 +395,10 @@ M: Halil Pasic M: Christian Borntraeger S: Supported F: target/s390x/kvm/ -F: target/s390x/ioinst.[ch] F: target/s390x/machine.c F: target/s390x/sigp.c -F: target/s390x/cpu_features*.[ch] -F: target/s390x/cpu_models.[ch] F: hw/s390x/pv.c F: include/hw/s390x/pv.h -F: hw/intc/s390_flic.c -F: hw/intc/s390_flic_kvm.c -F: include/hw/s390x/s390_flic.h F: gdb-xml/s390*.xml T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s3...@nongnu.org @@ -1529,12 +1522,8 @@ S390 Virtio-ccw M: Halil Pasic M: Christian Borntraeger S: Supported -F: hw/char/sclp*.[hc] -F: hw/char/terminal3270.c F: hw/s390x/ F: include/hw/s390x/ -F: hw/watchdog/wdt_diag288.c -F: include/hw/watchdog/wdt_diag288.h F: configs/devices/s390x-softmmu/default.mak F: tests/avocado/machine_s390_ccw_virtio.py T: git https://github.com/borntraeger/qemu.git s390-next @@ -1559,6 +1548,80 @@ F: hw/s390x/s390-pci* F: include/hw/s390x/s390-pci* L: qemu-s3...@nongnu.org +S390 channel subsystem +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/s390x/ccw-device.[ch] +F: hw/s390x/css.c +F: hw/s390x/css-bridge.c +F: include/hw/s390x/css.h +F: include/hw/s390x/css-bridge.h +F: include/hw/s390x/ioinst.h +F: target/s390x/ioinst.c +L: qemu-s3...@nongnu.org + +3270 device +M: Halil Pasic +M: Christian Borntraeger +S: Odd fixes +F: include/hw/s390x/3270-ccw.h +F: hw/char/terminal3270.c +F: hw/s390x/3270-ccw.c +L: qemu-s3...@nongnu.org + +diag 288 watchdog +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/watchdog/wdt_diag288.c +F: include/hw/watchdog/wdt_diag288.h +L: qemu-s3...@nongnu.org + +S390 CPU models +M: David Hildenbrand +S: Maintained +F: target/s390x/cpu_features*.[ch] +F: target/s390x/cpu_models.[ch] +L: qemu-s3...@nongnu.org + +S390 storage key device +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/s390x/storage-keys.h +F: hw/390x/s390-skeys*.c +L: qemu-s3...@nongnu.org + +S390 storage attribute device +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/s390x/storage-attributes.h +F: hw/s390/s390-stattrib*.c +L: qemu-s3...@nongnu.org + +S390 SCLP-backed devices +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: include/hw/s390x/event-facility.h +F: include/hw/s390x/sclp.h +F: hw/char/sclp*.[hc] +F: hw/s390x/event-facility.c +F: hw/s390x/sclp*.c +L: qemu-s3...@nongnu.org + +S390 floating interrupt controller +M: Halil Pasic +M: Christian Borntraeger +M: David Hildenbrand +S: Supported +F: hw/intc/s390_flic.c +F: hw/intc/s390_flic_kvm.c +F: include/hw/s390x/s390_flic.h +L: qemu-s3...@nongnu.org + X86 Machines PC @@ -1957,6 +2020,7 @@ M: Halil Pasic S: Supported F: hw/s390x/virtio-ccw*.[hc] F: hw/s390x/vhost-vsock-ccw.c +F: hw/s390x/vhost-user-fs-ccw.c T: git https://gitlab.com/cohuck/qemu.git s390-next T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s3...@nongnu.org
Re: [PATCH] scsi-generic: replace logical block count of response of READ CAPACITY
Hi Paolo, Hannes, any thoughts on the following issue? Introduction: When using SAN storage for providing block devices to guests, configured as SCSI-passthrough devices, increasing the space available in the VM is a use case. To do it, it is currently necessary to: 1) expand storage on the actual SAN, 2) run a "virsh blockresize" or equivalent command to make QEMU aware of the new size, and finally 3) do a "rescan-scsi-bus.sh" or equivalent operation in the guest to make the running guest aware of the increased disk size. The problem: As of now, the administrator needs to make sure that step 3 won't be done before step 2 has been executed, or the resulting state will be inconsistent. In practice this creates organizational issues to try to sync up host/storage admins and guest OS admin, and is therefore error prone (due to these human factors). The proposal: The patch I replied to here from Ma Lin tries to avoid the inconsistent state, by having "rescan-scsi-bus.sh" still report the old size in the guest until QEMU itself is aware of the new disk size. The patch: Before the patch, the SCSI READ_CAPACITY command in the guest os directly receives the unmodified response from the storage backend. After the patch, QEMU intercepts the READ_CAPACITY response and replaces the maximum LBA with the information which is saved in QEMU. This means: after resizing the storage on the SAN backend, the host administrator must explicitly notify about CAPACITY HAS CHANGED by issuing a block resize command through QMP or libvirt, even for SCSI passthrough disks. Any ideas on this patch or on possible alternatives? Thanks, Claudio On 11/20/21 11:15 AM, Lin Ma wrote: > While using SCSI passthrough, Following scenario makes qemu doesn't > realized the capacity change of remote scsi target: > 1. online resize the scsi target. > 2. issue 'rescan-scsi-bus.sh -s ...' in host. > 3. issue 'rescan-scsi-bus.sh -s ...' in vm. > > In above scenario I used to experienced errors while accessing the > additional disk space in vm. I think the reasonable operations should > be: > 1. online resize the scsi target. > 2. issue 'rescan-scsi-bus.sh -s ...' in host. > 3. issue 'block_resize' via qmp to notify qemu. > 4. issue 'rescan-scsi-bus.sh -s ...' in vm. > > The errors disappear once I notify qemu by block_resize via qmp. > > So this patch replaces the number of logical blocks of READ CAPACITY > response from scsi target by qemu's bs->total_sectors. If the user in > vm wants to access the additional disk space, The administrator of > host must notify qemu once resizeing the scsi target. > > Bonus is that domblkinfo of libvirt can reflect the consistent capacity > information between host and vm in case of missing block_resize in qemu. > E.g: > ... > > > > > > > > > ... > > Before: > 1. online resize the scsi target. > 2. host:~ # rescan-scsi-bus.sh -s /dev/sdc > 3. guest:~ # rescan-scsi-bus.sh -s /dev/sda > 4 host:~ # virsh domblkinfo --domain $DOMAIN --human --device sda > Capacity: 4.000 GiB > Allocation: 0.000 B > Physical: 8.000 GiB > > 5. guest:~ # lsblk /dev/sda > NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT > sda 8:00 8G 0 disk > └─sda1 8:10 2G 0 part > > After: > 1. online resize the scsi target. > 2. host:~ # rescan-scsi-bus.sh -s /dev/sdc > 3. guest:~ # rescan-scsi-bus.sh -s /dev/sda > 4 host:~ # virsh domblkinfo --domain $DOMAIN --human --device sda > Capacity: 4.000 GiB > Allocation: 0.000 B > Physical: 8.000 GiB > > 5. guest:~ # lsblk /dev/sda > NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT > sda 8:00 4G 0 disk > └─sda1 8:10 2G 0 part > > Signed-off-by: Lin Ma > --- > hw/scsi/scsi-generic.c | 10 -- > 1 file changed, 8 insertions(+), 2 deletions(-) > > diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c > index 0306ccc7b1..343b51c2c0 100644 > --- a/hw/scsi/scsi-generic.c > +++ b/hw/scsi/scsi-generic.c > @@ -315,11 +315,17 @@ static void scsi_read_complete(void * opaque, int ret) > if (r->req.cmd.buf[0] == READ_CAPACITY_10 && > (ldl_be_p(&r->buf[0]) != 0xU || s->max_lba == 0)) { > s->blocksize = ldl_be_p(&r->buf[4]); > -s->max_lba = ldl_be_p(&r->buf[0]) & 0xULL; > +BlockBackend *blk = s->conf.blk; > +BlockDriverState *bs = blk_bs(blk); > +s->max_lba = bs->total_sectors - 1; > +stl_be_p(&r->buf[0], s->max_lba); > } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 && > (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) { > s->blocksize = ldl_be_p(&r->buf[8]); > -s->max_lba = ldq_be_p(&r->buf[0]); > +BlockBackend *blk = s->conf.blk; > +BlockDriverState *bs = blk_bs(blk); > +s->max_lba = bs->total_sectors - 1; > +stq_be_p(&r->buf[0], s->max_lba); > } > blk_set_guest_block_size(s->conf.blk, s->bl
Re: [PATCH RFC] MAINTAINERS: split out s390x sections
On 20/12/2021 12.54, Cornelia Huck wrote: Split out some more specialized devices etc., so that we can build smarter lists of people to be put on cc: in the future. Signed-off-by: Cornelia Huck --- As discussed offlist. Some notes: - The new sections have inherited the maintainers of the sections they have been split out of (except where people had already volunteered). That's easy to change, obviously, and I hope that the cc: list already contains people who might have interest in volunteering for some sections. - I may not have gotten the F: patterns correct, please double check. - I'm also not sure about where in the MAINTAINERS file the new sections should go; if you have a better idea, please speak up. - Also, if you have better ideas regarding the sections, please speak up as well :) - Pull requests will probably continue the same way as now (i.e. patches picked up at the top level and then sent, except for some things like tcg which may go separately.) Not sure if it would make sense to try out the submaintainer pull request model again, I don't think it made life easier in the past, and now we have the b4 tool to pick patches easily anyway. It might be a good idea to check which of the tree locations should stay, or if we want to have new ones. --- MAINTAINERS | 86 ++--- 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9a8d1bdf727d..d1916f075386 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -297,7 +297,6 @@ M: David Hildenbrand S: Maintained F: target/s390x/ F: target/s390x/tcg -F: target/s390x/cpu_models_*.[ch] F: hw/s390x/ F: disas/s390.c F: tests/tcg/s390x/ @@ -396,16 +395,10 @@ M: Halil Pasic M: Christian Borntraeger S: Supported F: target/s390x/kvm/ -F: target/s390x/ioinst.[ch] F: target/s390x/machine.c F: target/s390x/sigp.c -F: target/s390x/cpu_features*.[ch] -F: target/s390x/cpu_models.[ch] F: hw/s390x/pv.c F: include/hw/s390x/pv.h -F: hw/intc/s390_flic.c -F: hw/intc/s390_flic_kvm.c -F: include/hw/s390x/s390_flic.h F: gdb-xml/s390*.xml T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s3...@nongnu.org @@ -1529,12 +1522,8 @@ S390 Virtio-ccw M: Halil Pasic M: Christian Borntraeger S: Supported -F: hw/char/sclp*.[hc] -F: hw/char/terminal3270.c F: hw/s390x/ F: include/hw/s390x/ -F: hw/watchdog/wdt_diag288.c -F: include/hw/watchdog/wdt_diag288.h F: configs/devices/s390x-softmmu/default.mak F: tests/avocado/machine_s390_ccw_virtio.py T: git https://github.com/borntraeger/qemu.git s390-next @@ -1559,6 +1548,80 @@ F: hw/s390x/s390-pci* F: include/hw/s390x/s390-pci* L: qemu-s3...@nongnu.org +S390 channel subsystem +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/s390x/ccw-device.[ch] +F: hw/s390x/css.c +F: hw/s390x/css-bridge.c +F: include/hw/s390x/css.h +F: include/hw/s390x/css-bridge.h +F: include/hw/s390x/ioinst.h +F: target/s390x/ioinst.c +L: qemu-s3...@nongnu.org + +3270 device +M: Halil Pasic +M: Christian Borntraeger +S: Odd fixes +F: include/hw/s390x/3270-ccw.h +F: hw/char/terminal3270.c +F: hw/s390x/3270-ccw.c +L: qemu-s3...@nongnu.org I'm a little bit torn between putting the s390x-related devices here in the "Machine" section (which should rather be used for machines and not for devices), or in the more generic "Devices" section later in the MAINTAINERS file. We already have vfio-ccw and vfio-ap in the "Devices" section, so maybe we should put the other s390x-related devices there as well? (maybe with a "s390x" prefix so that they show up in the same spot if we sort them alphabetically?) +diag 288 watchdog +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/watchdog/wdt_diag288.c +F: include/hw/watchdog/wdt_diag288.h +L: qemu-s3...@nongnu.org + +S390 CPU models +M: David Hildenbrand +S: Maintained +F: target/s390x/cpu_features*.[ch] +F: target/s390x/cpu_models.[ch] +L: qemu-s3...@nongnu.org + +S390 storage key device +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/s390x/storage-keys.h +F: hw/390x/s390-skeys*.c +L: qemu-s3...@nongnu.org + +S390 storage attribute device +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: hw/s390x/storage-attributes.h +F: hw/s390/s390-stattrib*.c +L: qemu-s3...@nongnu.org + +S390 SCLP-backed devices +M: Halil Pasic +M: Christian Borntraeger +S: Supported +F: include/hw/s390x/event-facility.h +F: include/hw/s390x/sclp.h +F: hw/char/sclp*.[hc] +F: hw/s390x/event-facility.c +F: hw/s390x/sclp*.c +L: qemu-s3...@nongnu.org + +S390 floating interrupt controller +M: Halil Pasic +M: Christian Borntraeger +M: David Hildenbrand +S: Supported +F: hw/intc/s390_flic.c +F: hw/intc/s390_flic_kvm.c The above two lines could be shortened to: F: hw/intc/s390_flic*.c +F: include/hw/s390x/s390_flic.h +L: qemu-s3...@nongnu.org + X86 Machines PC @@ -1957,6 +2020,7 @@
Re: [PATCH v2 6/8] migration: Dump sub-cmd name in loadvm_process_command tp
On Monday, 2021-12-20 at 16:53:53 +08, Peter Xu wrote: > It'll be easier to read the name rather than index of sub-cmd when debugging. > > Signed-off-by: Peter Xu > --- > migration/savevm.c | 3 ++- > migration/trace-events | 2 +- > 2 files changed, 3 insertions(+), 2 deletions(-) > > diff --git a/migration/savevm.c b/migration/savevm.c > index 0bef031acb..7f7af6f750 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -2272,12 +2272,13 @@ static int loadvm_process_command(QEMUFile *f) > return qemu_file_get_error(f); > } > > -trace_loadvm_process_command(cmd, len); > if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) { > error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len); > return -EINVAL; > } > > +trace_loadvm_process_command(mig_cmd_args[cmd].name, len); > + > if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) { > error_report("%s received with bad length - expecting %zu, got %d", > mig_cmd_args[cmd].name, > diff --git a/migration/trace-events b/migration/trace-events > index b48d873b8a..d63a5915f5 100644 > --- a/migration/trace-events > +++ b/migration/trace-events > @@ -22,7 +22,7 @@ loadvm_postcopy_handle_resume(void) "" > loadvm_postcopy_ram_handle_discard(void) "" > loadvm_postcopy_ram_handle_discard_end(void) "" > loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) > "%s: %ud" > -loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d" > +loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" "cmd" rather than "com", to match the code. > loadvm_process_command_ping(uint32_t val) "0x%x" > postcopy_ram_listen_thread_exit(void) "" > postcopy_ram_listen_thread_start(void) "" dme. -- I went starin' out of my window, been caught doin' it once or twice.
Re: [PATCH v2 7/8] migration: Finer grained tracepoints for POSTCOPY_LISTEN
On Monday, 2021-12-20 at 16:53:54 +08, Peter Xu wrote: > The enablement of postcopy listening has a few steps, add a few tracepoints to > be there ready for some basic measurements for them. > > Signed-off-by: Peter Xu > --- > migration/savevm.c | 9 - > migration/trace-events | 2 +- > 2 files changed, 9 insertions(+), 2 deletions(-) > > diff --git a/migration/savevm.c b/migration/savevm.c > index 7f7af6f750..25face6de0 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -1947,9 +1947,10 @@ static void *postcopy_ram_listen_thread(void *opaque) > static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > { > PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); > -trace_loadvm_postcopy_handle_listen(); > Error *local_err = NULL; > > +trace_loadvm_postcopy_handle_listen("enter"); > + > if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) { > error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); > return -1; > @@ -1964,6 +1965,8 @@ static int > loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > } > } > > +trace_loadvm_postcopy_handle_listen("after disgard"); s/disgard/discard/ > + > /* > * Sensitise RAM - can now generate requests for blocks that don't exist > * However, at this point the CPU shouldn't be running, and the IO > @@ -1976,6 +1979,8 @@ static int > loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > } > } > > +trace_loadvm_postcopy_handle_listen("after uffd"); > + > if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) { > error_report_err(local_err); > return -1; > @@ -1990,6 +1995,8 @@ static int > loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > qemu_sem_wait(&mis->listen_thread_sem); > qemu_sem_destroy(&mis->listen_thread_sem); > > +trace_loadvm_postcopy_handle_listen("exit"); > + "return" rather than "exit"? > return 0; > } > > diff --git a/migration/trace-events b/migration/trace-events > index d63a5915f5..77d1237d89 100644 > --- a/migration/trace-events > +++ b/migration/trace-events > @@ -14,7 +14,7 @@ loadvm_handle_cmd_packaged_main(int ret) "%d" > loadvm_handle_cmd_packaged_received(int ret) "%d" > loadvm_handle_recv_bitmap(char *s) "%s" > loadvm_postcopy_handle_advise(void) "" > -loadvm_postcopy_handle_listen(void) "" > +loadvm_postcopy_handle_listen(const char *str) "%s" > loadvm_postcopy_handle_run(void) "" > loadvm_postcopy_handle_run_cpu_sync(void) "" > loadvm_postcopy_handle_run_vmstart(void) "" dme. -- When you were the brightest star, who were the shadows?
Re: [PATCH] tests/qtest/virtio-net-failover: Use g_random_int() instead of g_test_rand_int()
On 20/12/2021 21.02, Philippe Mathieu-Daudé wrote: On 12/20/21 20:26, Richard Henderson wrote: On 12/20/21 2:27 AM, Thomas Huth wrote: const gchar *tmpdir = g_get_tmp_dir(); gchar *tmpfile = g_strdup_printf("%s/failover_test_migrate-%u-%u", - tmpdir, getpid(), g_test_rand_int()); + tmpdir, getpid(), g_random_int()); Random numbers plus pid are irrelevant, because you still don't have guaranteed uniqueness -- think stale files in /tmp. Use g_file_open_tmp. Another use in test_socket_unix_abstract(), tests/unit/test-util-sockets.c. Using g_file_open_tmp is certainly better ... but the tests are currently written in a way where they require the file name of the temporary file - so switching to g_file_open_tmp() (which only provides a file handle) certainly would need some rewrite here... Thus I'd suggest to go first with this patch to silence the Assert messages, and then to clean this up properly later. Thomas
[PATCH 1/8] configure: simplify creation of plugin symbol list
--dynamic-list is present on all supported ELF (not Windows or Darwin) platforms, since it dates back to 2006; -exported_symbols_list is likewise present on all supported versions of macOS. Do not bother doing a functional test in configure. Remove the file creation from configure as well: for Darwin, move the the creation of the Darwin-formatted symbols to meson; for ELF, use the file in the source path directly and switch from -Wl, to -Xlinker to not break weird paths that include a comma. Signed-off-by: Paolo Bonzini --- configure | 80 - plugins/meson.build | 11 +-- 2 files changed, 8 insertions(+), 83 deletions(-) diff --git a/configure b/configure index 8ccfe51673..1bce9635d9 100755 --- a/configure +++ b/configure @@ -78,7 +78,6 @@ TMPC="${TMPDIR1}/${TMPB}.c" TMPO="${TMPDIR1}/${TMPB}.o" TMPCXX="${TMPDIR1}/${TMPB}.cxx" TMPE="${TMPDIR1}/${TMPB}.exe" -TMPTXT="${TMPDIR1}/${TMPB}.txt" rm -f config.log @@ -2343,69 +2342,6 @@ EOF fi fi -## -# plugin linker support probe - -if test "$plugins" != "no"; then - -# -# See if --dynamic-list is supported by the linker - -ld_dynamic_list="no" -cat > $TMPTXT < $TMPC < -void foo(void); - -void foo(void) -{ - printf("foo\n"); -} - -int main(void) -{ - foo(); - return 0; -} -EOF - -if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then -ld_dynamic_list="yes" -fi - -# -# See if -exported_symbols_list is supported by the linker - -ld_exported_symbols_list="no" -cat > $TMPTXT <> $config_host_mak -# Copy the export object list to the build dir -if test "$ld_dynamic_list" = "yes" ; then - echo "CONFIG_HAS_LD_DYNAMIC_LIST=yes" >> $config_host_mak - ld_symbols=qemu-plugins-ld.symbols - cp "$source_path/plugins/qemu-plugins.symbols" $ld_symbols -elif test "$ld_exported_symbols_list" = "yes" ; then - echo "CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST=yes" >> $config_host_mak - ld64_symbols=qemu-plugins-ld64.symbols - echo "# Automatically generated by configure - do not modify" > $ld64_symbols - grep 'qemu_' "$source_path/plugins/qemu-plugins.symbols" | sed 's/;//g' | \ - sed -E 's/^[[:space:]]*(.*)/_\1/' >> $ld64_symbols -else - error_exit \ - "If \$plugins=yes, either \$ld_dynamic_list or " \ - "\$ld_exported_symbols_list should have been set to 'yes'." -fi fi if test -n "$gdb_bin"; then diff --git a/plugins/meson.build b/plugins/meson.build index b3de57853b..d0a2ee94cf 100644 --- a/plugins/meson.build +++ b/plugins/meson.build @@ -1,10 +1,15 @@ plugin_ldflags = [] # Modules need more symbols than just those in plugins/qemu-plugins.symbols if not enable_modules - if 'CONFIG_HAS_LD_DYNAMIC_LIST' in config_host -plugin_ldflags = ['-Wl,--dynamic-list=qemu-plugins-ld.symbols'] - elif 'CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST' in config_host + if targetos == 'darwin' +qemu_plugins_symbols_list = configure_file( + input: files('qemu-plugins.symbols'), + output: 'qemu-plugins-ld64.symbols', + capture: true, + command: ['sed', '-ne', 's/^[[:space:]]*\\(qemu_.*\\);/_\\1/p', '@INPUT@']) plugin_ldflags = ['-Wl,-exported_symbols_list,qemu-plugins-ld64.symbols'] + else +plugin_ldflags = ['-Xlinker', '--dynamic-list=' + (meson.project_source_root() / 'plugins/qemu-plugins.symbols')] endif endif -- 2.33.1
[PATCH 2/8] configure: do not set bsd_user/linux_user early
Similar to other optional features, leave the variables empty and compute the actual value later. Use the existence of include or source directories to detect whether an OS or CPU supports respectively bsd-user and linux-user. For now, BSD user-mode emulation is buildable even on TCI-only architectures. This probably will change once safe signals are brought over from linux-user. Signed-off-by: Paolo Bonzini --- configure | 28 +--- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/configure b/configure index 1bce9635d9..6dafbcd362 100755 --- a/configure +++ b/configure @@ -321,8 +321,8 @@ linux="no" solaris="no" profiler="no" softmmu="yes" -linux_user="no" -bsd_user="no" +linux_user="" +bsd_user="" pkgversion="" pie="" qom_cast_debug="yes" @@ -539,7 +539,6 @@ gnu/kfreebsd) ;; freebsd) bsd="yes" - bsd_user="yes" make="${MAKE-gmake}" # needed for kinfo_getvmmap(3) in libutil.h ;; @@ -584,7 +583,6 @@ haiku) ;; linux) linux="yes" - linux_user="yes" vhost_user=${default_feature:-yes} ;; esac @@ -1262,18 +1260,26 @@ if eval test -z "\${cross_cc_$cpu}"; then cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" fi -# For user-mode emulation the host arch has to be one we explicitly -# support, even if we're using TCI. -if [ "$ARCH" = "unknown" ]; then - bsd_user="no" - linux_user="no" -fi - default_target_list="" deprecated_targets_list=ppc64abi32-linux-user deprecated_features="" mak_wilds="" +if [ "$linux_user" != no ]; then +if [ "$targetos" = linux ] && [ -d $source_path/linux-user/host/$cpu ]; then +linux_user=yes +elif [ "$linux_user" = yes ]; then +error_exit "linux-user not supported on this architecture" +fi +fi +if [ "$bsd_user" != no ]; then +if [ "$bsd_user" = "" ]; then +test $targetos = freebsd && bsd_user=yes +fi +if [ "$bsd_user" = yes ] && ! [ -d $source_path/bsd-user/$targetos ]; then +error_exit "bsd-user not supported on this host OS" +fi +fi if [ "$softmmu" = "yes" ]; then mak_wilds="${mak_wilds} $source_path/configs/targets/*-softmmu.mak" fi -- 2.33.1
[PATCH 0/8] Next round of configure/meson cleanups
Includes v2 of patches from the previous round, and new patches 3-8. Paolo Paolo Bonzini (8): configure: simplify creation of plugin symbol list configure: do not set bsd_user/linux_user early configure, makefile: remove traces of really old files configure: parse --enable/--disable-strip automatically, flip default configure: move non-command-line variables away from command-line parsing section meson: build contrib/ executables after generated headers configure, meson: move config-poison.h to meson meson: add comments in the target-specific flags section Makefile | 11 +-- configure | 151 + contrib/elf2dmp/meson.build| 2 +- contrib/ivshmem-client/meson.build | 2 +- contrib/ivshmem-server/meson.build | 2 +- contrib/rdmacm-mux/meson.build | 2 +- meson.build| 17 pc-bios/s390-ccw/Makefile | 2 - plugins/meson.build| 11 ++- scripts/make-config-poison.sh | 16 +++ scripts/meson-buildoptions.py | 21 ++-- scripts/meson-buildoptions.sh | 3 + 12 files changed, 90 insertions(+), 150 deletions(-) create mode 100755 scripts/make-config-poison.sh -- 2.33.1
[PATCH 4/8] configure: parse --enable/--disable-strip automatically, flip default
Always include the STRIP variable in config-host.mak (it's only used by the s390-ccw firmware build, and it adds a default if configure omitted it), and use meson-buildoptions.sh to turn --enable/--disable-strip into -Dstrip. The default is now not to strip the binaries like for almost every other package that has a configure script. Signed-off-by: Paolo Bonzini --- configure | 10 +- pc-bios/s390-ccw/Makefile | 2 -- scripts/meson-buildoptions.py | 21 ++--- scripts/meson-buildoptions.sh | 3 +++ 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/configure b/configure index e09e5bb58f..40dd6e8d1b 100755 --- a/configure +++ b/configure @@ -308,7 +308,6 @@ debug="no" sanitizers="no" tsan="no" fortify_source="$default_feature" -strip_opt="yes" mingw32="no" gcov="no" EXESUF="" @@ -891,7 +890,6 @@ for opt do debug_tcg="yes" debug_mutex="yes" debug="yes" - strip_opt="no" fortify_source="no" ;; --enable-sanitizers) sanitizers="yes" @@ -902,8 +900,6 @@ for opt do ;; --disable-tsan) tsan="no" ;; - --disable-strip) strip_opt="no" - ;; --disable-slirp) slirp="disabled" ;; --enable-slirp) slirp="enabled" @@ -1370,7 +1366,6 @@ Advanced options (experts only): --enable-debug enable common debug build options --enable-sanitizers enable default sanitizers --enable-tsanenable thread sanitizer - --disable-strip disable stripping binaries --disable-werror disable compilation abort on warning --disable-stack-protector disable compiler-provided stack protection --audio-drv-list=LISTset audio drivers to try if -audiodev is not used @@ -3340,9 +3335,6 @@ echo "GIT_SUBMODULES_ACTION=$git_submodules_action" >> $config_host_mak if test "$debug_tcg" = "yes" ; then echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak fi -if test "$strip_opt" = "yes" ; then - echo "STRIP=${strip}" >> $config_host_mak -fi if test "$mingw32" = "yes" ; then echo "CONFIG_WIN32=y" >> $config_host_mak if test "$guest_agent_with_vss" = "yes" ; then @@ -3622,6 +3614,7 @@ echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak echo "GLIB_LIBS=$glib_libs" >> $config_host_mak echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak +echo "STRIP=$strip" >> $config_host_mak echo "EXESUF=$EXESUF" >> $config_host_mak echo "LIBS_QGA=$libs_qga" >> $config_host_mak @@ -3836,7 +3829,6 @@ if test "$skip_meson" = no; then -Doptimization=$(if test "$debug" = yes; then echo 0; else echo 2; fi) \ -Ddebug=$(if test "$debug_info" = yes; then echo true; else echo false; fi) \ -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ --Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ -Db_lto=$lto -Dcfi=$cfi -Dtcg=$tcg -Dxen=$xen \ diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile index cee9d2c63b..0eb68efc7b 100644 --- a/pc-bios/s390-ccw/Makefile +++ b/pc-bios/s390-ccw/Makefile @@ -44,8 +44,6 @@ build-all: s390-ccw.img s390-netboot.img s390-ccw.elf: $(OBJECTS) $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $(OBJECTS),"BUILD","$(TARGET_DIR)$@") -STRIP ?= strip - s390-ccw.img: s390-ccw.elf $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py index 96969d89ee..98ae944148 100755 --- a/scripts/meson-buildoptions.py +++ b/scripts/meson-buildoptions.py @@ -36,6 +36,10 @@ "trace_file", } +BUILTIN_OPTIONS = { +"strip", +} + LINE_WIDTH = 76 @@ -90,14 +94,17 @@ def allow_arg(opt): return not (set(opt["choices"]) <= {"auto", "disabled", "enabled"}) +def filter_options(json): +if ":" in json["name"]: +return False +if json["section"] == "user": +return json["name"] not in SKIP_OPTIONS +else: +return json["name"] in BUILTIN_OPTIONS + + def load_options(json): -json = [ -x -for x in json -if x["section"] == "user" -and ":" not in x["name"] -and x["name"] not in SKIP_OPTIONS -] +json = [x for x in json if filter_options(x)] return sorted(json, key=lambda x: x["name"]) diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index ae8f18edc2..46360e541d 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -13,6 +13,7 @@ meson_options_help() { printf "%s\n" ' jemalloc/system/tcmalloc)' printf "%s\n" ' --enable-slirp[=CHOICE] Whether and how to find the slirp library' printf "%s\n" ' (choices: auto/disabl
[PATCH 6/8] meson: build contrib/ executables after generated headers
This will be needed as soon as config-poison.h moves from configure to a meson custom_target (which is built at "ninja" time). Signed-off-by: Paolo Bonzini --- contrib/elf2dmp/meson.build| 2 +- contrib/ivshmem-client/meson.build | 2 +- contrib/ivshmem-server/meson.build | 2 +- contrib/rdmacm-mux/meson.build | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/elf2dmp/meson.build b/contrib/elf2dmp/meson.build index 4d86cb390a..6707d43c4f 100644 --- a/contrib/elf2dmp/meson.build +++ b/contrib/elf2dmp/meson.build @@ -1,5 +1,5 @@ if curl.found() - executable('elf2dmp', files('main.c', 'addrspace.c', 'download.c', 'pdb.c', 'qemu_elf.c'), + executable('elf2dmp', files('main.c', 'addrspace.c', 'download.c', 'pdb.c', 'qemu_elf.c'), genh, dependencies: [glib, curl], install: true) endif diff --git a/contrib/ivshmem-client/meson.build b/contrib/ivshmem-client/meson.build index 1b171efb4f..ce8dcca84d 100644 --- a/contrib/ivshmem-client/meson.build +++ b/contrib/ivshmem-client/meson.build @@ -1,4 +1,4 @@ -executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), +executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), genh, dependencies: glib, build_by_default: targetos == 'linux', install: false) diff --git a/contrib/ivshmem-server/meson.build b/contrib/ivshmem-server/meson.build index 3a53942201..c6c3c82e89 100644 --- a/contrib/ivshmem-server/meson.build +++ b/contrib/ivshmem-server/meson.build @@ -1,4 +1,4 @@ -executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), +executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), genh, dependencies: [qemuutil, rt], build_by_default: targetos == 'linux', install: false) diff --git a/contrib/rdmacm-mux/meson.build b/contrib/rdmacm-mux/meson.build index 6cc5016747..7674f54cc5 100644 --- a/contrib/rdmacm-mux/meson.build +++ b/contrib/rdmacm-mux/meson.build @@ -2,7 +2,7 @@ if 'CONFIG_PVRDMA' in config_host # if not found, CONFIG_PVRDMA should not be set # FIXME: broken on big endian architectures libumad = cc.find_library('ibumad', required: true) - executable('rdmacm-mux', files('main.c'), + executable('rdmacm-mux', files('main.c'), genh, dependencies: [glib, libumad], build_by_default: false, install: false) -- 2.33.1
[PATCH 7/8] configure, meson: move config-poison.h to meson
This ensures that the file is regenerated properly whenever config-target.h or config-devices.h files change. Signed-off-by: Paolo Bonzini --- Makefile | 2 +- configure | 11 --- meson.build | 12 scripts/make-config-poison.sh | 16 4 files changed, 29 insertions(+), 12 deletions(-) create mode 100755 scripts/make-config-poison.sh diff --git a/Makefile b/Makefile index 06ad8a61e1..2f80f56a4a 100644 --- a/Makefile +++ b/Makefile @@ -220,7 +220,7 @@ qemu-%.tar.bz2: distclean: clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || : - rm -f config-host.mak config-poison.h + rm -f config-host.mak rm -f tests/tcg/config-*.mak rm -f config.status rm -f roms/seabios/config.mak diff --git a/configure b/configure index 810bc36490..aff371ca81 100755 --- a/configure +++ b/configure @@ -3858,17 +3858,6 @@ if test -n "${deprecated_features}"; then echo " features: ${deprecated_features}" fi -# Create list of config switches that should be poisoned in common code... -# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special. -target_configs_h=$(ls *-config-devices.h *-config-target.h 2>/dev/null) -if test -n "$target_configs_h" ; then -sed -n -e '/CONFIG_TCG/d' -e '/CONFIG_USER_ONLY/d' \ --e '/^#define / { s///; s/ .*//; s/^/#pragma GCC poison /p; }' \ -$target_configs_h | sort -u > config-poison.h -else -:> config-poison.h -fi - # Save the configure command line for later reuse. cat0 ? ['@INPUT@'] : [])) + ## # Submodules # ## diff --git a/scripts/make-config-poison.sh b/scripts/make-config-poison.sh new file mode 100755 index 00..d222a04304 --- /dev/null +++ b/scripts/make-config-poison.sh @@ -0,0 +1,16 @@ +#! /bin/sh + +if test $# = 0; then + exit 0 +fi + +# Create list of config switches that should be poisoned in common code... +# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special. +exec sed -n \ + -e' /CONFIG_TCG/d' \ + -e '/CONFIG_USER_ONLY/d' \ + -e '/^#define / {' \ + -e's///' \ + -e's/ .*//' \ + -e's/^/#pragma GCC poison /p' \ + -e '}' "$@" -- 2.33.1
[PATCH 3/8] configure, makefile: remove traces of really old files
These files have been removed for more than year in the best case, or for more than ten years for some really old TCG files. Remove any traces of it. Signed-off-by: Paolo Bonzini --- Makefile | 11 --- configure | 9 - 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 74c5b46d38..06ad8a61e1 100644 --- a/Makefile +++ b/Makefile @@ -205,14 +205,11 @@ recurse-clean: $(addsuffix /clean, $(ROM_DIRS)) clean: recurse-clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean || : -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) clean-ctlist || : -# avoid old build problems by removing potentially incorrect old files - rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h find . \( -name '*.so' -o -name '*.dll' -o -name '*.[oda]' \) -type f \ ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \ ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \ -exec rm {} + - rm -f TAGS cscope.* *.pod *~ */*~ - rm -f fsdev/*.pod scsi/*.pod + rm -f TAGS cscope.* *~ */*~ VERSION = $(shell cat $(SRC_PATH)/VERSION) @@ -223,10 +220,10 @@ qemu-%.tar.bz2: distclean: clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || : - rm -f config-host.mak config-host.h* config-poison.h + rm -f config-host.mak config-poison.h rm -f tests/tcg/config-*.mak - rm -f config-all-disas.mak config.status - rm -f roms/seabios/config.mak roms/vgabios/config.mak + rm -f config.status + rm -f roms/seabios/config.mak rm -f qemu-plugins-ld.symbols qemu-plugins-ld64.symbols rm -f *-config-target.h *-config-devices.mak *-config-devices.h rm -rf meson-private meson-logs meson-info compile_commands.json diff --git a/configure b/configure index 6dafbcd362..e09e5bb58f 100755 --- a/configure +++ b/configure @@ -3696,9 +3696,6 @@ fi # so the build tree will be missing the link back to the new file, and # tests might fail. Prefer to keep the relevant files in their own # directory and symlink the directory instead. -# UNLINK is used to remove symlinks from older development versions -# that might get into the way when doing "git update" without doing -# a "make distclean" in between. LINKS="Makefile" LINKS="$LINKS tests/tcg/Makefile.target" LINKS="$LINKS pc-bios/optionrom/Makefile" @@ -3710,7 +3707,6 @@ LINKS="$LINKS tests/avocado tests/data" LINKS="$LINKS tests/qemu-iotests/check" LINKS="$LINKS python" LINKS="$LINKS contrib/plugins/Makefile " -UNLINK="pc-bios/keymaps" for bios_file in \ $source_path/pc-bios/*.bin \ $source_path/pc-bios/*.elf \ @@ -3732,11 +3728,6 @@ for f in $LINKS ; do symlink "$source_path/$f" "$f" fi done -for f in $UNLINK ; do -if [ -L "$f" ]; then -rm -f "$f" -fi -done (for i in $cross_cc_vars; do export $i -- 2.33.1
[PATCH 5/8] configure: move non-command-line variables away from command-line parsing section
This makes it easier to identify candidates for moving to Meson. Signed-off-by: Paolo Bonzini --- configure | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 40dd6e8d1b..810bc36490 100755 --- a/configure +++ b/configure @@ -308,16 +308,12 @@ debug="no" sanitizers="no" tsan="no" fortify_source="$default_feature" -mingw32="no" gcov="no" EXESUF="" modules="no" module_upgrades="no" prefix="/usr/local" qemu_suffix="qemu" -bsd="no" -linux="no" -solaris="no" profiler="no" softmmu="yes" linux_user="" @@ -331,8 +327,6 @@ opengl="$default_feature" cpuid_h="no" avx2_opt="$default_feature" guest_agent="$default_feature" -guest_agent_with_vss="no" -guest_agent_ntddscsi="no" vss_win32_sdk="$default_feature" win_sdk="no" want_tools="$default_feature" @@ -527,6 +521,10 @@ fi # OS specific +mingw32="no" +bsd="no" +linux="no" +solaris="no" case $targetos in windows) mingw32="yes" @@ -2574,6 +2572,7 @@ fi ## # check if we have VSS SDK headers for win +guest_agent_with_vss="no" if test "$mingw32" = "yes" && test "$guest_agent" != "no" && \ test "$vss_win32_sdk" != "no" ; then case "$vss_win32_sdk" in @@ -2604,7 +2603,6 @@ EOF echo "ERROR: The headers are extracted in the directory \`inc'." feature_not_found "VSS support" fi -guest_agent_with_vss="no" fi fi @@ -2631,6 +2629,7 @@ fi ## # check if mingw environment provides a recent ntddscsi.h +guest_agent_ntddscsi="no" if test "$mingw32" = "yes" && test "$guest_agent" != "no"; then cat > $TMPC << EOF #include -- 2.33.1
[PATCH 8/8] meson: add comments in the target-specific flags section
Signed-off-by: Paolo Bonzini --- meson.build | 5 + 1 file changed, 5 insertions(+) diff --git a/meson.build b/meson.build index 09ee427ca4..0a6d57125f 100644 --- a/meson.build +++ b/meson.build @@ -233,6 +233,7 @@ endif # Target-specific checks and dependencies # ### +# Fuzzing if get_option('fuzzing') and get_option('fuzzing_engine') == '' and \ not cc.links(''' #include @@ -244,6 +245,7 @@ if get_option('fuzzing') and get_option('fuzzing_engine') == '' and \ error('Your compiler does not support -fsanitize=fuzzer') endif +# Tracing backends if 'ftrace' in get_option('trace_backends') and targetos != 'linux' error('ftrace is supported only on Linux') endif @@ -257,6 +259,7 @@ if 'syslog' in get_option('trace_backends') and not cc.compiles(''' error('syslog is not supported on this system') endif +# Miscellaneous Linux-only features if targetos != 'linux' and get_option('mpath').enabled() error('Multipath is supported only on Linux') endif @@ -266,6 +269,7 @@ if targetos != 'linux' and get_option('multiprocess').enabled() endif multiprocess_allowed = targetos == 'linux' and not get_option('multiprocess').disabled() +# Target-specific libraries and flags libm = cc.find_library('m', required: false) threads = dependency('threads') util = cc.find_library('util', required: false) @@ -306,6 +310,7 @@ elif targetos == 'openbsd' endif endif +# Target-specific configuration of accelerators accelerators = [] if not get_option('kvm').disabled() and targetos == 'linux' accelerators += 'CONFIG_KVM' -- 2.33.1
Re: Virtio-GPU Xres and Yres seettings
On Mon, Dec 20, 2021 at 10:44:06PM +0530, Pratik Parvati wrote: > > EDID is optional, so you can try disable the EDID feature bit and see > > what happens. > > Thanks Gerd, after disabling the EDID, I was able to get the required > resolution (basically width and height) from the driver. > > Another strange observation - When the device receives the > command VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING with the number of > entries having a pixel data in scatter gather format, the device is trying > to store these bytes in contiguous memory. When I read those sg memory, the > device receives all zeros from the driver (for a 1024x768 display, > the device receives 3MB of data from the driver). Is this an expected > behaviour? - If not, what is the driver trying to display on the screen? How about reading the virtio spec? display updates are handled with transfer and flush commands. take care, Gerd
[PATCH] docker: include bison in debian-tricore-cross
Binutils sometimes fail to build if bison is not installed: /bin/sh ./ylwrap `test -f arparse.y || echo ./`arparse.y y.tab.c arparse.c y.tab.h arparse.h y.output arparse.output -- -d ./ylwrap: 109: ./ylwrap: -d: not found (the correct invocation of ylwrap would have "bison -d" after the double dash). Work around by installing it in the container. Cc: Alex Bennée Signed-off-by: Paolo Bonzini --- tests/docker/dockerfiles/debian-tricore-cross.docker | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/docker/dockerfiles/debian-tricore-cross.docker b/tests/docker/dockerfiles/debian-tricore-cross.docker index d8df2c6117..3f6b55562c 100644 --- a/tests/docker/dockerfiles/debian-tricore-cross.docker +++ b/tests/docker/dockerfiles/debian-tricore-cross.docker @@ -16,6 +16,7 @@ MAINTAINER Philippe Mathieu-Daudé RUN apt update && \ DEBIAN_FRONTEND=noninteractive apt install -yy eatmydata && \ DEBIAN_FRONTEND=noninteractive eatmydata apt install -yy \ + bison \ bzip2 \ ca-certificates \ ccache \ -- 2.33.1
Re: [PATCH] scsi-generic: replace logical block count of response of READ CAPACITY
Hi Claudio, On 12/21/21 10:48, Claudio Fontana wrote: > Hi Paolo, Hannes, > > any thoughts on the following issue? > > Introduction: > > When using SAN storage for providing block devices to guests, configured as > SCSI-passthrough devices, increasing the space available in the VM is a use > case. > > To do it, it is currently necessary to: > > 1) expand storage on the actual SAN, > 2) run a "virsh blockresize" or equivalent command to make QEMU aware of the > new size, and finally > 3) do a "rescan-scsi-bus.sh" or equivalent operation in the guest to make the > running guest aware of the increased disk size. > > The problem: > > As of now, the administrator needs to make sure that step 3 won't be done > before step 2 has been executed, or the resulting state will be inconsistent. > In practice this creates organizational issues to try to sync up host/storage > admins and guest OS admin, and is therefore error prone (due to these human > factors). > > The proposal: > > The patch I replied to here from Ma Lin tries to avoid the inconsistent > state, by having "rescan-scsi-bus.sh" still report the old size in the guest > until QEMU itself is aware of the new disk size. > > The patch: > > Before the patch, the SCSI READ_CAPACITY command in the guest os directly > receives the unmodified response from the storage backend. > > After the patch, QEMU intercepts the READ_CAPACITY response and replaces the > maximum LBA with the information which is saved in QEMU. > > This means: after resizing the storage on the SAN backend, the host > administrator must explicitly notify about CAPACITY HAS CHANGED by issuing a > block resize command through QMP or libvirt, > even for SCSI passthrough disks. > > Any ideas on this patch or on possible alternatives? I am not an SCSI expert, but Lin's description and yours sound coherent to me. One minor nitpick below. > On 11/20/21 11:15 AM, Lin Ma wrote: >> While using SCSI passthrough, Following scenario makes qemu doesn't >> realized the capacity change of remote scsi target: >> 1. online resize the scsi target. >> 2. issue 'rescan-scsi-bus.sh -s ...' in host. >> 3. issue 'rescan-scsi-bus.sh -s ...' in vm. >> >> In above scenario I used to experienced errors while accessing the >> additional disk space in vm. I think the reasonable operations should >> be: >> 1. online resize the scsi target. >> 2. issue 'rescan-scsi-bus.sh -s ...' in host. >> 3. issue 'block_resize' via qmp to notify qemu. >> 4. issue 'rescan-scsi-bus.sh -s ...' in vm. >> >> The errors disappear once I notify qemu by block_resize via qmp. >> >> So this patch replaces the number of logical blocks of READ CAPACITY >> response from scsi target by qemu's bs->total_sectors. If the user in >> vm wants to access the additional disk space, The administrator of >> host must notify qemu once resizeing the scsi target. >> >> Bonus is that domblkinfo of libvirt can reflect the consistent capacity >> information between host and vm in case of missing block_resize in qemu. >> E.g: >> ... >> >> >> >> >> >> >> >> >> ... >> >> Before: >> 1. online resize the scsi target. >> 2. host:~ # rescan-scsi-bus.sh -s /dev/sdc >> 3. guest:~ # rescan-scsi-bus.sh -s /dev/sda >> 4 host:~ # virsh domblkinfo --domain $DOMAIN --human --device sda >> Capacity: 4.000 GiB >> Allocation: 0.000 B >> Physical: 8.000 GiB >> >> 5. guest:~ # lsblk /dev/sda >> NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT >> sda 8:00 8G 0 disk >> └─sda1 8:10 2G 0 part >> >> After: >> 1. online resize the scsi target. >> 2. host:~ # rescan-scsi-bus.sh -s /dev/sdc >> 3. guest:~ # rescan-scsi-bus.sh -s /dev/sda >> 4 host:~ # virsh domblkinfo --domain $DOMAIN --human --device sda >> Capacity: 4.000 GiB >> Allocation: 0.000 B >> Physical: 8.000 GiB >> >> 5. guest:~ # lsblk /dev/sda >> NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT >> sda 8:00 4G 0 disk >> └─sda1 8:10 2G 0 part >> >> Signed-off-by: Lin Ma >> --- >> hw/scsi/scsi-generic.c | 10 -- >> 1 file changed, 8 insertions(+), 2 deletions(-) >> >> diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c >> index 0306ccc7b1..343b51c2c0 100644 >> --- a/hw/scsi/scsi-generic.c >> +++ b/hw/scsi/scsi-generic.c >> @@ -315,11 +315,17 @@ static void scsi_read_complete(void * opaque, int ret) >> if (r->req.cmd.buf[0] == READ_CAPACITY_10 && >> (ldl_be_p(&r->buf[0]) != 0xU || s->max_lba == 0)) { >> s->blocksize = ldl_be_p(&r->buf[4]); >> -s->max_lba = ldl_be_p(&r->buf[0]) & 0xULL; >> +BlockBackend *blk = s->conf.blk; >> +BlockDriverState *bs = blk_bs(blk); >> +s->max_lba = bs->total_sectors - 1; I'd add a refresh_max_lba() helper: static void refresh_max_lba(SCSIDevice *s) { BlockBackend *blk = s->conf.blk; BlockDriverState *bs = blk_bs(blk); uint64_t max_lba = bs->total_sectors -
Re: [PATCH] vl: Add -set options to device opts dict when using JSON syntax for -device
MkfsSion writes: > When using JSON syntax for -device, -set option can not find device > specified in JSON by id field. The following commandline is an example: > > $ qemu-system-x86_64 -device '{"id":"foo"}' -set device.foo.bar=1 > qemu-system-x86_64: -set device.foo.bar=1: there is no device "foo" defined Is this a regression? I suspect commit 5dacda5167 "vl: Enable JSON syntax for -device" (v6.2.0). I believe any conversion away from QemuOpts loses -set. > The patch adds -set options to JSON device opts dict for adding > options to device by latter object_set_properties_from_keyval call. > > Signed-off-by: YuanYang Meng > --- > include/qemu/option.h | 4 > softmmu/vl.c | 28 > util/qemu-option.c| 2 +- > 3 files changed, 33 insertions(+), 1 deletion(-) > > diff --git a/include/qemu/option.h b/include/qemu/option.h > index 306bf07575..31fa9fdc25 100644 > --- a/include/qemu/option.h > +++ b/include/qemu/option.h > @@ -45,6 +45,10 @@ const char *get_opt_value(const char *p, char **value); > > bool parse_option_size(const char *name, const char *value, > uint64_t *ret, Error **errp); > + > +bool parse_option_number(const char *name, const char *value, > + uint64_t *ret, Error **errp); > + > bool has_help_option(const char *param); > > enum QemuOptType { > diff --git a/softmmu/vl.c b/softmmu/vl.c > index 620a1f1367..feb3c49a65 100644 > --- a/softmmu/vl.c > +++ b/softmmu/vl.c > @@ -30,7 +30,9 @@ > #include "hw/qdev-properties.h" > #include "qapi/compat-policy.h" > #include "qapi/error.h" > +#include "qapi/qmp/qbool.h" > #include "qapi/qmp/qdict.h" > +#include "qapi/qmp/qnum.h" > #include "qapi/qmp/qstring.h" > #include "qapi/qmp/qjson.h" > #include "qemu-version.h" > @@ -2279,6 +2281,7 @@ static void qemu_set_option(const char *str, Error > **errp) > char group[64], id[64], arg[64]; > QemuOptsList *list; > QemuOpts *opts; > +DeviceOption *opt; > int rc, offset; > > rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset); > @@ -2294,6 +2297,31 @@ static void qemu_set_option(const char *str, Error > **errp) > if (list) { > opts = qemu_opts_find(list, id); > if (!opts) { > +QTAILQ_FOREACH(opt, &device_opts, next) { > +const char *device_id = qdict_get_try_str(opt->opts, > "id"); > +if (device_id && (strcmp(device_id, id) == 0)) { > +if (qdict_get(opt->opts, arg)) { > +qdict_del(opt->opts, arg); > +} > +const char *value = str + offset + 1; > +QObject *obj = NULL; > +bool boolean; > +uint64_t num; > +if (qapi_bool_parse(arg, value, &boolean, NULL)) { > +obj = QOBJECT(qbool_from_bool(boolean)); > +} else if (parse_option_number(arg, value, &num, > NULL)) { > +obj = QOBJECT(qnum_from_uint(num)); > +} else if (parse_option_size(arg, value, &num, > NULL)) { > +obj = QOBJECT(qnum_from_uint(num)); > +} else { > +obj = QOBJECT(qstring_from_str(value)); > +} > +if (obj) { > +qdict_put_obj(opt->opts, arg, obj); > +return; > +} > +} > +} > error_setg(errp, "there is no %s \"%s\" defined", group, id); > return; > } qemu_opt_set(opts, arg, str + offset + 1, errp); } } } Two issues, and only looks fixable. -device accepts either a QemuOpts or a JSON argument. It parses the former with qemu_opts_parse_noisily() into a QemuOpt stored in @qemu_device_opts. It parses the latter with qobject_from_json() into a QObject stored in @device_opts. Yes, the names are confusing. -set parses its argument into @group, @id, and @arg (the value). Before the patch, it uses @group and @id to look up the QemuOpt in @qemu_device_opts. If found, it updates it with qemu_opt_set(). By design, -set operates on the QemuOpts store. Options stored in @device_opts are not found. Your patch tries to fix that. Before I can explain what's wrong with it, I need more background. QemuOpts arguments are parsed as a set of (key, value) pairs, where the values are all strings (because qemu_device_opts.desc[] is empty). We access them with a qobject_input_visitor_new_keyval() visitor. This parses the strings according to the types being visited. Example: key=42 gets stored as {"key": "42"}. If we visit it with visit_type_str(), we use string value "42". If we visit it with vi
Re: Virtio-GPU Xres and Yres seettings
I apologise for not putting the question properly. I am referring virtio spec to understand the driver and device operation - I had a few questions on pixel data, as it was not working as expected. My display is working fine now; thanks for your help. Regards, Pratik On Tue, 21 Dec 2021 at 16:41, Gerd Hoffmann wrote: > On Mon, Dec 20, 2021 at 10:44:06PM +0530, Pratik Parvati wrote: > > > EDID is optional, so you can try disable the EDID feature bit and see > > > what happens. > > > > Thanks Gerd, after disabling the EDID, I was able to get the required > > resolution (basically width and height) from the driver. > > > > Another strange observation - When the device receives the > > command VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING with the number of > > entries having a pixel data in scatter gather format, the device is > trying > > to store these bytes in contiguous memory. When I read those sg memory, > the > > device receives all zeros from the driver (for a 1024x768 display, > > the device receives 3MB of data from the driver). Is this an expected > > behaviour? - If not, what is the driver trying to display on the screen? > > How about reading the virtio spec? > display updates are handled with transfer and flush commands. > > take care, > Gerd > >
Re: [PATCH 5/8] configure: move non-command-line variables away from command-line parsing section
On 12/21/21 12:05, Paolo Bonzini wrote: > This makes it easier to identify candidates for moving to Meson. > > Signed-off-by: Paolo Bonzini > --- > configure | 13 ++--- > 1 file changed, 6 insertions(+), 7 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 8/8] meson: add comments in the target-specific flags section
On 12/21/21 12:05, Paolo Bonzini wrote: > Signed-off-by: Paolo Bonzini > --- > meson.build | 5 + > 1 file changed, 5 insertions(+) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 6/8] meson: build contrib/ executables after generated headers
On 12/21/21 12:05, Paolo Bonzini wrote: > This will be needed as soon as config-poison.h moves from configure to > a meson custom_target (which is built at "ninja" time). > > Signed-off-by: Paolo Bonzini > --- > contrib/elf2dmp/meson.build| 2 +- > contrib/ivshmem-client/meson.build | 2 +- > contrib/ivshmem-server/meson.build | 2 +- > contrib/rdmacm-mux/meson.build | 2 +- > 4 files changed, 4 insertions(+), 4 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH] docker: include bison in debian-tricore-cross
On 12/21/21 12:16, Paolo Bonzini wrote: > Binutils sometimes fail to build if bison is not installed: > > /bin/sh ./ylwrap `test -f arparse.y || echo ./`arparse.y y.tab.c arparse.c > y.tab.h arparse.h y.output arparse.output -- -d > ./ylwrap: 109: ./ylwrap: -d: not found > > (the correct invocation of ylwrap would have "bison -d" after the double > dash). Work around by installing it in the container. > > Cc: Alex Bennée Resolves: https://gitlab.com/qemu-project/qemu/-/issues/596 Reviewed-by: Philippe Mathieu-Daudé > Signed-off-by: Paolo Bonzini > --- > tests/docker/dockerfiles/debian-tricore-cross.docker | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/tests/docker/dockerfiles/debian-tricore-cross.docker > b/tests/docker/dockerfiles/debian-tricore-cross.docker > index d8df2c6117..3f6b55562c 100644 > --- a/tests/docker/dockerfiles/debian-tricore-cross.docker > +++ b/tests/docker/dockerfiles/debian-tricore-cross.docker > @@ -16,6 +16,7 @@ MAINTAINER Philippe Mathieu-Daudé > RUN apt update && \ > DEBIAN_FRONTEND=noninteractive apt install -yy eatmydata && \ > DEBIAN_FRONTEND=noninteractive eatmydata apt install -yy \ > + bison \ > bzip2 \ > ca-certificates \ > ccache \
Re: QOM
Hi Abhijeet, On 12/21/21 12:27, abhijeet inamdar wrote: > Hi, > > 1)What does QOM stand for? QOM: "QEMU Object Model" See https://qemu-project.gitlab.io/qemu/devel/qom.html > 2)Can anyone tell what this error means? > > (qemu) Unexpected error in object_property_find() at > /home/ocp/vcpu-playground/vcpu_on_qemu/qemu-4.2.0/qom/object.c:1177: > qemu-system-arm: Property '.sysbus-irq[0]' not found > Aborted (core dumped). I suppose you are trying to connect a device gpio/irq output line to another device input, likely using sysbus_connect_irq(). The API is "connect the N-th output line from the SysBus device to this qemu_irq handler", where qemu_irq is the input line. Apparently your SysBus device doesn't have any output line registered. These are registered using sysbus_init_irq(). The first call register the first output IRQ, and so on. Some objects have their QOM interface documented, for example to use the ARM GIC see: https://gitlab.com/qemu-project/qemu/-/blob/master/include/hw/intc/arm_gic.h#L22 Hope that helps. Regards, Phil.
Re: [PATCH v11 00/31] LoongArch64 port of QEMU TCG
On 12/21/21 10:36, gaosong wrote: > Hi, > > On 2021/12/21 下午4:44, Philippe Mathieu-Daudé wrote: >> I took few hours to translate and read all Taobao contracts before >> registering, then got blacklisted at my first login... Maybe others >> will get more luck. >> >> Having someone at Loongson helping with hardware is certainly easier >> for the community. > > Loongson company can donate 3a5000 computers or provide an IP access to > 3a5000 hardware environment. Wow, this is awesome! > Which way is better? Access to 3a5000 hardware environment is certainly better for us, as this remove the maintenance burden.
Re: [PATCH v2 0/2] qemu-img convert: Fix sparseness detection
Am 17.12.21 um 17:46 schrieb Vladimir Sementsov-Ogievskiy: > Hi all! > > 01: only update test output rebasing on master > 02: replaced with my proposed solution. > > Kevin Wolf (1): > iotests: Test qemu-img convert of zeroed data cluster > > Vladimir Sementsov-Ogievskiy (1): > qemu-img: make is_allocated_sectors() more efficient > > qemu-img.c | 23 +++ > tests/qemu-iotests/122 | 1 + > tests/qemu-iotests/122.out | 2 ++ > 3 files changed, 22 insertions(+), 4 deletions(-) > Tested-by: Peter Lieven
Re: QOM
Oh, In that case I have to define my irq set for a machine to handle the exception and interrupts. BR. Abhijeet. On Tue, 21 Dec, 2021, 12:59 Philippe Mathieu-Daudé, wrote: > Hi Abhijeet, > > On 12/21/21 12:27, abhijeet inamdar wrote: > > Hi, > > > > 1)What does QOM stand for? > > QOM: "QEMU Object Model" > > See https://qemu-project.gitlab.io/qemu/devel/qom.html > > > 2)Can anyone tell what this error means? > > > > (qemu) Unexpected error in object_property_find() at > > /home/ocp/vcpu-playground/vcpu_on_qemu/qemu-4.2.0/qom/object.c:1177: > > qemu-system-arm: Property '.sysbus-irq[0]' not found > > Aborted (core dumped). > > I suppose you are trying to connect a device gpio/irq output line > to another device input, likely using sysbus_connect_irq(). > > The API is "connect the N-th output line from the SysBus device > to this qemu_irq handler", where qemu_irq is the input line. > > Apparently your SysBus device doesn't have any output line > registered. These are registered using sysbus_init_irq(). > The first call register the first output IRQ, and so on. > > Some objects have their QOM interface documented, for > example to use the ARM GIC see: > > https://gitlab.com/qemu-project/qemu/-/blob/master/include/hw/intc/arm_gic.h#L22 > > Hope that helps. > > Regards, > > Phil. > >
Re: QOM
As we have almost 80 irq lines and 40-45 NVIC_irq's. Where can I define them? BR. Abhijeet. On Tue, 21 Dec, 2021, 13:18 abhijeet inamdar, wrote: > Oh, > > In that case I have to define my irq set for a machine to handle the > exception and interrupts. > > BR. > Abhijeet. > > On Tue, 21 Dec, 2021, 12:59 Philippe Mathieu-Daudé, > wrote: > >> Hi Abhijeet, >> >> On 12/21/21 12:27, abhijeet inamdar wrote: >> > Hi, >> > >> > 1)What does QOM stand for? >> >> QOM: "QEMU Object Model" >> >> See https://qemu-project.gitlab.io/qemu/devel/qom.html >> >> > 2)Can anyone tell what this error means? >> > >> > (qemu) Unexpected error in object_property_find() at >> > /home/ocp/vcpu-playground/vcpu_on_qemu/qemu-4.2.0/qom/object.c:1177: >> > qemu-system-arm: Property '.sysbus-irq[0]' not found >> > Aborted (core dumped). >> >> I suppose you are trying to connect a device gpio/irq output line >> to another device input, likely using sysbus_connect_irq(). >> >> The API is "connect the N-th output line from the SysBus device >> to this qemu_irq handler", where qemu_irq is the input line. >> >> Apparently your SysBus device doesn't have any output line >> registered. These are registered using sysbus_init_irq(). >> The first call register the first output IRQ, and so on. >> >> Some objects have their QOM interface documented, for >> example to use the ARM GIC see: >> >> https://gitlab.com/qemu-project/qemu/-/blob/master/include/hw/intc/arm_gic.h#L22 >> >> Hope that helps. >> >> Regards, >> >> Phil. >> >>
Re: [PATCH] failover: Silence warning messages during qtest
On 20/12/2021 15.53, Laurent Vivier wrote: virtio-net-failover test tries several device combinations that produces some expected warnings. These warning can be confusing, so we disable them during the qtest sequence. Reported-by: Thomas Huth Signed-off-by: Laurent Vivier --- hw/net/virtio-net.c | 3 ++- migration/migration.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index f2014d5ea0b3..c64a6b9d1745 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -44,6 +44,7 @@ #include "hw/pci/pci.h" #include "net_rx_pkt.h" #include "hw/virtio/vhost.h" +#include "sysemu/qtest.h" #define VIRTIO_NET_VM_VERSION11 @@ -925,7 +926,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) qapi_event_send_failover_negotiated(n->netclient_name); qatomic_set(&n->failover_primary_hidden, false); failover_add_primary(n, &err); -if (err) { +if (err && !qtest_enabled()) { warn_report_err(err); } This trips the sanitizer build now: https://gitlab.com/thuth/qemu/-/jobs/1907374419 I think you have to error_free(err) in case qtest_enabled() ? Thomas
[PATCH v2 0/6] migration: misc cleanups
Hi Changes since v1: - Add reviewed tags for reviewed patches - Change comment about last_stage (philmd) - Change cast to 1ULL for ram_release_page() - remove TARGET_PAGE_MASK mask. It was used only for compression, and offset should be page aligned already Please, review. Thanks, Juan. [v1] This series do several cleanups: - Dave found that I was using "%d" for unsigned, fix all uses. - We pass last_stage left and right, but we only use it in two places Just move it to RAMState. - do_compress_page() used a goto when not needed. - ram_release_pages() was always used with one page - And put it when we detect zero pages, instead of everywhere we have find a zero page. Please, review. Juan Quintela (6): migration: All this fields are unsigned migration: We only need last_stage in two places migration: ram_release_pages() always receive 1 page as argument migration: Remove masking for compression migration: simplify do_compress_ram_page migration: Move ram_release_pages() call to save_zero_page_to_file() migration/multifd-zlib.c | 20 +-- migration/multifd-zstd.c | 24 +++--- migration/multifd.c | 16 - migration/ram.c | 71 +--- migration/trace-events | 26 +++ 5 files changed, 73 insertions(+), 84 deletions(-) -- 2.33.1
[PATCH v2 1/6] migration: All this fields are unsigned
So printing it as %d is wrong. Notice that for the channel id, that is an uint8_t, but I changed it anyways for consistency. Signed-off-by: Juan Quintela Reviewed-by: Philippe Mathieu-Daudé --- migration/multifd-zlib.c | 20 ++-- migration/multifd-zstd.c | 24 migration/multifd.c | 16 migration/trace-events | 26 +- 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c index da6201704c..9f6ebf1076 100644 --- a/migration/multifd-zlib.c +++ b/migration/multifd-zlib.c @@ -51,7 +51,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) zs->opaque = Z_NULL; if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { g_free(z); -error_setg(errp, "multifd %d: deflate init failed", p->id); +error_setg(errp, "multifd %u: deflate init failed", p->id); return -1; } /* To be safe, we reserve twice the size of the packet */ @@ -60,7 +60,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) if (!z->zbuff) { deflateEnd(&z->zs); g_free(z); -error_setg(errp, "multifd %d: out of memory for zbuff", p->id); +error_setg(errp, "multifd %u: out of memory for zbuff", p->id); return -1; } p->data = z; @@ -132,12 +132,12 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) ret = deflate(zs, flush); } while (ret == Z_OK && zs->avail_in && zs->avail_out); if (ret == Z_OK && zs->avail_in) { -error_setg(errp, "multifd %d: deflate failed to compress all input", +error_setg(errp, "multifd %u: deflate failed to compress all input", p->id); return -1; } if (ret != Z_OK) { -error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK", +error_setg(errp, "multifd %u: deflate returned %d instead of Z_OK", p->id, ret); return -1; } @@ -190,7 +190,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) zs->avail_in = 0; zs->next_in = Z_NULL; if (inflateInit(zs) != Z_OK) { -error_setg(errp, "multifd %d: inflate init failed", p->id); +error_setg(errp, "multifd %u: inflate init failed", p->id); return -1; } /* To be safe, we reserve twice the size of the packet */ @@ -198,7 +198,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) z->zbuff = g_try_malloc(z->zbuff_len); if (!z->zbuff) { inflateEnd(zs); -error_setg(errp, "multifd %d: out of memory for zbuff", p->id); +error_setg(errp, "multifd %u: out of memory for zbuff", p->id); return -1; } return 0; @@ -247,7 +247,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) int i; if (flags != MULTIFD_FLAG_ZLIB) { -error_setg(errp, "multifd %d: flags received %x flags expected %x", +error_setg(errp, "multifd %u: flags received %x flags expected %x", p->id, flags, MULTIFD_FLAG_ZLIB); return -1; } @@ -284,19 +284,19 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) } while (ret == Z_OK && zs->avail_in && (zs->total_out - start) < page_size); if (ret == Z_OK && (zs->total_out - start) < page_size) { -error_setg(errp, "multifd %d: inflate generated too few output", +error_setg(errp, "multifd %u: inflate generated too few output", p->id); return -1; } if (ret != Z_OK) { -error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK", +error_setg(errp, "multifd %u: inflate returned %d instead of Z_OK", p->id, ret); return -1; } } out_size = zs->total_out - out_size; if (out_size != expected_size) { -error_setg(errp, "multifd %d: packet size received %d size expected %d", +error_setg(errp, "multifd %u: packet size received %u size expected %u", p->id, out_size, expected_size); return -1; } diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index 2d5b61106c..cc4e991724 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -55,7 +55,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) z->zcs = ZSTD_createCStream(); if (!z->zcs) { g_free(z); -error_setg(errp, "multifd %d: zstd createCStream failed", p->id); +error_setg(errp, "multifd %u: zstd createCStream failed", p->id); return -1; } @@ -63,7 +63,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) if (ZSTD_isError(res)) { ZSTD_freeCStream(z->zcs); g
[PATCH v2 2/6] migration: We only need last_stage in two places
We only need last_stage in two places and we are passing it all around. Just add a field to RAMState that passes it. Signed-off-by: Juan Quintela --- Repeat subject (philmd suggestion) --- migration/ram.c | 41 ++--- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 57efa67f20..7223b0d8ca 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -325,7 +325,8 @@ struct RAMState { uint64_t xbzrle_bytes_prev; /* Start using XBZRLE (e.g., after the first round). */ bool xbzrle_enabled; - +/* Are we on the last stage of migration */ +bool last_stage; /* compression statistics since the beginning of the period */ /* amount of count that no free thread to compress data */ uint64_t compress_thread_busy_prev; @@ -683,11 +684,10 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) * @current_addr: addr of the page * @block: block that contains the page we want to send * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage */ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, -ram_addr_t offset, bool last_stage) +ram_addr_t offset) { int encoded_len = 0, bytes_xbzrle; uint8_t *prev_cached_page; @@ -695,7 +695,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, if (!cache_is_cached(XBZRLE.cache, current_addr, ram_counters.dirty_sync_count)) { xbzrle_counters.cache_miss++; -if (!last_stage) { +if (!rs->last_stage) { if (cache_insert(XBZRLE.cache, current_addr, *current_data, ram_counters.dirty_sync_count) == -1) { return -1; @@ -734,7 +734,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, * Update the cache contents, so that it corresponds to the data * sent, in all cases except where we skip the page. */ -if (!last_stage && encoded_len != 0) { +if (!rs->last_stage && encoded_len != 0) { memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); /* * In the case where we couldn't compress, ensure that the caller @@ -1290,9 +1290,8 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage */ -static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) +static int ram_save_page(RAMState *rs, PageSearchStatus *pss) { int pages = -1; uint8_t *p; @@ -1307,8 +1306,8 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) XBZRLE_cache_lock(); if (rs->xbzrle_enabled && !migration_in_postcopy()) { pages = save_xbzrle_page(rs, &p, current_addr, block, - offset, last_stage); -if (!last_stage) { + offset); +if (!rs->last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire */ @@ -2129,10 +2128,8 @@ static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) * * @rs: current RAM state * @pss: data about the page we want to send - * @last_stage: if we are at the completion stage */ -static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, -bool last_stage) +static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) { RAMBlock *block = pss->block; ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; @@ -2171,7 +2168,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, return ram_save_multifd_page(rs, block, offset); } -return ram_save_page(rs, pss, last_stage); +return ram_save_page(rs, pss); } /** @@ -2190,10 +2187,8 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, * @rs: current RAM state * @ms: current migration state * @pss: data about the page we want to send - * @last_stage: if we are at the completion stage */ -static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - bool last_stage) +static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) { int tmppages, pages = 0; size_t pagesize_bits = @@ -2211,7 +2206,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, do { /* Check the pages is dirty and if it is send it */ if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { -tmppages = ram_save_t
[PATCH v2 4/6] migration: Remove masking for compression
Remove the mask in the call to ram_release_pages(). Nothing else does it, and if the offset has that bits set, we have a lot of trouble. Signed-off-by: Juan Quintela --- migration/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 0b98862b9e..4ee0369d6f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1365,7 +1365,7 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, } exit: -ram_release_page(block->idstr, offset & TARGET_PAGE_MASK); +ram_release_page(block->idstr, offset); return zero_page; } -- 2.33.1
[PATCH v2 5/6] migration: simplify do_compress_ram_page
The goto is not needed at all. Signed-off-by: Juan Quintela --- migration/ram.c | 11 +++ 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 4ee0369d6f..eddc85ffb0 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1341,12 +1341,11 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, { RAMState *rs = ram_state; uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); -bool zero_page = false; int ret; if (save_zero_page_to_file(rs, f, block, offset)) { -zero_page = true; -goto exit; +ram_release_page(block->idstr, offset & TARGET_PAGE_MASK); +return true; } save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); @@ -1361,12 +1360,8 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, if (ret < 0) { qemu_file_set_error(migrate_get_current()->to_dst_file, ret); error_report("compressed data failed!"); -return false; } - -exit: -ram_release_page(block->idstr, offset); -return zero_page; +return false; } static void -- 2.33.1
[PATCH v2 3/6] migration: ram_release_pages() always receive 1 page as argument
Remove the pages argument. And s/pages/page/ Signed-off-by: Juan Quintela Reviewed-by: Philippe Mathieu-Daudé -- Use 1LL instead of casts Signed-off-by: Juan Quintela --- migration/ram.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 7223b0d8ca..0b98862b9e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1204,13 +1204,13 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) return -1; } -static void ram_release_pages(const char *rbname, uint64_t offset, int pages) +static void ram_release_page(const char *rbname, uint64_t offset) { if (!migrate_release_ram() || !migration_in_postcopy()) { return; } -ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS); +ram_discard_range(rbname, offset, 1ULL << TARGET_PAGE_BITS); } /* @@ -1365,7 +1365,7 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, } exit: -ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); +ram_release_page(block->idstr, offset & TARGET_PAGE_MASK); return zero_page; } @@ -2153,7 +2153,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) xbzrle_cache_zero_page(rs, block->offset + offset); XBZRLE_cache_unlock(); } -ram_release_pages(block->idstr, offset, res); +ram_release_page(block->idstr, offset); return res; } -- 2.33.1
[PATCH v2 6/6] migration: Move ram_release_pages() call to save_zero_page_to_file()
We always need to call it when we find a zero page, so put it in a single place. Signed-off-by: Juan Quintela --- migration/ram.c | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index eddc85ffb0..3cd98e19d5 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1158,6 +1158,15 @@ static void migration_bitmap_sync_precopy(RAMState *rs) } } +static void ram_release_page(const char *rbname, uint64_t offset) +{ +if (!migrate_release_ram() || !migration_in_postcopy()) { +return; +} + +ram_discard_range(rbname, offset, 1ULL << TARGET_PAGE_BITS); +} + /** * save_zero_page_to_file: send the zero page to the file * @@ -1179,6 +1188,7 @@ static int save_zero_page_to_file(RAMState *rs, QEMUFile *file, len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; +ram_release_page(block->idstr, offset); } return len; } @@ -1204,15 +1214,6 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) return -1; } -static void ram_release_page(const char *rbname, uint64_t offset) -{ -if (!migrate_release_ram() || !migration_in_postcopy()) { -return; -} - -ram_discard_range(rbname, offset, 1ULL << TARGET_PAGE_BITS); -} - /* * @pages: the number of pages written by the control path, *< 0 - error @@ -1344,7 +1345,6 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, int ret; if (save_zero_page_to_file(rs, f, block, offset)) { -ram_release_page(block->idstr, offset & TARGET_PAGE_MASK); return true; } @@ -2148,7 +2148,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) xbzrle_cache_zero_page(rs, block->offset + offset); XBZRLE_cache_unlock(); } -ram_release_page(block->idstr, offset); return res; } -- 2.33.1
Re: [PATCH] vl: Add -set options to device opts dict when using JSON syntax for -device
Markus Armbruster writes: > MkfsSion writes: > >> When using JSON syntax for -device, -set option can not find device >> specified in JSON by id field. The following commandline is an example: >> >> $ qemu-system-x86_64 -device '{"id":"foo"}' -set device.foo.bar=1 >> qemu-system-x86_64: -set device.foo.bar=1: there is no device "foo" defined > > Is this a regression? I suspect commit 5dacda5167 "vl: Enable JSON > syntax for -device" (v6.2.0). Obviously not a regression: everything that used to work still works. > I believe any conversion away from QemuOpts loses -set. [...]
Re: [PATCH v2 6/8] migration: Dump sub-cmd name in loadvm_process_command tp
On Tue, Dec 21, 2021 at 10:08:24AM +, David Edmondson wrote: > > @@ -22,7 +22,7 @@ loadvm_postcopy_handle_resume(void) "" > > loadvm_postcopy_ram_handle_discard(void) "" > > loadvm_postcopy_ram_handle_discard_end(void) "" > > loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) > > "%s: %ud" > > -loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d" > > +loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" > > "cmd" rather than "com", to match the code. "com" is what it was used before (perhaps "com"mand?). Thanks, -- Peter Xu
Re: [PATCH v2 7/8] migration: Finer grained tracepoints for POSTCOPY_LISTEN
On Tue, Dec 21, 2021 at 10:12:34AM +, David Edmondson wrote: > On Monday, 2021-12-20 at 16:53:54 +08, Peter Xu wrote: > > > The enablement of postcopy listening has a few steps, add a few tracepoints > > to > > be there ready for some basic measurements for them. > > > > Signed-off-by: Peter Xu > > --- > > migration/savevm.c | 9 - > > migration/trace-events | 2 +- > > 2 files changed, 9 insertions(+), 2 deletions(-) > > > > diff --git a/migration/savevm.c b/migration/savevm.c > > index 7f7af6f750..25face6de0 100644 > > --- a/migration/savevm.c > > +++ b/migration/savevm.c > > @@ -1947,9 +1947,10 @@ static void *postcopy_ram_listen_thread(void *opaque) > > static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > > { > > PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); > > -trace_loadvm_postcopy_handle_listen(); > > Error *local_err = NULL; > > > > +trace_loadvm_postcopy_handle_listen("enter"); > > + > > if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) > > { > > error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", > > ps); > > return -1; > > @@ -1964,6 +1965,8 @@ static int > > loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > > } > > } > > > > +trace_loadvm_postcopy_handle_listen("after disgard"); > > s/disgard/discard/ Will fix. > > > + > > /* > > * Sensitise RAM - can now generate requests for blocks that don't > > exist > > * However, at this point the CPU shouldn't be running, and the IO > > @@ -1976,6 +1979,8 @@ static int > > loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > > } > > } > > > > +trace_loadvm_postcopy_handle_listen("after uffd"); > > + > > if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) { > > error_report_err(local_err); > > return -1; > > @@ -1990,6 +1995,8 @@ static int > > loadvm_postcopy_handle_listen(MigrationIncomingState *mis) > > qemu_sem_wait(&mis->listen_thread_sem); > > qemu_sem_destroy(&mis->listen_thread_sem); > > > > +trace_loadvm_postcopy_handle_listen("exit"); > > + > > "return" rather than "exit"? I don't think it matters a lot, but sure. Thanks, -- Peter Xu
Re: [PATCH] failover: Silence warning messages during qtest
On 12/21/21 13:30, Thomas Huth wrote: > On 20/12/2021 15.53, Laurent Vivier wrote: >> virtio-net-failover test tries several device combinations that produces >> some expected warnings. >> These warning can be confusing, so we disable them during the qtest >> sequence. >> >> Reported-by: Thomas Huth >> Signed-off-by: Laurent Vivier >> --- >> hw/net/virtio-net.c | 3 ++- >> migration/migration.c | 4 +++- >> 2 files changed, 5 insertions(+), 2 deletions(-) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index f2014d5ea0b3..c64a6b9d1745 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -44,6 +44,7 @@ >> #include "hw/pci/pci.h" >> #include "net_rx_pkt.h" >> #include "hw/virtio/vhost.h" >> +#include "sysemu/qtest.h" >> #define VIRTIO_NET_VM_VERSION 11 >> @@ -925,7 +926,7 @@ static void virtio_net_set_features(VirtIODevice >> *vdev, uint64_t features) >> qapi_event_send_failover_negotiated(n->netclient_name); >> qatomic_set(&n->failover_primary_hidden, false); >> failover_add_primary(n, &err); >> - if (err) { >> + if (err && !qtest_enabled()) { >> warn_report_err(err); >> } > > This trips the sanitizer build now: > > https://gitlab.com/thuth/qemu/-/jobs/1907374419 > > I think you have to error_free(err) in case qtest_enabled() ? Indeed. In that case it might be better to add a warn_report_err_except_qtest() helper...
Re: [PATCH v2 5/6] migration: simplify do_compress_ram_page
On 12/21/21 13:52, Juan Quintela wrote: > The goto is not needed at all. > > Signed-off-by: Juan Quintela > --- > migration/ram.c | 11 +++ > 1 file changed, 3 insertions(+), 8 deletions(-) > > diff --git a/migration/ram.c b/migration/ram.c > index 4ee0369d6f..eddc85ffb0 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -1341,12 +1341,11 @@ static bool do_compress_ram_page(QEMUFile *f, > z_stream *stream, RAMBlock *block, > { > RAMState *rs = ram_state; > uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); > -bool zero_page = false; > int ret; > > if (save_zero_page_to_file(rs, f, block, offset)) { > -zero_page = true; > -goto exit; > +ram_release_page(block->idstr, offset & TARGET_PAGE_MASK); We don't want TARGET_PAGE_MASK anymore here, right? > +return true; > } > > save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); > @@ -1361,12 +1360,8 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream > *stream, RAMBlock *block, > if (ret < 0) { > qemu_file_set_error(migrate_get_current()->to_dst_file, ret); > error_report("compressed data failed!"); > -return false; > } > - > -exit: > -ram_release_page(block->idstr, offset); > -return zero_page; > +return false; > } > > static void
Re: [PATCH 2/2] qapi/ui: introduce change-vnc-listen
21.12.2021 11:13, Marc-André Lureau wrote: Hi On Mon, Dec 20, 2021 at 10:24 PM Vladimir Sementsov-Ogievskiy mailto:vsement...@virtuozzo.com>> wrote: Add command that can change addresses where VNC server listens for new connections. Prior to 6.0 this functionality was available through 'change' qmp command which was deleted. Signed-off-by: Vladimir Sementsov-Ogievskiy mailto:vsement...@virtuozzo.com>> Looks good to me, Reviewed-by: Marc-André Lureau mailto:marcandre.lur...@redhat.com>> Could you write an avocado test for it? (tests/avocado/vnc.py) Thanks a lot for reviewing! I will. -- Best regards, Vladimir
[PATCH 3/2] avocado/vnc: add test_change_listen
Add simple test-case for new change-vnc-listen qmp command. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/avocado/vnc.py | 10 ++ 1 file changed, 10 insertions(+) diff --git a/tests/avocado/vnc.py b/tests/avocado/vnc.py index 096432988f..f05ee1e00a 100644 --- a/tests/avocado/vnc.py +++ b/tests/avocado/vnc.py @@ -51,3 +51,13 @@ def test_change_password(self): set_password_response = self.vm.qmp('change-vnc-password', password='new_password') self.assertEqual(set_password_response['return'], {}) + +def test_change_listen(self): +self.vm.add_args('-nodefaults', '-S', '-vnc', ':0') +self.vm.launch() +self.assertEqual(self.vm.qmp('query-vnc')['return']['service'], '5900') +res = self.vm.qmp('change-vnc-listen', id='default', + addresses=[{'type': 'inet', 'host': '0.0.0.0', + 'port': '5901'}]) +self.assertEqual(res['return'], {}) +self.assertEqual(self.vm.qmp('query-vnc')['return']['service'], '5901') -- 2.31.1
Re: [PATCH 3/2] avocado/vnc: add test_change_listen
On Tue, Dec 21, 2021 at 5:38 PM Vladimir Sementsov-Ogievskiy wrote: > > Add simple test-case for new change-vnc-listen qmp command. > > Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Marc-André Lureau thanks > --- > tests/avocado/vnc.py | 10 ++ > 1 file changed, 10 insertions(+) > > diff --git a/tests/avocado/vnc.py b/tests/avocado/vnc.py > index 096432988f..f05ee1e00a 100644 > --- a/tests/avocado/vnc.py > +++ b/tests/avocado/vnc.py > @@ -51,3 +51,13 @@ def test_change_password(self): > set_password_response = self.vm.qmp('change-vnc-password', > password='new_password') > self.assertEqual(set_password_response['return'], {}) > + > +def test_change_listen(self): > +self.vm.add_args('-nodefaults', '-S', '-vnc', ':0') > +self.vm.launch() > +self.assertEqual(self.vm.qmp('query-vnc')['return']['service'], > '5900') > +res = self.vm.qmp('change-vnc-listen', id='default', > + addresses=[{'type': 'inet', 'host': '0.0.0.0', > + 'port': '5901'}]) > +self.assertEqual(res['return'], {}) > +self.assertEqual(self.vm.qmp('query-vnc')['return']['service'], > '5901') > -- > 2.31.1 >
Re: [PATCH] vl: Add opts to device opts list when using JSON syntax for -device
Sorry, the patch is not well tested and it causes duplicate devices creation. I have send another patch to the mailing list to fix the issue. On 2021/12/21 16:25, Philippe Mathieu-Daudé wrote: > Cc'ing Markus. > > On 12/20/21 09:45, MkfsSion wrote: >> When using JSON syntax for -device, -set option can not find device >> specified in JSON by id field. The following commandline is an example: >> >> $ qemu-system-x86_64 -device '{"id":"foo"}' -set device.foo.bar=1 >> qemu-system-x86_64: -set device.foo.bar=1: there is no device "foo" defined >> >> The patch adds device opts to device opts list when a device opts get >> parsed. >> >> Signed-off-by: MkfsSion > > BTW per: > https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line > "Please use your real name to sign a patch (not an alias or acronym)." The issue has been fixed in new patch. > >> --- >> softmmu/vl.c | 2 ++ >> 1 file changed, 2 insertions(+) >> >> diff --git a/softmmu/vl.c b/softmmu/vl.c >> index 620a1f1367..0dd5acbc1a 100644 >> --- a/softmmu/vl.c >> +++ b/softmmu/vl.c >> @@ -3400,6 +3400,8 @@ void qemu_init(int argc, char **argv, char **envp) >> loc_save(&opt->loc); >> assert(opt->opts != NULL); >> QTAILQ_INSERT_TAIL(&device_opts, opt, next); >> +qemu_opts_from_qdict(qemu_find_opts_err("device", >> &error_fatal), >> + opt->opts, &error_fatal); >> } else { >> if (!qemu_opts_parse_noisily(qemu_find_opts("device"), >> optarg, true)) { >
[PATCH] vl: Add -set options to device opts dict when using JSON syntax for -device
When using JSON syntax for -device, -set option can not find device specified in JSON by id field. The following commandline is an example: $ qemu-system-x86_64 -device '{"id":"foo"}' -set device.foo.bar=1 qemu-system-x86_64: -set device.foo.bar=1: there is no device "foo" defined The patch adds -set options to JSON device opts dict for adding options to device by latter object_set_properties_from_keyval call. Signed-off-by: YuanYang Meng --- include/qemu/option.h | 4 softmmu/vl.c | 28 util/qemu-option.c| 2 +- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/qemu/option.h b/include/qemu/option.h index 306bf07575..31fa9fdc25 100644 --- a/include/qemu/option.h +++ b/include/qemu/option.h @@ -45,6 +45,10 @@ const char *get_opt_value(const char *p, char **value); bool parse_option_size(const char *name, const char *value, uint64_t *ret, Error **errp); + +bool parse_option_number(const char *name, const char *value, + uint64_t *ret, Error **errp); + bool has_help_option(const char *param); enum QemuOptType { diff --git a/softmmu/vl.c b/softmmu/vl.c index 620a1f1367..feb3c49a65 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -30,7 +30,9 @@ #include "hw/qdev-properties.h" #include "qapi/compat-policy.h" #include "qapi/error.h" +#include "qapi/qmp/qbool.h" #include "qapi/qmp/qdict.h" +#include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qapi/qmp/qjson.h" #include "qemu-version.h" @@ -2279,6 +2281,7 @@ static void qemu_set_option(const char *str, Error **errp) char group[64], id[64], arg[64]; QemuOptsList *list; QemuOpts *opts; +DeviceOption *opt; int rc, offset; rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset); @@ -2294,6 +2297,31 @@ static void qemu_set_option(const char *str, Error **errp) if (list) { opts = qemu_opts_find(list, id); if (!opts) { +QTAILQ_FOREACH(opt, &device_opts, next) { +const char *device_id = qdict_get_try_str(opt->opts, "id"); +if (device_id && (strcmp(device_id, id) == 0)) { +if (qdict_get(opt->opts, arg)) { +qdict_del(opt->opts, arg); +} +const char *value = str + offset + 1; +QObject *obj = NULL; +bool boolean; +uint64_t num; +if (qapi_bool_parse(arg, value, &boolean, NULL)) { +obj = QOBJECT(qbool_from_bool(boolean)); +} else if (parse_option_number(arg, value, &num, NULL)) { +obj = QOBJECT(qnum_from_uint(num)); +} else if (parse_option_size(arg, value, &num, NULL)) { +obj = QOBJECT(qnum_from_uint(num)); +} else { +obj = QOBJECT(qstring_from_str(value)); +} +if (obj) { +qdict_put_obj(opt->opts, arg, obj); +return; +} +} +} error_setg(errp, "there is no %s \"%s\" defined", group, id); return; } diff --git a/util/qemu-option.c b/util/qemu-option.c index eedd08929b..b2427cba9f 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -88,7 +88,7 @@ const char *get_opt_value(const char *p, char **value) return offset; } -static bool parse_option_number(const char *name, const char *value, +bool parse_option_number(const char *name, const char *value, uint64_t *ret, Error **errp) { uint64_t number; -- 2.34.1
Re: [PATCH RFC] MAINTAINERS: split out s390x sections
On Mon, 20 Dec 2021 12:54:19 +0100 Cornelia Huck wrote: > Split out some more specialized devices etc., so that we can build > smarter lists of people to be put on cc: in the future. > > Signed-off-by: Cornelia Huck LGTM Acked-by: Halil Pasic
Re: [PATCH 2/2] qapi/ui: introduce change-vnc-listen
Vladimir Sementsov-Ogievskiy writes: > Add command that can change addresses where VNC server listens for new > connections. Prior to 6.0 this functionality was available through > 'change' qmp command which was deleted. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > --- > docs/about/removed-features.rst | 3 ++- > qapi/ui.json| 12 > ui/vnc.c| 26 ++ > 3 files changed, 40 insertions(+), 1 deletion(-) > > diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst > index d42c3341de..20e6901a82 100644 > --- a/docs/about/removed-features.rst > +++ b/docs/about/removed-features.rst > @@ -348,7 +348,8 @@ documentation of ``query-hotpluggable-cpus`` for > additional details. > ``change`` (removed in 6.0) > ''' > > -Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. > +Use ``blockdev-change-medium`` or ``change-vnc-password`` or > +``change-vnc-listen`` instead. > > ``query-events`` (removed in 6.0) > ' > diff --git a/qapi/ui.json b/qapi/ui.json > index d7567ac866..14e6fe0b4c 100644 > --- a/qapi/ui.json > +++ b/qapi/ui.json > @@ -1304,3 +1304,15 @@ > { 'command': 'display-reload', >'data': 'DisplayReloadOptions', >'boxed' : true } > + > +## > +# @change-vnc-listen: > +# > +# Change set of addresses to listen for connections. Please document the arguments: # @id: lorem ipsum # # @address: dolor sit amet # # @websockets: consectetur adipisici elit > +# > +# Since: 7.0 > +# > +## > +{ 'command': 'change-vnc-listen', > + 'data': { 'id': 'str', 'addresses': ['SocketAddress'], > +'*websockets': ['SocketAddress'] } } Lacks 'if': 'CONFIG_VNC'. We already have change-vnc-password. You add change-vnc-listen. Is there anything else we might want to change? Aside: what's the difference between change-vnc-password and set_password? [...]
Re: [PATCH v4 0/7] nbd reconnect on open
13.12.2021 18:32, Vladimir Sementsov-Ogievskiy wrote: Hi all! The functionality is reviewed, python testing part is not. I've dropped the patch "qapi: make blockdev-add a coroutine command": it's optional, I don't want to slow down the whole series because of it. v4: 01-03: wording, add Eric's r-b others: small changes, never had an r-b Vladimir Sementsov-Ogievskiy (7): nbd: allow reconnect on open, with corresponding new options nbd/client-connection: nbd_co_establish_connection(): return real error nbd/client-connection: improve error message of cancelled attempt iotests.py: add qemu_tool_popen() For qemu_io* functions support --image-opts argument, which conflicts with -f argument from qemu_io_args. Add qemu-io Popen constructor wrapper. To be used in the following new test commit. iotests: add nbd-reconnect-on-open test qapi/block-core.json | 9 ++- block/nbd.c | 45 +++- nbd/client-connection.c | 59 ++- tests/qemu-iotests/iotests.py | 36 ++ .../qemu-iotests/tests/nbd-reconnect-on-open | 71 +++ .../tests/nbd-reconnect-on-open.out | 11 +++ 6 files changed, 199 insertions(+), 32 deletions(-) create mode 100755 tests/qemu-iotests/tests/nbd-reconnect-on-open create mode 100644 tests/qemu-iotests/tests/nbd-reconnect-on-open.out Thanks for reviewing! I do s/6.2/7.0/ fix to patch 1, restore subjects of patches 5,6 (which were somehow lost in transition v3->v4) and apply the series to my nbd branch. -- Best regards, Vladimir
Re: [RFC PATCH v2 02/14] job.h: categorize fields in struct Job
On 16/12/2021 17:21, Stefan Hajnoczi wrote: On Thu, Nov 04, 2021 at 10:53:22AM -0400, Emanuele Giuseppe Esposito wrote: Categorize the fields in struct Job to understand which ones need to be protected by the job mutex and which don't. Signed-off-by: Emanuele Giuseppe Esposito --- include/qemu/job.h | 57 +++--- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/include/qemu/job.h b/include/qemu/job.h index ccf7826426..f7036ac6b3 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -40,27 +40,52 @@ typedef struct JobTxn JobTxn; * Long-running operation. */ typedef struct Job { + +/* Fields set at initialization (job_create), and never modified */ + /** The ID of the job. May be NULL for internal jobs. */ char *id; -/** The type of this job. */ +/** + * The type of this job. + * All callbacks are called with job_mutex *not* held. + */ const JobDriver *driver; -/** Reference count of the block job */ -int refcnt; - -/** Current state; See @JobStatus for details. */ -JobStatus status; - /** AioContext to run the job coroutine in */ AioContext *aio_context; "Fields set at initialization (job_create), and never modified" does not apply here. blockjob.c:child_job_set_aio_ctx() changes it at runtime. Right. aio_context can theoretically avoid also the job_mutex, if we make sure that all klass->set_aio_ctx() are under BQL (they are) and under drains (work in progress). For now I will protect it with job_lock(). Thank you, Emanuele
[PATCH v3 02/15] pcie: Add some SR/IOV API documentation in docs/pcie_sriov.txt
From: Knut Omang Add a small intro + minimal documentation for how to implement SR/IOV support for an emulated device. Signed-off-by: Knut Omang --- docs/pcie_sriov.txt | 115 1 file changed, 115 insertions(+) create mode 100644 docs/pcie_sriov.txt diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt new file mode 100644 index 00..f5e891e1d4 --- /dev/null +++ b/docs/pcie_sriov.txt @@ -0,0 +1,115 @@ +PCI SR/IOV EMULATION SUPPORT + + +Description +=== +SR/IOV (Single Root I/O Virtualization) is an optional extended capability +of a PCI Express device. It allows a single physical function (PF) to appear as multiple +virtual functions (VFs) for the main purpose of eliminating software +overhead in I/O from virtual machines. + +Qemu now implements the basic common functionality to enable an emulated device +to support SR/IOV. Yet no fully implemented devices exists in Qemu, but a +proof-of-concept hack of the Intel igb can be found here: + +git://github.com/knuto/qemu.git sriov_patches_v5 + +Implementation +== +Implementing emulation of an SR/IOV capable device typically consists of +implementing support for two types of device classes; the "normal" physical device +(PF) and the virtual device (VF). From Qemu's perspective, the VFs are just +like other devices, except that some of their properties are derived from +the PF. + +A virtual function is different from a physical function in that the BAR +space for all VFs are defined by the BAR registers in the PFs SR/IOV +capability. All VFs have the same BARs and BAR sizes. + +Accesses to these virtual BARs then is computed as + ++ * + + +From our emulation perspective this means that there is a separate call for +setting up a BAR for a VF. + +1) To enable SR/IOV support in the PF, it must be a PCI Express device so + you would need to add a PCI Express capability in the normal PCI + capability list. You might also want to add an ARI (Alternative + Routing-ID Interpretation) capability to indicate that your device + supports functions beyond it's "own" function space (0-7), + which is necessary to support more than 7 functions, or + if functions extends beyond offset 7 because they are placed at an + offset > 1 or have stride > 1. + + ... + #include "hw/pci/pcie.h" + #include "hw/pci/pcie_sriov.h" + + pci_your_pf_dev_realize( ... ) + { + ... + int ret = pcie_endpoint_cap_init(d, 0x70); + ... + pcie_ari_init(d, 0x100, 1); + ... + + /* Add and initialize the SR/IOV capability */ + pcie_sriov_pf_init(d, 0x200, "your_virtual_dev", + vf_devid, initial_vfs, total_vfs, + fun_offset, stride); + + /* Set up individual VF BARs (parameters as for normal BARs) */ + pcie_sriov_pf_init_vf_bar( ... ) + ... + } + + For cleanup, you simply call: + + pcie_sriov_pf_exit(device); + + which will delete all the virtual functions and associated resources. + +2) Similarly in the implementation of the virtual function, you need to + make it a PCI Express device and add a similar set of capabilities + except for the SR/IOV capability. Then you need to set up the VF BARs as + subregions of the PFs SR/IOV VF BARs by calling + pcie_sriov_vf_register_bar() instead of the normal pci_register_bar() call: + + pci_your_vf_dev_realize( ... ) + { + ... + int ret = pcie_endpoint_cap_init(d, 0x60); + ... + pcie_ari_init(d, 0x100, 1); + ... + memory_region_init(mr, ... ) + pcie_sriov_vf_register_bar(d, bar_nr, mr); + ... + } + +Testing on Linux guest +== +The easiest is if your device driver supports sysfs based SR/IOV +enabling. Support for this was added in kernel v.3.8, so not all drivers +support it yet. + +To enable 4 VFs for a device at 01:00.0: + + modprobe yourdriver + echo 4 > /sys/bus/pci/devices/:01:00.0/sriov_numvfs + +You should now see 4 VFs with lspci. +To turn SR/IOV off again - the standard requires you to turn it off before you can enable +another VF count, and the emulation enforces this: + + echo 0 > /sys/bus/pci/devices/:01:00.0/sriov_numvfs + +Older drivers typically provide a max_vfs module parameter +to enable it at load time: + + modprobe yourdriver max_vfs=4 + +To disable the VFs again then, you simply have to unload the driver: + + rmmod yourdriver -- 2.25.1
[PATCH v3 00/15] hw/nvme: SR-IOV with Virtualization Enhancements
This is the version of the patch series that we consider ready for staging. We do not intend to work on the v4 unless there are major issues. Changes since v2: - The documentation mentions that SR-IOV support is still an experimental feature. - The default value activates properly when sriov_max_v{i,q}_per_vf == 0. - Secondary Controller List (CNS 15h) handles the CDW10.CNTID field. - Virtual Function Number ("VFN") in Secondary Controller Entry is not cleared to zero as the controller goes offline. - Removed no longer used helper pcie_sriov_vf_number_total. - Reset other than Controller Reset is necessary to activate (or deactivate) flexible resources. - The v{i,q}rfap fields in Primary Controller Capabilities store the currently active number of bound resources, not the number active after reset. - Secondary controller cannot be set online unless the corresponding VF is enabled (sriov_numvfs set to at least the secondary controller's VF number) The list of opens and known gaps remains the same as for v2: https://lists.gnu.org/archive/html/qemu-block/2021-11/msg00423.html Knut Omang (2): pcie: Add support for Single Root I/O Virtualization (SR/IOV) pcie: Add some SR/IOV API documentation in docs/pcie_sriov.txt Lukasz Maniak (4): hw/nvme: Add support for SR-IOV hw/nvme: Add support for Primary Controller Capabilities hw/nvme: Add support for Secondary Controller List docs: Add documentation for SR-IOV and Virtualization Enhancements Łukasz Gieryk (9): pcie: Add a helper to the SR/IOV API pcie: Add 1.2 version token for the Power Management Capability hw/nvme: Implement the Function Level Reset hw/nvme: Make max_ioqpairs and msix_qsize configurable in runtime hw/nvme: Remove reg_size variable and update BAR0 size calculation hw/nvme: Calculate BAR attributes in a function hw/nvme: Initialize capability structures for primary/secondary controllers hw/nvme: Add support for the Virtualization Management command hw/nvme: Update the initalization place for the AER queue docs/pcie_sriov.txt | 115 ++ docs/system/devices/nvme.rst | 36 ++ hw/nvme/ctrl.c | 665 --- hw/nvme/ns.c | 2 +- hw/nvme/nvme.h | 55 ++- hw/nvme/subsys.c | 75 +++- hw/nvme/trace-events | 6 + hw/pci/meson.build | 1 + hw/pci/pci.c | 97 +++-- hw/pci/pcie.c| 5 + hw/pci/pcie_sriov.c | 295 hw/pci/trace-events | 5 + include/block/nvme.h | 65 include/hw/pci/pci.h | 12 +- include/hw/pci/pci_ids.h | 1 + include/hw/pci/pci_regs.h| 1 + include/hw/pci/pcie.h| 6 + include/hw/pci/pcie_sriov.h | 72 include/qemu/typedefs.h | 2 + 19 files changed, 1435 insertions(+), 81 deletions(-) create mode 100644 docs/pcie_sriov.txt create mode 100644 hw/pci/pcie_sriov.c create mode 100644 include/hw/pci/pcie_sriov.h -- 2.25.1
[PATCH v3 05/15] hw/nvme: Add support for SR-IOV
This patch implements initial support for Single Root I/O Virtualization on an NVMe device. Essentially, it allows to define the maximum number of virtual functions supported by the NVMe controller via sriov_max_vfs parameter. Passing a non-zero value to sriov_max_vfs triggers reporting of SR-IOV capability by a physical controller and ARI capability by both the physical and virtual function devices. NVMe controllers created via virtual functions mirror functionally the physical controller, which may not entirely be the case, thus consideration would be needed on the way to limit the capabilities of the VF. NVMe subsystem is required for the use of SR-IOV. Signed-off-by: Lukasz Maniak --- hw/nvme/ctrl.c | 84 ++-- hw/nvme/nvme.h | 3 +- include/hw/pci/pci_ids.h | 1 + 3 files changed, 84 insertions(+), 4 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5f573c417b..159635c1af 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -35,6 +35,7 @@ * mdts=,vsl=, \ * zoned.zasl=, \ * zoned.auto_transition=, \ + * sriov_max_vfs= \ * subsys= * -device nvme-ns,drive=,bus=,nsid=,\ * zoned=, \ @@ -106,6 +107,12 @@ * transitioned to zone state closed for resource management purposes. * Defaults to 'on'. * + * - `sriov_max_vfs` + * Indicates the maximum number of PCIe virtual functions supported + * by the controller. The default value is 0. Specifying a non-zero value + * enables reporting of both SR-IOV and ARI capabilities by the NVMe device. + * Virtual function controllers will not report SR-IOV capability. + * * nvme namespace device parameters * * - `shared` @@ -160,6 +167,7 @@ #include "sysemu/block-backend.h" #include "sysemu/hostmem.h" #include "hw/pci/msix.h" +#include "hw/pci/pcie_sriov.h" #include "migration/vmstate.h" #include "nvme.h" @@ -175,6 +183,9 @@ #define NVME_TEMPERATURE_CRITICAL 0x175 #define NVME_NUM_FW_SLOTS 1 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) +#define NVME_MAX_VFS 127 +#define NVME_VF_OFFSET 0x1 +#define NVME_VF_STRIDE 1 #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -5588,6 +5599,10 @@ static void nvme_ctrl_reset(NvmeCtrl *n) g_free(event); } +if (!pci_is_vf(&n->parent_obj) && n->params.sriov_max_vfs) { +pcie_sriov_pf_disable_vfs(&n->parent_obj); +} + n->aer_queued = 0; n->outstanding_aers = 0; n->qs_created = false; @@ -6269,6 +6284,29 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) error_setg(errp, "vsl must be non-zero"); return; } + +if (params->sriov_max_vfs) { +if (!n->subsys) { +error_setg(errp, "subsystem is required for the use of SR-IOV"); +return; +} + +if (params->sriov_max_vfs > NVME_MAX_VFS) { +error_setg(errp, "sriov_max_vfs must be between 0 and %d", + NVME_MAX_VFS); +return; +} + +if (params->cmb_size_mb) { +error_setg(errp, "CMB is not supported with SR-IOV"); +return; +} + +if (n->pmr.dev) { +error_setg(errp, "PMR is not supported with SR-IOV"); +return; +} +} } static void nvme_init_state(NvmeCtrl *n) @@ -6326,6 +6364,20 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) memory_region_set_enabled(&n->pmr.dev->mr, false); } +static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, +uint64_t bar_size) +{ +uint16_t vf_dev_id = n->params.use_intel_id ? + PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; + +pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, + n->params.sriov_max_vfs, n->params.sriov_max_vfs, + NVME_VF_OFFSET, NVME_VF_STRIDE); + +pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); +} + static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { uint8_t *pci_conf = pci_dev->config; @@ -6340,7 +6392,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) if (n->params.use_intel_id) { pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); -pci_config_set_device_id(pci_conf, 0x5845); +pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME); } else { pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); @@ -6348,6 +6400,9 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); pcie_endpoint_cap_init(pci_dev, 0x80); +if (n->par
[PATCH v3 01/15] pcie: Add support for Single Root I/O Virtualization (SR/IOV)
From: Knut Omang This patch provides the building blocks for creating an SR/IOV PCIe Extended Capability header and register/unregister SR/IOV Virtual Functions. Signed-off-by: Knut Omang --- hw/pci/meson.build | 1 + hw/pci/pci.c| 97 +--- hw/pci/pcie.c | 5 + hw/pci/pcie_sriov.c | 287 hw/pci/trace-events | 5 + include/hw/pci/pci.h| 12 +- include/hw/pci/pcie.h | 6 + include/hw/pci/pcie_sriov.h | 67 + include/qemu/typedefs.h | 2 + 9 files changed, 456 insertions(+), 26 deletions(-) create mode 100644 hw/pci/pcie_sriov.c create mode 100644 include/hw/pci/pcie_sriov.h diff --git a/hw/pci/meson.build b/hw/pci/meson.build index 5c4bbac817..bcc9c75919 100644 --- a/hw/pci/meson.build +++ b/hw/pci/meson.build @@ -5,6 +5,7 @@ pci_ss.add(files( 'pci.c', 'pci_bridge.c', 'pci_host.c', + 'pcie_sriov.c', 'shpc.c', 'slotid_cap.c' )) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e5993c1ef5..1892a7e74c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -239,6 +239,9 @@ int pci_bar(PCIDevice *d, int reg) { uint8_t type; +/* PCIe virtual functions do not have their own BARs */ +assert(!pci_is_vf(d)); + if (reg != PCI_ROM_SLOT) return PCI_BASE_ADDRESS_0 + reg * 4; @@ -304,10 +307,30 @@ void pci_device_deassert_intx(PCIDevice *dev) } } -static void pci_do_device_reset(PCIDevice *dev) +static void pci_reset_regions(PCIDevice *dev) { int r; +if (pci_is_vf(dev)) { +return; +} + +for (r = 0; r < PCI_NUM_REGIONS; ++r) { +PCIIORegion *region = &dev->io_regions[r]; +if (!region->size) { +continue; +} + +if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) && +region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { +pci_set_quad(dev->config + pci_bar(dev, r), region->type); +} else { +pci_set_long(dev->config + pci_bar(dev, r), region->type); +} +} +} +static void pci_do_device_reset(PCIDevice *dev) +{ pci_device_deassert_intx(dev); assert(dev->irq_state == 0); @@ -323,19 +346,7 @@ static void pci_do_device_reset(PCIDevice *dev) pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) | pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE)); dev->config[PCI_CACHE_LINE_SIZE] = 0x0; -for (r = 0; r < PCI_NUM_REGIONS; ++r) { -PCIIORegion *region = &dev->io_regions[r]; -if (!region->size) { -continue; -} - -if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) && -region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { -pci_set_quad(dev->config + pci_bar(dev, r), region->type); -} else { -pci_set_long(dev->config + pci_bar(dev, r), region->type); -} -} +pci_reset_regions(dev); pci_update_mappings(dev); msi_reset(dev); @@ -884,6 +895,15 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } +/* With SR/IOV and ARI, a device at function 0 need not be a multifunction + * device, as it may just be a VF that ended up with function 0 in + * the legacy PCI interpretation. Avoid failing in such cases: + */ +if (pci_is_vf(dev) && +dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { +return; +} + /* * multifunction bit is interpreted in two ways as follows. * - all functions must set the bit to 1. @@ -1083,6 +1103,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, bus->devices[devfn]->name); return NULL; } else if (dev->hotplugged && + !pci_is_vf(pci_dev) && pci_get_function_0(pci_dev)) { error_setg(errp, "PCI: slot %d function 0 already occupied by %s," " new func %s cannot be exposed to guest.", @@ -1191,6 +1212,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; +assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1294,11 +1316,43 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num) return pci_dev->io_regions[region_num].addr; } -static pcibus_t pci_bar_address(PCIDevice *d, -int reg, uint8_t type, pcibus_t size) +static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, +uint8_t type, pcibus_t size) +{ +pcibus_t new_addr; +if (!pci_is_vf(d)) { +int bar = pci_bar(d, reg); +if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { +new_addr = pci
[PATCH v3 03/15] pcie: Add a helper to the SR/IOV API
From: Łukasz Gieryk Convenience function for retrieving the PCIDevice object of the N-th VF. Signed-off-by: Łukasz Gieryk Reviewed-by: Knut Omang --- hw/pci/pcie_sriov.c | 10 +- include/hw/pci/pcie_sriov.h | 5 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 501a1ff433..be8c907e06 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -280,8 +280,16 @@ uint16_t pcie_sriov_vf_number(PCIDevice *dev) return dev->exp.sriov_vf.vf_number; } - PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) { return dev->exp.sriov_vf.pf; } + +PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) +{ +assert(!pci_is_vf(dev)); +if (n < dev->exp.sriov_pf.num_vfs) { +return dev->exp.sriov_pf.vf[n]; +} +return NULL; +} diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 0974f00054..cd2aebd3a6 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -64,4 +64,9 @@ uint16_t pcie_sriov_vf_number(PCIDevice *dev); */ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev); +/* Get the n-th VF of this physical function - only valid for PF. + * Returns NULL if index is invalid + */ +PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n); + #endif /* QEMU_PCIE_SRIOV_H */ -- 2.25.1
[PATCH v3 08/15] hw/nvme: Implement the Function Level Reset
From: Łukasz Gieryk This patch implements the Function Level Reset, a feature currently not implemented for the Nvme device, while listed as a mandatory ("shall") in the 1.4 spec. The implementation reuses FLR-related building blocks defined for the pci-bridge module, and follows the same logic: - FLR capability is advertised in the PCIE config, - custom pci_write_config callback detects a write to the trigger register and performs the PCI reset, - which, eventually, calls the custom dc->reset handler. Depending on reset type, parts of the state should (or should not) be cleared. To distinguish the type of reset, an additional parameter is passed to the reset function. This patch also enables advertisement of the Power Management PCI capability. The main reason behind it is to announce the no_soft_reset=1 bit, to signal SR-IOV support where each VF can be reset individually. The implementation purposedly ignores writes to the PMCS.PS register, as even such naïve behavior is enough to correctly handle the D3->D0 transition. It’s worth to note, that the power state transition back to to D3, with all the corresponding side effects, wasn't and stil isn't handled properly. Signed-off-by: Łukasz Gieryk Reviewed-by: Klaus Jensen --- hw/nvme/ctrl.c | 52 hw/nvme/nvme.h | 5 + hw/nvme/trace-events | 1 + 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index eaca12df57..9e83b4dd76 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5602,7 +5602,7 @@ static void nvme_process_sq(void *opaque) } } -static void nvme_ctrl_reset(NvmeCtrl *n) +static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst) { NvmeNamespace *ns; int i; @@ -5634,7 +5634,9 @@ static void nvme_ctrl_reset(NvmeCtrl *n) } if (!pci_is_vf(&n->parent_obj) && n->params.sriov_max_vfs) { -pcie_sriov_pf_disable_vfs(&n->parent_obj); +if (rst != NVME_RESET_CONTROLLER) { +pcie_sriov_pf_disable_vfs(&n->parent_obj); +} } n->aer_queued = 0; @@ -5868,7 +5870,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) { trace_pci_nvme_mmio_stopped(); -nvme_ctrl_reset(n); +nvme_ctrl_reset(n, NVME_RESET_CONTROLLER); cc = 0; csts &= ~NVME_CSTS_READY; } @@ -6426,6 +6428,28 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); } +static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) +{ +Error *err = NULL; +int ret; + +ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, + PCI_PM_SIZEOF, &err); +if (err) { +error_report_err(err); +return ret; +} + +pci_set_word(pci_dev->config + offset + PCI_PM_PMC, + PCI_PM_CAP_VER_1_2); +pci_set_word(pci_dev->config + offset + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RESET); +pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + +return 0; +} + static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { uint8_t *pci_conf = pci_dev->config; @@ -6447,7 +6471,9 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) } pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); +nvme_add_pm_capability(pci_dev, 0x60); pcie_endpoint_cap_init(pci_dev, 0x80); +pcie_cap_flr_init(pci_dev); if (n->params.sriov_max_vfs) { pcie_ari_init(pci_dev, 0x100, 1); } @@ -6696,7 +6722,7 @@ static void nvme_exit(PCIDevice *pci_dev) NvmeNamespace *ns; int i; -nvme_ctrl_reset(n); +nvme_ctrl_reset(n, NVME_RESET_FUNCTION); if (n->subsys) { for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { @@ -6795,6 +6821,22 @@ static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, } } +static void nvme_pci_reset(DeviceState *qdev) +{ +PCIDevice *pci_dev = PCI_DEVICE(qdev); +NvmeCtrl *n = NVME(pci_dev); + +trace_pci_nvme_pci_reset(); +nvme_ctrl_reset(n, NVME_RESET_FUNCTION); +} + +static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, + uint32_t val, int len) +{ +pci_default_write_config(dev, address, val, len); +pcie_cap_flr_write_config(dev, address, val, len); +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, @@ -6806,6 +6848,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); pc->realize = nvme_realize; +pc->config_write = nvme_pci_write_config; pc->exit = nvme_exit; pc->class_id = PCI_CLASS_STORAG
[PATCH v3 04/15] pcie: Add 1.2 version token for the Power Management Capability
From: Łukasz Gieryk Signed-off-by: Łukasz Gieryk --- include/hw/pci/pci_regs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h index 77ba64b931..a590140962 100644 --- a/include/hw/pci/pci_regs.h +++ b/include/hw/pci/pci_regs.h @@ -4,5 +4,6 @@ #include "standard-headers/linux/pci_regs.h" #define PCI_PM_CAP_VER_1_1 0x0002 /* PCI PM spec ver. 1.1 */ +#define PCI_PM_CAP_VER_1_2 0x0003 /* PCI PM spec ver. 1.2 */ #endif -- 2.25.1
[PATCH v3 06/15] hw/nvme: Add support for Primary Controller Capabilities
Implementation of Primary Controller Capabilities data structure (Identify command with CNS value of 14h). Currently, the command returns only ID of a primary controller. Handling of remaining fields are added in subsequent patches implementing virtualization enhancements. Signed-off-by: Lukasz Maniak Reviewed-by: Klaus Jensen --- hw/nvme/ctrl.c | 22 +- hw/nvme/nvme.h | 2 ++ hw/nvme/trace-events | 1 + include/block/nvme.h | 23 +++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 159635c1af..651e1f2fa2 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4543,6 +4543,13 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); } +static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req) +{ +trace_pci_nvme_identify_pri_ctrl_cap(le16_to_cpu(n->pri_ctrl_cap.cntlid)); + +return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap, sizeof(NvmePriCtrlCap), req); +} + static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, bool active) { @@ -4761,6 +4768,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) return nvme_identify_ctrl_list(n, req, true); case NVME_ID_CNS_CTRL_LIST: return nvme_identify_ctrl_list(n, req, false); +case NVME_ID_CNS_PRIMARY_CTRL_CAP: +return nvme_identify_pri_ctrl_cap(n, req); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: @@ -6311,6 +6320,8 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) static void nvme_init_state(NvmeCtrl *n) { +NvmePriCtrlCap *cap = &n->pri_ctrl_cap; + /* add one to max_ioqpairs to account for the admin queue pair */ n->reg_size = pow2ceil(sizeof(NvmeBar) + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); @@ -6320,6 +6331,8 @@ static void nvme_init_state(NvmeCtrl *n) n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); + +cap->cntlid = cpu_to_le16(n->cntlid); } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -6619,15 +6632,14 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, &pci_dev->qdev, n->parent_obj.qdev.id); -nvme_init_state(n); -if (nvme_init_pci(n, pci_dev, errp)) { -return; -} - if (nvme_init_subsys(n, errp)) { error_propagate(errp, local_err); return; } +nvme_init_state(n); +if (nvme_init_pci(n, pci_dev, errp)) { +return; +} nvme_init_ctrl(n, pci_dev); /* setup a namespace if the controller drive property was given */ diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 4c8af34b28..81deb45dfb 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -461,6 +461,8 @@ typedef struct NvmeCtrl { }; uint32_tasync_config; } features; + +NvmePriCtrlCap pri_ctrl_cap; } NvmeCtrl; static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ff6cafd520..1014ebceb6 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -52,6 +52,7 @@ pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" +pci_nvme_identify_pri_ctrl_cap(uint16_t cntlid) "identify primary controller capabilities cntlid=%"PRIu16"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" diff --git a/include/block/nvme.h b/include/block/nvme.h index e3bd47bf76..f69bd1d14f 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1017,6 +1017,7 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT= 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, NVME_ID_CNS_CTRL_LIST = 0x13, +NVME_ID_CNS_PRIMARY_CTRL_CAP = 0x14, NVME_ID_CNS_CS_NS_PRESENT_LIST= 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET= 0x1c, @@ -1465,6 +1466,27 @@ typedef enum NvmeZoneState { NVME_ZONE_STATE_OFFLINE = 0x0f, } NvmeZoneState; +typedef struct QEMU_PACKED NvmePriCtrlCap { +uint16_tcntlid; +uint16_tportid; +uint8_t crt; +uint8_t rsvd5[27]; +uint32_tvqfrt; +uint32_tvqrfa; +uint16_tvqrfap; +uint16_tvqprt; +uint16_tvqfr
[PATCH v3 07/15] hw/nvme: Add support for Secondary Controller List
Introduce handling for Secondary Controller List (Identify command with CNS value of 15h). Secondary controller ids are unique in the subsystem, hence they are reserved by it upon initialization of the primary controller to the number of sriov_max_vfs. ID reservation requires the addition of an intermediate controller slot state, so the reserved controller has the address 0x. A secondary controller is in the reserved state when it has no virtual function assigned, but its primary controller is realized. Secondary controller reservations are released to NULL when its primary controller is unregistered. Signed-off-by: Lukasz Maniak --- hw/nvme/ctrl.c | 35 + hw/nvme/ns.c | 2 +- hw/nvme/nvme.h | 18 +++ hw/nvme/subsys.c | 75 ++-- hw/nvme/trace-events | 1 + include/block/nvme.h | 20 6 files changed, 141 insertions(+), 10 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 651e1f2fa2..eaca12df57 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4550,6 +4550,29 @@ static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap, sizeof(NvmePriCtrlCap), req); } +static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req) +{ +NvmeIdentify *c = (NvmeIdentify *)&req->cmd; +uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid); +uint16_t min_id = le16_to_cpu(c->ctrlid); +uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl; +NvmeSecCtrlList list = {0}; +uint8_t i; + +for (i = 0; i < num_sec_ctrl; i++) { +if (n->sec_ctrl_list.sec[i].scid >= min_id) { +list.numcntl = num_sec_ctrl - i; +memcpy(&list.sec, n->sec_ctrl_list.sec + i, + list.numcntl * sizeof(NvmeSecCtrlEntry)); +break; +} +} + +trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl); + +return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req); +} + static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, bool active) { @@ -4770,6 +4793,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) return nvme_identify_ctrl_list(n, req, false); case NVME_ID_CNS_PRIMARY_CTRL_CAP: return nvme_identify_pri_ctrl_cap(n, req); +case NVME_ID_CNS_SECONDARY_CTRL_LIST: +return nvme_identify_sec_ctrl_list(n, req); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: @@ -6321,6 +6346,9 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) static void nvme_init_state(NvmeCtrl *n) { NvmePriCtrlCap *cap = &n->pri_ctrl_cap; +NvmeSecCtrlList *list = &n->sec_ctrl_list; +NvmeSecCtrlEntry *sctrl; +int i; /* add one to max_ioqpairs to account for the admin queue pair */ n->reg_size = pow2ceil(sizeof(NvmeBar) + @@ -6332,6 +6360,13 @@ static void nvme_init_state(NvmeCtrl *n) n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); +list->numcntl = cpu_to_le16(n->params.sriov_max_vfs); +for (i = 0; i < n->params.sriov_max_vfs; i++) { +sctrl = &list->sec[i]; +sctrl->pcid = cpu_to_le16(n->cntlid); +sctrl->vfn = cpu_to_le16(i + 1); +} + cap->cntlid = cpu_to_le16(n->cntlid); } diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 8b5f98c761..e7a54ac572 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -511,7 +511,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { NvmeCtrl *ctrl = subsys->ctrls[i]; -if (ctrl) { +if (ctrl && ctrl != SUBSYS_SLOT_RSVD) { nvme_attach_ns(ctrl, ns); } } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 81deb45dfb..2157a7b95f 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -43,6 +43,7 @@ typedef struct NvmeBus { #define TYPE_NVME_SUBSYS "nvme-subsys" #define NVME_SUBSYS(obj) \ OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) +#define SUBSYS_SLOT_RSVD (void *)0x typedef struct NvmeSubsystem { DeviceState parent_obj; @@ -67,6 +68,10 @@ static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, return NULL; } +if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) { +return NULL; +} + return subsys->ctrls[cntlid]; } @@ -463,6 +468,7 @@ typedef struct NvmeCtrl { } features; NvmePriCtrlCap pri_ctrl_cap; +NvmeSecCtrlList sec_ctrl_list; } NvmeCtrl; static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) @@ -497,6 +503,18 @@ static inline uint16_t nvme_cid(NvmeRequest *req) return le16_to_cpu(req->cqe.cid); } +static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n) +
[PATCH v3 09/15] hw/nvme: Make max_ioqpairs and msix_qsize configurable in runtime
From: Łukasz Gieryk The NVMe device defines two properties: max_ioqpairs, msix_qsize. Having them as constants is problematic for SR-IOV support. SR-IOV introduces virtual resources (queues, interrupts) that can be assigned to PF and its dependent VFs. Each device, following a reset, should work with the configured number of queues. A single constant is no longer sufficient to hold the whole state. This patch tries to solve the problem by introducing additional variables in NvmeCtrl’s state. The variables for, e.g., managing queues are therefore organized as: - n->params.max_ioqpairs – no changes, constant set by the user - n->(mutable_state) – (not a part of this patch) user-configurable, specifies number of queues available _after_ reset - n->conf_ioqpairs - (new) used in all the places instead of the ‘old’ n->params.max_ioqpairs; initialized in realize() and updated during reset() to reflect user’s changes to the mutable state Since the number of available i/o queues and interrupts can change in runtime, buffers for sq/cqs and the MSIX-related structures are allocated big enough to handle the limits, to completely avoid the complicated reallocation. A helper function (nvme_update_msixcap_ts) updates the corresponding capability register, to signal configuration changes. Signed-off-by: Łukasz Gieryk --- hw/nvme/ctrl.c | 52 ++ hw/nvme/nvme.h | 2 ++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 9e83b4dd76..de463450b6 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -416,12 +416,12 @@ static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { -return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; +return sqid < n->conf_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; } static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) { -return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1; +return cqid < n->conf_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1; } static void nvme_inc_cq_tail(NvmeCQueue *cq) @@ -4034,8 +4034,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_sq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } -if (unlikely(!sqid || sqid > n->params.max_ioqpairs || -n->sq[sqid] != NULL)) { +if (unlikely(!sqid || sqid > n->conf_ioqpairs || n->sq[sqid] != NULL)) { trace_pci_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } @@ -4387,8 +4386,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, NVME_CQ_FLAGS_IEN(qflags) != 0); -if (unlikely(!cqid || cqid > n->params.max_ioqpairs || -n->cq[cqid] != NULL)) { +if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) { trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_QID | NVME_DNR; } @@ -4404,7 +4402,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } -if (unlikely(vector >= n->params.msix_qsize)) { +if (unlikely(vector >= n->conf_msix_qsize)) { trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } @@ -5000,13 +4998,12 @@ defaults: break; case NVME_NUMBER_OF_QUEUES: -result = (n->params.max_ioqpairs - 1) | -((n->params.max_ioqpairs - 1) << 16); +result = (n->conf_ioqpairs - 1) | ((n->conf_ioqpairs - 1) << 16); trace_pci_nvme_getfeat_numq(result); break; case NVME_INTERRUPT_VECTOR_CONF: iv = dw11 & 0x; -if (iv >= n->params.max_ioqpairs + 1) { +if (iv >= n->conf_ioqpairs + 1) { return NVME_INVALID_FIELD | NVME_DNR; } @@ -5161,10 +5158,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_setfeat_numq((dw11 & 0x) + 1, ((dw11 >> 16) & 0x) + 1, -n->params.max_ioqpairs, -n->params.max_ioqpairs); -req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16)); +n->conf_ioqpairs, +n->conf_ioqpairs); +req->cqe.result = cpu_to_le32((n->conf_ioqpairs - 1) | + ((n->conf_ioqpairs - 1) << 16)); break; case NVME_ASYNC
[PATCH v3 11/15] hw/nvme: Calculate BAR attributes in a function
From: Łukasz Gieryk An NVMe device with SR-IOV capability calculates the BAR size differently for PF and VF, so it makes sense to extract the common code to a separate function. Signed-off-by: Łukasz Gieryk --- hw/nvme/ctrl.c | 45 +++-- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index a4b11b201a..a26abaea36 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6429,6 +6429,34 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) memory_region_set_enabled(&n->pmr.dev->mr, false); } +static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs, + unsigned *msix_table_offset, + unsigned *msix_pba_offset) +{ +uint64_t bar_size, msix_table_size, msix_pba_size; + +bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE; +bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); + +if (msix_table_offset) { +*msix_table_offset = bar_size; +} + +msix_table_size = PCI_MSIX_ENTRY_SIZE * total_irqs; +bar_size += msix_table_size; +bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); + +if (msix_pba_offset) { +*msix_pba_offset = bar_size; +} + +msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8; +bar_size += msix_pba_size; + +bar_size = pow2ceil(bar_size); +return bar_size; +} + static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, uint64_t bar_size) { @@ -6468,7 +6496,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { uint8_t *pci_conf = pci_dev->config; -uint64_t bar_size, msix_table_size, msix_pba_size; +uint64_t bar_size; unsigned msix_table_offset, msix_pba_offset; int ret; @@ -6494,19 +6522,8 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) } /* add one to max_ioqpairs to account for the admin queue pair */ -bar_size = sizeof(NvmeBar) + - 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE; -bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); -msix_table_offset = bar_size; -msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; - -bar_size += msix_table_size; -bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); -msix_pba_offset = bar_size; -msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8; - -bar_size += msix_pba_size; -bar_size = pow2ceil(bar_size); +bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize, + &msix_table_offset, &msix_pba_offset); memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", -- 2.25.1
[PATCH v3 12/15] hw/nvme: Initialize capability structures for primary/secondary controllers
From: Łukasz Gieryk With four new properties: - sriov_v{i,q}_flexible, - sriov_max_v{i,q}_per_vf, one can configure the number of available flexible resources, as well as the limits. The primary and secondary controller capability structures are initialized accordingly. Since the number of available queues (interrupts) now varies between VF/PF, BAR size calculation is also adjusted. Signed-off-by: Łukasz Gieryk --- hw/nvme/ctrl.c | 138 --- hw/nvme/nvme.h | 4 ++ include/block/nvme.h | 5 ++ 3 files changed, 140 insertions(+), 7 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index a26abaea36..e43773b525 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -36,6 +36,10 @@ * zoned.zasl=, \ * zoned.auto_transition=, \ * sriov_max_vfs= \ + * sriov_vq_flexible= \ + * sriov_vi_flexible= \ + * sriov_max_vi_per_vf= \ + * sriov_max_vq_per_vf= \ * subsys= * -device nvme-ns,drive=,bus=,nsid=,\ * zoned=, \ @@ -113,6 +117,29 @@ * enables reporting of both SR-IOV and ARI capabilities by the NVMe device. * Virtual function controllers will not report SR-IOV capability. * + * NOTE: Single Root I/O Virtualization support is experimental. + * All the related parameters may be subject to change. + * + * - `sriov_vq_flexible` + * Indicates the total number of flexible queue resources assignable to all + * the secondary controllers. Implicitly sets the number of primary + * controller's private resources to `(max_ioqpairs - sriov_vq_flexible)`. + * + * - `sriov_vi_flexible` + * Indicates the total number of flexible interrupt resources assignable to + * all the secondary controllers. Implicitly sets the number of primary + * controller's private resources to `(msix_qsize - sriov_vi_flexible)`. + * + * - `sriov_max_vi_per_vf` + * Indicates the maximum number of virtual interrupt resources assignable + * to a secondary controller. The default 0 resolves to + * `(sriov_vi_flexible / sriov_max_vfs)`. + * + * - `sriov_max_vq_per_vf` + * Indicates the maximum number of virtual queue resources assignable to + * a secondary controller. The default 0 resolves to + * `(sriov_vq_flexible / sriov_max_vfs)`. + * * nvme namespace device parameters * * - `shared` @@ -184,6 +211,7 @@ #define NVME_NUM_FW_SLOTS 1 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) #define NVME_MAX_VFS 127 +#define NVME_VF_RES_GRANULARITY 1 #define NVME_VF_OFFSET 0x1 #define NVME_VF_STRIDE 1 @@ -6357,6 +6385,54 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) error_setg(errp, "PMR is not supported with SR-IOV"); return; } + +if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) { +error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible" + " must be set for the use of SR-IOV"); +return; +} + +if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) { +error_setg(errp, "sriov_vq_flexible must be greater than or equal" + " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2); +return; +} + +if (params->max_ioqpairs < params->sriov_vq_flexible + 2) { +error_setg(errp, "sriov_vq_flexible - max_ioqpairs (PF-private" + " queue resources) must be greater than or equal to 2"); +return; +} + +if (params->sriov_vi_flexible < params->sriov_max_vfs) { +error_setg(errp, "sriov_vi_flexible must be greater than or equal" + " to %d (sriov_max_vfs)", params->sriov_max_vfs); +return; +} + +if (params->msix_qsize < params->sriov_vi_flexible + 1) { +error_setg(errp, "sriov_vi_flexible - msix_qsize (PF-private" + " interrupt resources) must be greater than or equal" + " to 1"); +return; +} + +if (params->sriov_max_vi_per_vf && +(params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) { +error_setg(errp, "sriov_max_vi_per_vf must meet:" + " (X - 1) %% %d == 0 and X >= 1", + NVME_VF_RES_GRANULARITY); +return; +} + +if (params->sriov_max_vq_per_vf && +(params->sriov_max_vq_per_vf < 2 || + (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) { +error_setg(errp, "sriov_max_vq_per_vf must meet:" + " (X - 1) %% %d == 0 and X >= 2", + NVME_VF_RES_GRANULARITY); +return; +} } } @@ -6365,10 +6441,19 @@ static void nvme_init_state(NvmeCtrl *n) NvmePriCtrlCap *cap = &n->pri_ctrl_cap; NvmeS
[PATCH v3 15/15] hw/nvme: Update the initalization place for the AER queue
From: Łukasz Gieryk This patch updates the initialization place for the AER queue, so it’s initialized once, at controller initialization, and not every time controller is enabled. While the original version works for a non-SR-IOV device, as it’s hard to interact with the controller if it’s not enabled, the multiple reinitialization is not necessarily correct. With the SR/IOV feature enabled a segfault can happen: a VF can have its controller disabled, while a namespace can still be attached to the controller through the parent PF. An event generated in such case ends up on an uninitialized queue. While it’s an interesting question whether a VF should support AER in the first place, I don’t think it must be answered today. Signed-off-by: Łukasz Gieryk --- hw/nvme/ctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e21c60fee8..23280f501f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6023,8 +6023,6 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); -QTAILQ_INIT(&n->aer_queue); - nvme_select_iocs(n); return 0; @@ -7001,6 +6999,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cmic |= NVME_CMIC_MULTI_CTRL; } +QTAILQ_INIT(&n->aer_queue); + NVME_CAP_SET_MQES(cap, 0x7ff); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); -- 2.25.1
[PATCH v3 13/15] hw/nvme: Add support for the Virtualization Management command
From: Łukasz Gieryk With the new command one can: - assign flexible resources (queues, interrupts) to primary and secondary controllers, - toggle the online/offline state of given controller. Signed-off-by: Łukasz Gieryk --- hw/nvme/ctrl.c | 253 ++- hw/nvme/nvme.h | 20 hw/nvme/trace-events | 3 + include/block/nvme.h | 17 +++ 4 files changed, 291 insertions(+), 2 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e43773b525..e21c60fee8 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -188,6 +188,7 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/units.h" +#include "qemu/range.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "sysemu/sysemu.h" @@ -259,6 +260,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, +[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; @@ -290,6 +292,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { }; static void nvme_process_sq(void *opaque); +static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst); static uint16_t nvme_sqid(NvmeRequest *req) { @@ -5539,6 +5542,164 @@ out: return status; } +static void nvme_get_virt_res_num(NvmeCtrl *n, uint8_t rt, int *num_total, + int *num_prim, int *num_sec) +{ +*num_total = le32_to_cpu(rt ? n->pri_ctrl_cap.vifrt : n->pri_ctrl_cap.vqfrt); +*num_prim = le16_to_cpu(rt ? n->pri_ctrl_cap.virfap : n->pri_ctrl_cap.vqrfap); +*num_sec = le16_to_cpu(rt ? n->pri_ctrl_cap.virfa : n->pri_ctrl_cap.vqrfa); +} + +static uint16_t nvme_assign_virt_res_to_prim(NvmeCtrl *n, NvmeRequest *req, + uint16_t cntlid, uint8_t rt, int nr) +{ +int num_total, num_prim, num_sec; + +if (cntlid != n->cntlid) { +return NVME_INVALID_CTRL_ID | NVME_DNR; +} + +nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec); + +if (nr > num_total) { +return NVME_INVALID_NUM_RESOURCES | NVME_DNR; +} + +if (nr > num_total - num_sec) { +return NVME_INVALID_RESOURCE_ID | NVME_DNR; +} + +if (rt) { +n->next_pri_ctrl_cap.virfap = cpu_to_le16(nr); +} else { +n->next_pri_ctrl_cap.vqrfap = cpu_to_le16(nr); +} + +req->cqe.result = cpu_to_le32(nr); +return req->status; +} + +static void nvme_update_virt_res(NvmeCtrl *n, NvmeSecCtrlEntry *sctrl, + uint8_t rt, int nr) +{ +int prev_nr, prev_total; + +if (rt) { +prev_nr = le16_to_cpu(sctrl->nvi); +prev_total = le32_to_cpu(n->pri_ctrl_cap.virfa); +sctrl->nvi = cpu_to_le16(nr); +n->pri_ctrl_cap.virfa = cpu_to_le32(prev_total + nr - prev_nr); +} else { +prev_nr = le16_to_cpu(sctrl->nvq); +prev_total = le32_to_cpu(n->pri_ctrl_cap.vqrfa); +sctrl->nvq = cpu_to_le16(nr); +n->pri_ctrl_cap.vqrfa = cpu_to_le32(prev_total + nr - prev_nr); +} +} + +static uint16_t nvme_assign_virt_res_to_sec(NvmeCtrl *n, NvmeRequest *req, +uint16_t cntlid, uint8_t rt, int nr) +{ +int num_total, num_prim, num_sec, num_free, diff, limit; +NvmeSecCtrlEntry *sctrl; + +sctrl = nvme_sctrl_for_cntlid(n, cntlid); +if (!sctrl) { +return NVME_INVALID_CTRL_ID | NVME_DNR; +} + +if (sctrl->scs) { +return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR; +} + +limit = le16_to_cpu(rt ? n->pri_ctrl_cap.vifrsm : n->pri_ctrl_cap.vqfrsm); +if (nr > limit) { +return NVME_INVALID_NUM_RESOURCES | NVME_DNR; +} + +nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec); +num_free = num_total - num_prim - num_sec; +diff = nr - le16_to_cpu(rt ? sctrl->nvi : sctrl->nvq); + +if (diff > num_free) { +return NVME_INVALID_RESOURCE_ID | NVME_DNR; +} + +nvme_update_virt_res(n, sctrl, rt, nr); +req->cqe.result = cpu_to_le32(nr); + +return req->status; +} + +static uint16_t nvme_virt_set_state(NvmeCtrl *n, uint16_t cntlid, bool online) +{ +NvmeCtrl *sn = NULL; +NvmeSecCtrlEntry *sctrl; +int vf_index; + +sctrl = nvme_sctrl_for_cntlid(n, cntlid); +if (!sctrl) { +return NVME_INVALID_CTRL_ID | NVME_DNR; +} + +if (!pci_is_vf(&n->parent_obj)) { +vf_index = le16_to_cpu(sctrl->vfn) - 1; +sn = NVME(pcie_sriov_get_vf_at_index(&n->parent_obj, vf_index)); +} + +if (online) { +if (!sctrl->nvi || (le16_to_cpu(sctrl->nvq) < 2) || !sn) { +return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR; +} + +if (!sctrl->scs) { +sctrl->scs = 0x1; +nvme_ctrl_reset(
[PATCH v3 10/15] hw/nvme: Remove reg_size variable and update BAR0 size calculation
From: Łukasz Gieryk The n->reg_size parameter unnecessarily splits the BAR0 size calculation in two phases; removed to simplify the code. With all the calculations done in one place, it seems the pow2ceil, applied originally to reg_size, is unnecessary. The rounding should happen as the last step, when BAR size includes Nvme registers, queue registers, and MSIX-related space. Finally, the size of the mmio memory region is extended to cover the 1st 4KiB padding (see the map below). Access to this range is handled as interaction with a non-existing queue and generates an error trace, so actually nothing changes, while the reg_size variable is no longer needed. | BAR0| [Nvme Registers] [Queues] [power-of-2 padding] - removed in this patch [4KiB padding (1) ] [MSIX TABLE] [4KiB padding (2) ] [MSIX PBA ] [power-of-2 padding] Signed-off-by: Łukasz Gieryk --- hw/nvme/ctrl.c | 10 +- hw/nvme/nvme.h | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index de463450b6..a4b11b201a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6370,9 +6370,6 @@ static void nvme_init_state(NvmeCtrl *n) n->conf_ioqpairs = n->params.max_ioqpairs; n->conf_msix_qsize = n->params.msix_qsize; -/* add one to max_ioqpairs to account for the admin queue pair */ -n->reg_size = pow2ceil(sizeof(NvmeBar) + - 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); n->temperature = NVME_TEMPERATURE; @@ -6496,7 +6493,10 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) pcie_ari_init(pci_dev, 0x100, 1); } -bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); +/* add one to max_ioqpairs to account for the admin queue pair */ +bar_size = sizeof(NvmeBar) + + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE; +bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); msix_table_offset = bar_size; msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; @@ -6510,7 +6510,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", - n->reg_size); + msix_table_offset); memory_region_add_subregion(&n->bar0, 0, &n->iomem); if (pci_is_vf(pci_dev)) { diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 927890b490..1401ac3904 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -414,7 +414,6 @@ typedef struct NvmeCtrl { uint16_tmax_prp_ents; uint16_tcqe_size; uint16_tsqe_size; -uint32_treg_size; uint32_tmax_q_ents; uint8_t outstanding_aers; uint32_tirq_status; -- 2.25.1
Re: [PATCH 2/2] qapi/ui: introduce change-vnc-listen
21.12.2021 17:15, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: Add command that can change addresses where VNC server listens for new connections. Prior to 6.0 this functionality was available through 'change' qmp command which was deleted. Signed-off-by: Vladimir Sementsov-Ogievskiy --- docs/about/removed-features.rst | 3 ++- qapi/ui.json| 12 ui/vnc.c| 26 ++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index d42c3341de..20e6901a82 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -348,7 +348,8 @@ documentation of ``query-hotpluggable-cpus`` for additional details. ``change`` (removed in 6.0) ''' -Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. +Use ``blockdev-change-medium`` or ``change-vnc-password`` or +``change-vnc-listen`` instead. ``query-events`` (removed in 6.0) ' diff --git a/qapi/ui.json b/qapi/ui.json index d7567ac866..14e6fe0b4c 100644 --- a/qapi/ui.json +++ b/qapi/ui.json @@ -1304,3 +1304,15 @@ { 'command': 'display-reload', 'data': 'DisplayReloadOptions', 'boxed' : true } + +## +# @change-vnc-listen: +# +# Change set of addresses to listen for connections. Please document the arguments: # @id: lorem ipsum # # @address: dolor sit amet # # @websockets: consectetur adipisici elit Oops :) # @id: vnc display identifier # # @addresses: list of addresses for listen at # # @websockets: list of addresses to listen with websockets +# +# Since: 7.0 +# +## +{ 'command': 'change-vnc-listen', + 'data': { 'id': 'str', 'addresses': ['SocketAddress'], +'*websockets': ['SocketAddress'] } } Lacks 'if': 'CONFIG_VNC'. Oops, yes. We already have change-vnc-password. You add change-vnc-listen. Is there anything else we might want to change? I don't know. I have a request to change only the port of connection. But creating a special command to change only the port is too specific. On the other hand, creating command that will allow to change many other vnc parameters means deeper refactoring the vnc code, it's too much for me. Old removed "change" command allowed to change many vnc arguments as I understand, but they we parsed from one string argument, which is bad for QMP. So, I decided that the golden mean is make an interface to change the addresses to listen. Actually, I don't need "websockets", and even don't know how to test them, so we can drop this parameter for now, it's simple to add it later on demand. Or we can keep it as is. Aside: what's the difference between change-vnc-password and set_password? Looking at code, the difference is that set_password can also change password on spice, and has some additional logic with "connected" argument. [...] -- Best regards, Vladimir
[PATCH v3 14/15] docs: Add documentation for SR-IOV and Virtualization Enhancements
Signed-off-by: Lukasz Maniak --- docs/system/devices/nvme.rst | 36 1 file changed, 36 insertions(+) diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index b5acb2a9c1..166a11abc6 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -239,3 +239,39 @@ The virtual namespace device supports DIF- and DIX-based protection information to ``1`` to transfer protection information as the first eight bytes of metadata. Otherwise, the protection information is transferred as the last eight bytes. + +Virtualization Enhancements and SR-IOV (Experimental Support) +- + +The ``nvme`` device supports Single Root I/O Virtualization and Sharing +along with Virtualization Enhancements. The controller has to be linked to +an NVM Subsystem device (``nvme-subsys``) for use with SR-IOV. + +A number of parameters are present (**please note, that they may be +subject to change**): + +``sriov_max_vfs`` (default: ``0``) + Indicates the maximum number of PCIe virtual functions supported + by the controller. Specifying a non-zero value enables reporting of both + SR-IOV and ARI (Alternative Routing-ID Interpretation) capabilities + by the NVMe device. Virtual function controllers will not report SR-IOV. + +``sriov_vq_flexible`` + Indicates the total number of flexible queue resources assignable to all + the secondary controllers. Implicitly sets the number of primary + controller's private resources to ``(max_ioqpairs - sriov_vq_flexible)``. + +``sriov_vi_flexible`` + Indicates the total number of flexible interrupt resources assignable to + all the secondary controllers. Implicitly sets the number of primary + controller's private resources to ``(msix_qsize - sriov_vi_flexible)``. + +``sriov_max_vi_per_vf`` (default: ``0``) + Indicates the maximum number of virtual interrupt resources assignable + to a secondary controller. The default ``0`` resolves to + ``(sriov_vi_flexible / sriov_max_vfs)`` + +``sriov_max_vq_per_vf`` (default: ``0``) + Indicates the maximum number of virtual queue resources assignable to + a secondary controller. The default ``0`` resolves to + ``(sriov_vq_flexible / sriov_max_vfs)`` -- 2.25.1
Re: [PATCH] vl: Add -set options to device opts dict when using JSON syntax for -device
On 12/21/21 13:58, Markus Armbruster wrote: Is this a regression? I suspect commit 5dacda5167 "vl: Enable JSON syntax for -device" (v6.2.0). Obviously not a regression: everything that used to work still works. FWIW I think -set should be deprecated. I'm not aware of any particularly useful use of it. There are a couple in the QEMU tests (in vhost-user-test and in qemu-iotests 068), but in both cases the code would be easier to follow without; patches can be dusted off if desired. Paolo
[PATCH] acpi: validate hotplug selector on access
When bus is looked up on a pci write, we didn't validate that the lookup succeeded. Fuzzers thus can trigger QEMU crash by dereferencing the NULL bus pointer. Fixes: b32bd763a1 ("pci: introduce acpi-index property for PCI device") Cc: "Igor Mammedov" Fixes: https://gitlab.com/qemu-project/qemu/-/issues/770 Signed-off-by: Michael S. Tsirkin --- hw/acpi/pcihp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 30405b5113..a5e182dd3a 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -491,6 +491,9 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data, } bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select); +if (!bus) { +break; +} QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { Object *o = OBJECT(kid->child); PCIDevice *dev = PCI_DEVICE(o); -- MST
Re: [PATCH] acpi: validate hotplug selector on access
On 12/21/21 15:48, Michael S. Tsirkin wrote: > When bus is looked up on a pci write, we didn't > validate that the lookup succeeded. > Fuzzers thus can trigger QEMU crash by dereferencing the NULL > bus pointer. > > Fixes: b32bd763a1 ("pci: introduce acpi-index property for PCI device") > Cc: "Igor Mammedov" > Fixes: https://gitlab.com/qemu-project/qemu/-/issues/770 > Signed-off-by: Michael S. Tsirkin > --- > hw/acpi/pcihp.c | 3 +++ > 1 file changed, 3 insertions(+) Reviewed-by: Philippe Mathieu-Daudé
[PATCH v3 02/15] mm/memfd: Introduce MFD_INACCESSIBLE flag
Introduce a new memfd_create() flag indicating the content of the created memfd is inaccessible from userspace. It does this by force setting F_SEAL_INACCESSIBLE seal when the file is created. It also set F_SEAL_SEAL to prevent future sealing, which means, it can not coexist with MFD_ALLOW_SEALING. Signed-off-by: Chao Peng --- include/uapi/linux/memfd.h | 1 + mm/memfd.c | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h index 7a8a26751c23..48750474b904 100644 --- a/include/uapi/linux/memfd.h +++ b/include/uapi/linux/memfd.h @@ -8,6 +8,7 @@ #define MFD_CLOEXEC0x0001U #define MFD_ALLOW_SEALING 0x0002U #define MFD_HUGETLB0x0004U +#define MFD_INACCESSIBLE 0x0008U /* * Huge page size encoding when MFD_HUGETLB is specified, and a huge page diff --git a/mm/memfd.c b/mm/memfd.c index 9f80f162791a..c898a007fb76 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -245,7 +245,8 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg) #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | \ + MFD_INACCESSIBLE) SYSCALL_DEFINE2(memfd_create, const char __user *, uname, @@ -267,6 +268,10 @@ SYSCALL_DEFINE2(memfd_create, return -EINVAL; } + /* Disallow sealing when MFD_INACCESSIBLE is set. */ + if (flags & MFD_INACCESSIBLE && flags & MFD_ALLOW_SEALING) + return -EINVAL; + /* length includes terminating zero */ len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); if (len <= 0) @@ -315,6 +320,11 @@ SYSCALL_DEFINE2(memfd_create, *file_seals &= ~F_SEAL_SEAL; } + if (flags & MFD_INACCESSIBLE) { + file_seals = memfd_file_seals_ptr(file); + *file_seals &= F_SEAL_SEAL | F_SEAL_INACCESSIBLE; + } + fd_install(fd, file); kfree(name); return fd; -- 2.17.1
[PATCH v3 04/15] KVM: Extend the memslot to support fd-based private memory
Extend the memslot definition to provide fd-based private memory support by adding two new fields(fd/ofs). The memslot then can maintain memory for both shared and private pages in a single memslot. Shared pages are provided in the existing way by using userspace_addr(hva) field and get_user_pages() while private pages are provided through the new fields(fd/ofs). Since there is no 'hva' concept anymore for private memory we cannot call get_user_pages() to get a pfn, instead we rely on the newly introduced MEMFD_OPS callbacks to do the same job. This new extension is indicated by a new flag KVM_MEM_PRIVATE. Signed-off-by: Yu Zhang Signed-off-by: Chao Peng --- include/linux/kvm_host.h | 9 + include/uapi/linux/kvm.h | 12 2 files changed, 21 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 865a677baf52..96e46b288ecd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -433,8 +433,17 @@ struct kvm_memory_slot { u32 flags; short id; u16 as_id; + struct file *file; + u64 file_ofs; }; +static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot) +{ + if (slot && (slot->flags & KVM_MEM_PRIVATE)) + return true; + return false; +} + static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e3706e574bd2..a2c1fb8c9843 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -103,6 +103,17 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +struct kvm_userspace_memory_region_ext { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* hva */ + __u64 ofs; /* offset into fd */ + __u32 fd; + __u32 padding[5]; +}; + /* * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, * other bits are reserved for kvm internal use which are defined in @@ -110,6 +121,7 @@ struct kvm_userspace_memory_region { */ #define KVM_MEM_LOG_DIRTY_PAGES(1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_PRIVATE(1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { -- 2.17.1
[PATCH v3 01/15] mm/shmem: Introduce F_SEAL_INACCESSIBLE
From: "Kirill A. Shutemov" Introduce a new seal F_SEAL_INACCESSIBLE indicating the content of the file is inaccessible from userspace in any possible ways like read(),write() or mmap() etc. It provides semantics required for KVM guest private memory support that a file descriptor with this seal set is going to be used as the source of guest memory in confidential computing environments such as Intel TDX/AMD SEV but may not be accessible from host userspace. At this time only shmem implements this seal. Signed-off-by: Kirill A. Shutemov Signed-off-by: Chao Peng --- include/uapi/linux/fcntl.h | 1 + mm/shmem.c | 37 +++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 2f86b2ad6d7e..e2bad051936f 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -43,6 +43,7 @@ #define F_SEAL_GROW0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #define F_SEAL_FUTURE_WRITE0x0010 /* prevent future writes while mapped */ +#define F_SEAL_INACCESSIBLE0x0020 /* prevent file from accessing */ /* (1U << 31) is reserved for signed error codes */ /* diff --git a/mm/shmem.c b/mm/shmem.c index 18f93c2d68f1..faa7e9b1b9bc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1098,6 +1098,10 @@ static int shmem_setattr(struct user_namespace *mnt_userns, (newsize > oldsize && (info->seals & F_SEAL_GROW))) return -EPERM; + if ((info->seals & F_SEAL_INACCESSIBLE) && + (newsize & ~PAGE_MASK)) + return -EINVAL; + if (newsize != oldsize) { error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize); @@ -1364,6 +1368,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) goto redirty; if (!total_swap_pages) goto redirty; + if (info->seals & F_SEAL_INACCESSIBLE) + goto redirty; /* * Our capabilities prevent regular writeback or sync from ever calling @@ -2262,6 +2268,9 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; + if (info->seals & F_SEAL_INACCESSIBLE) + return -EPERM; + /* arm64 - allow memory tagging on RAM-based files */ vma->vm_flags |= VM_MTE_ALLOWED; @@ -2459,12 +2468,15 @@ shmem_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; /* i_rwsem is held by caller */ - if (unlikely(info->seals & (F_SEAL_GROW | - F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { + if (unlikely(info->seals & (F_SEAL_GROW | F_SEAL_WRITE | + F_SEAL_FUTURE_WRITE | + F_SEAL_INACCESSIBLE))) { if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) return -EPERM; if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) return -EPERM; + if (info->seals & F_SEAL_INACCESSIBLE) + return -EPERM; } return shmem_getpage(inode, index, pagep, SGP_WRITE); @@ -2538,6 +2550,21 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) end_index = i_size >> PAGE_SHIFT; if (index > end_index) break; + + /* +* inode_lock protects setting up seals as well as write to +* i_size. Setting F_SEAL_INACCESSIBLE only allowed with +* i_size == 0. +* +* Check F_SEAL_INACCESSIBLE after i_size. It effectively +* serialize read vs. setting F_SEAL_INACCESSIBLE without +* taking inode_lock in read path. +*/ + if (SHMEM_I(inode)->seals & F_SEAL_INACCESSIBLE) { + error = -EPERM; + break; + } + if (index == end_index) { nr = i_size & ~PAGE_MASK; if (nr <= offset) @@ -2663,6 +2690,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, goto out; } + if ((info->seals & F_SEAL_INACCESSIBLE) && + (offset & ~PAGE_MASK || len & ~PAGE_MASK)) { + error = -EINVAL; + goto out; + } + shmem_falloc.waitq = &shmem_falloc_waitq; shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; -- 2.17.1