Re: [RFC v4 PATCH 18/49] multi-process: create IOHUB object to handle irq
On Thu, Oct 24, 2019 at 05:08:59AM -0400, Jagannathan Raman wrote: I don't know the interrupt code well enough to decide whether it's necessary to do so much work and tie the protocol to the KVM API. The main QEMU process already has the KVM API code and the ability to deal with these things. I was expecting something much simpler, like protocol messages that pass a single eventfd for raising an interrupt and no state tracking interrupt levels. > +static void intr_resample_handler(void *opaque) > +{ > +ResampleToken *token = opaque; > +RemoteIOHubState *iohub = token->iohub; > +uint64_t val; > +int pirq, s; > + > +pirq = token->pirq; > + > +s = read(event_notifier_get_fd(&iohub->resamplefds[pirq]), &val, > + sizeof(uint64_t)); Please use event_notifier_test_and_clear(). > + > +assert(s >= 0); > + > +qemu_mutex_lock(&iohub->irq_level_lock[pirq]); > + > +if (iohub->irq_level[pirq]) { > +event_notifier_set(&iohub->irqfds[pirq]); > +} > + > +qemu_mutex_unlock(&iohub->irq_level_lock[pirq]); > +} > + > +void process_set_irqfd_msg(PCIDevice *pci_dev, MPQemuMsg *msg) This function doesn't handle the case where SET_IRQFD is sent multiple times for the same interrupt gracefully. > +{ > +RemMachineState *machine = REMOTE_MACHINE(current_machine); > +RemoteIOHubState *iohub = machine->iohub; > +ResampleToken *token; > +int pirq = remote_iohub_map_irq(pci_dev, msg->data1.set_irqfd.intx); > + > +assert(msg->num_fds == 2); > + > +event_notifier_init_fd(&iohub->irqfds[pirq], msg->fds[0]); > +event_notifier_init_fd(&iohub->resamplefds[pirq], msg->fds[1]); event_notifier_cleanup() is missing. > + > +token = g_malloc0(sizeof(ResampleToken)); I couldn't find a g_free() and wonder if this needs to be malloced at all. signature.asc Description: PGP signature
Re: [PATCH v2] linux-user/strace: Add missing signal strings
On 11/20/19 3:55 PM, Helge Deller wrote: > Add the textual representations of some missing target signals. > > Signed-off-by: Helge Deller Reviewed-by: Richard Henderson r~
Re: [RFC v4 PATCH 19/49] multi-process: configure remote side devices
On Thu, Oct 24, 2019 at 05:09:00AM -0400, Jagannathan Raman wrote: > +static void set_remote_opts(PCIDevice *dev, QDict *qdict, unsigned int cmd) > +{ > +QString *qstr; > +MPQemuMsg msg; > +const char *str; > +PCIProxyDev *pdev; > + > +pdev = PCI_PROXY_DEV(dev); > + > +qstr = qobject_to_json(QOBJECT(qdict)); qstr is leaked. > +str = qstring_get_str(qstr); > + > +memset(&msg, 0, sizeof(MPQemuMsg)); > + > +msg.data2 = (uint8_t *)str; > +msg.cmd = cmd; > +msg.bytestream = 1; > +msg.size = qstring_get_length(qstr) + 1; > +msg.num_fds = 0; > + > +mpqemu_msg_send(pdev->mpqemu_link, &msg, pdev->mpqemu_link->com); > + > +return; > +} signature.asc Description: PGP signature
Re: [PATCH 11/15] RFC: s390x: Exit on vcpu reset error
On 20.11.19 12:43, Janosch Frank wrote: If a vcpu is not properly reset it might be better to just end the VM. Signed-off-by: Janosch Frank --- target/s390x/kvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 190400df55..0210b54157 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -418,11 +418,13 @@ static void kvm_s390_reset_vcpu(S390CPU *cpu, unsigned long type) if (kvm_vcpu_ioctl(cs, KVM_S390_VCPU_RESET, type)) { error_report("CPU reset type %ld failed on CPU %i", type, cs->cpu_index); +exit(1); } return; } if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) { error_report("Initial CPU reset failed on CPU %i", cs->cpu_index); +exit(1); } } According to the comment in include/qapi/error.h "Please don't error_setg(&error_fatal, ...), use error_report() and exit(), because that's more obvious." This is the right thing to do. ... and it's a fairly pathological thing to happen either way. Reviewed-by: David Hildenbrand -- Thanks, David / dhildenb
Re: [RFC v4 PATCH 20/49] multi-process: add qdev_proxy_add to create proxy devices
On Thu, Oct 24, 2019 at 05:09:01AM -0400, Jagannathan Raman wrote: > diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c > index 3b84055..fc1c731 100644 > --- a/hw/proxy/qemu-proxy.c > +++ b/hw/proxy/qemu-proxy.c > @@ -337,7 +337,8 @@ static void init_proxy(PCIDevice *dev, char *command, > bool need_spawn, Error **e > > if (!pdev->managed) { > if (need_spawn) { > -if (!remote_spawn(pdev, command, &local_error)) { > +if (remote_spawn(pdev, command, &local_error)) { > +fprintf(stderr, "remote spawn failed\n"); > return; > } > } Looks like a fix for a bug in a previous patch. Please squash it. Also, please propagate local_err and do not use fprintf in a function that has an errp argument for reporting errors. > +#if defined(CONFIG_MPQEMU) Maybe these functions should be in a separate file that the makefile includes when CONFIG_MPQEMU is defined. > + > +static PCIProxyDev *get_proxy_object_rid(const char *rid) > +{ > +PCIProxyDev *entry; > +if (!proxy_list_lock.initialized) { > +QLIST_INIT(&proxy_dev_list.devices); > +qemu_mutex_init(&proxy_list_lock); > +} This locking approach is broken since exactly-once initialization semantics are required to avoid races during initialization. Is the lock needed at all? > +DeviceState *qdev_remote_add(QemuOpts *opts, bool device, Error **errp) > +{ > +PCIProxyDev *pdev = NULL; > +DeviceState *dev; > +const char *rid, *rsocket = NULL, *command = NULL; > +QDict *qdict_new; > +const char *id = NULL; > +const char *driver = NULL; > +const char *bus = NULL; > + > +if (!proxy_list_lock.initialized) { > +QLIST_INIT(&proxy_dev_list.devices); > +qemu_mutex_init(&proxy_list_lock); > +} > + > +rid = qemu_opt_get(opts, "rid"); > +if (!rid) { > +error_setg(errp, "rdevice option needs rid specified."); > +return NULL; > +} > +if (device) { > +driver = qemu_opt_get(opts, "driver"); > +/* TODO: properly identify the device class. */ > +if (strncmp(driver, "lsi", 3) == 0) { > +id = qemu_opts_id(opts); > +if (!id) { > +error_setg(errp, "qdev_remote_add option needs id > specified."); > +return NULL; > +} > +} > +} > + > +rsocket = qemu_opt_get(opts, "socket"); > +if (rsocket) { > +if (strlen(rsocket) > MAX_RID_LENGTH) { > +error_setg(errp, "Socket number is incorrect."); > +return NULL; > +} > +} > +/* > + * TODO: verify command with known commands and on remote end. > + * How else can we verify the binary we launch without libvirtd support? > + */ > +command = qemu_opt_get(opts, "command"); > +if (!rsocket && !command) { > +error_setg(errp, "rdevice option needs socket or command > specified."); > +return NULL; > +} > + > +bus = qemu_opt_get(opts, "bus"); > +dev = qdev_proxy_add(rid, id, (char *)bus, (char *)command, > + rsocket ? atoi(rsocket) : -1, > + rsocket ? true : false, errp); > +if (!dev) { > +error_setg(errp, "qdev_proxy_add error."); > +return NULL; > +} > + > +qdict_new = qemu_opts_to_qdict(opts, NULL); > + > +if (!qdict_new) { > +error_setg(errp, "Could not parse rdevice options."); > +return NULL; > +} > + > +pdev = PCI_PROXY_DEV(dev); > +if (!pdev->set_remote_opts) { > +/* TODO: destroy proxy? */ > +error_setg(errp, "set_remote_opts failed."); > +return NULL; > +} else { > +if (id && !pdev->dev_id) { > +pdev->dev_id = g_strdup(id); > +} > +pdev->set_remote_opts(PCI_DEVICE(pdev), qdict_new, > + device ? DEV_OPTS : DRIVE_OPTS); This function needs to be able to return an error if setting the options failed. A response message needs to be defined in the protocol to support this. Is DRIVE_OPTS still needed? I thought the drives would be configured in the remote process and no proxy objects were needed on the QEMU side? signature.asc Description: PGP signature
Re: [PATCH] Fix incorrect int->float conversions caught by clang -Wimplicit-int-float-conversion
On 11/20/19 6:30 PM, Fangrui Song wrote: > On 2019-11-20, Juan Quintela wrote: >> Markus Armbruster wrote: >>> Fangrui Song writes: >>> The warning will be enabled by default in clang 10. It is not available for clang <= 9. qemu/migration/migration.c:2038:24: error: implicit conversion from 'long' to 'double' changes value from 9223372036854775807 to 9223372036854775808 [-Werror,-Wimplicit-int-float-conversion] ... qemu/util/cutils.c:245:23: error: implicit conversion from 'unsigned long' to 'double' changes value from 18446744073709550592 to 18446744073709551616 [-Werror,-Wimplicit-int-float-conversion] Signed-off-by: Fangrui Song --- migration/migration.c | 4 ++-- util/cutils.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 354ad072fa..ac3ea2934a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -53,6 +53,7 @@ #include "monitor/monitor.h" #include "net/announce.h" #include "qemu/queue.h" +#include #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ @@ -2035,11 +2036,10 @@ void qmp_migrate_set_downtime(double value, Error **errp) >>> if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { >>> error_setg(errp, "Parameter 'downtime_limit' expects an integer >>> in " >>> "the range of 0 to %d seconds", >>> MAX_MIGRATE_DOWNTIME_SECONDS); >>> return; } >>> >>> @value is now in [0,2000]. >>> value *= 1000; /* Convert to milliseconds */ >>> >>> @value is in [0,200] >>> - value = MAX(0, MIN(INT64_MAX, value)); >>> >>> This does nothing. >>> MigrateSetParameters p = { .has_downtime_limit = true, - .downtime_limit = value, + .downtime_limit = (int64_t)fmin(value, nextafter(0x1p63, 0)), >>> >>> This does nothing and is hard to read :) >>> >>> Can we simply drop the offending line statement instead? >> >> Agreed aboutdropping the whole bussines for migration. >> >> }; qmp_migrate_set_parameters(&p, errp); diff --git a/util/cutils.c b/util/cutils.c index fd591cadf0..2b4484c015 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -239,10 +239,10 @@ static int do_strtosz(const char *nptr, const char **end, goto out; } /* - * Values >= 0xfc00 overflow uint64_t after their trip + * Values > nextafter(0x1p64, 0) overflow uint64_t after their trip * through double (53 bits of precision). */ - if ((val * mul >= 0xfc00) || val < 0) { + if ((val * mul > nextafter(0x1p64, 0)) || val < 0) { retval = -ERANGE; goto out; } >> >> This comment was really bad (it says the same that the code). >> On the other hand, I can *kind of* understand what does 0x> f's here>. >> >> But I am at a complete loss about what value is: >> >> nextafter(0x1p64, 0). >> >> Can we put what value is that instead? > > It is a C99 hexadecimal floating-point literal. > 0x1p64 represents hex fraction 1.0 scaled by 2**64, that is 2**64. > > We can write this as `val * mul > 0xf800p0`, but I feel that > counting the number of f's is error-prone and is not fun. > > (We cannot use val * mul >= 0x1p64. > If FLT_EVAL_METHOD == 2, the intermediate computation val * mul will be > performed at long double precision, val * mul may not by representable > by a double and will overflow as (double)0x1p64.) I agree about not spelling out the f's, or the 0x800 at the end. That's something that the compiler can do for us, resolving this standard library function at compile-time. We just need a better comment. Perhaps: /* * Values near UINT64_MAX overflow to 2**64 when converting * to double precision. Compare against the maximum representable * double precision value below 2**64, computed as "the next value * after 2**64 (0x1p64) in the direction of 0". */ r~
Re: [PATCH 01/15] s390x: Cleanup cpu resets
On Thu, 21 Nov 2019 12:32:38 +0100 Janosch Frank wrote: > On 11/21/19 12:10 PM, Cornelia Huck wrote: > > On Wed, 20 Nov 2019 06:43:20 -0500 > > Janosch Frank wrote: > > > >> Let's move the resets into one function and switch by type, so we can > >> use fallthroughs for shared reset actions. > > > > Doing that makes sense. > > > >> > >> Signed-off-by: Janosch Frank > >> --- > >> hw/s390x/s390-virtio-ccw.c | 3 + > >> target/s390x/cpu.c | 111 - > >> 2 files changed, 52 insertions(+), 62 deletions(-) > >> > >> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c > >> index d3edeef0ad..c1d1440272 100644 > >> --- a/hw/s390x/s390-virtio-ccw.c > >> +++ b/hw/s390x/s390-virtio-ccw.c > >> @@ -348,6 +348,9 @@ static void s390_machine_reset(MachineState *machine) > >> break; > >> case S390_RESET_LOAD_NORMAL: > >> CPU_FOREACH(t) { > >> +if (t == cs) { > >> +continue; > >> +} > > > > Hm, why is this needed now? > > The Ultravisor checks which reset is done to which cpu. > So blindly resetting the calling cpu with a normal reset to then do a > clear/initial reset will return an error. Let's split this change out, then? The rest of the patch is a simple refactoring. pgpQVMjtHWUrD.pgp Description: OpenPGP digital signature
Re: [RFC v4 PATCH 30/49] multi-process: send heartbeat messages to remote
On Wed, Nov 13, 2019 at 11:01:07AM -0500, Jag Raman wrote: > > > On 11/11/2019 11:27 AM, Stefan Hajnoczi wrote: > > On Thu, Oct 24, 2019 at 05:09:11AM -0400, Jagannathan Raman wrote: > > > +static void broadcast_msg(MPQemuMsg *msg, bool need_reply) > > > +{ > > > +PCIProxyDev *entry; > > > +unsigned int pid; > > > +int wait; > > > + > > > +QLIST_FOREACH(entry, &proxy_dev_list.devices, next) { > > > +if (need_reply) { > > > +wait = eventfd(0, EFD_NONBLOCK); > > > +msg->num_fds = 1; > > > +msg->fds[0] = wait; > > > +} > > > + > > > +mpqemu_msg_send(entry->mpqemu_link, msg, > > > entry->mpqemu_link->com); > > > +if (need_reply) { > > > +pid = (uint32_t)wait_for_remote(wait); > > > > Sometimes QEMU really needs to wait for the remote process before it can > > make progress. I think this is not one of those cases though. > > > > Since QEMU is event-driven it's problematic to invoke blocking system > > calls. The remote process might not respond for a significant amount of > > time. Other QEMU threads will be held up waiting for the QEMU global > > mutex in the meantime (because we hold it!). > > There are places where we wait synchronously for the remote process. > However, these synchronous waits carry a timeout to prevent the hang > situation you described above. > > We will add an error recovery in the future. That is, we will respawn > the remote process if the QEMU times out waiting for it. Even with a timeout, in the meantime the event loop is blocked. That means timers will be delayed by a large amount, the monitor will be unresponsive, etc. > > > > Please implement heartbeat/ping asynchronously. The wait eventfd should > > be read by an event loop fd handler instead. That way QEMU can continue > > with running the VM while waiting for the remote process. > > In the current implementation, the heartbeat/ping is asynchronous. > start_heartbeat_timer() sets up a timer to perform the ping. The heartbeat/ping is synchronous because broadcast_msg() blocks. Stefan signature.asc Description: PGP signature
Re: [PATCH 11/15] RFC: s390x: Exit on vcpu reset error
On 11/21/19 1:14 PM, David Hildenbrand wrote: > On 20.11.19 12:43, Janosch Frank wrote: >> If a vcpu is not properly reset it might be better to just end the VM. >> >> Signed-off-by: Janosch Frank >> --- >> target/s390x/kvm.c | 2 ++ >> 1 file changed, 2 insertions(+) >> >> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c >> index 190400df55..0210b54157 100644 >> --- a/target/s390x/kvm.c >> +++ b/target/s390x/kvm.c >> @@ -418,11 +418,13 @@ static void kvm_s390_reset_vcpu(S390CPU *cpu, unsigned >> long type) >> if (kvm_vcpu_ioctl(cs, KVM_S390_VCPU_RESET, type)) { >> error_report("CPU reset type %ld failed on CPU %i", >>type, cs->cpu_index); >> +exit(1); >> } >> return; >> } >> if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) { >> error_report("Initial CPU reset failed on CPU %i", cs->cpu_index); >> +exit(1); >> } >> } >> >> > > According to the comment in include/qapi/error.h > > "Please don't error_setg(&error_fatal, ...), use error_report() and > exit(), because that's more obvious." > > This is the right thing to do. > > ... and it's a fairly pathological thing to happen either way. > > Reviewed-by: David Hildenbrand > Do we want to have that separate or should I squash it into the reset changes? signature.asc Description: OpenPGP digital signature
Re: [PATCH 11/15] RFC: s390x: Exit on vcpu reset error
On 21.11.19 13:19, Janosch Frank wrote: On 11/21/19 1:14 PM, David Hildenbrand wrote: On 20.11.19 12:43, Janosch Frank wrote: If a vcpu is not properly reset it might be better to just end the VM. Signed-off-by: Janosch Frank --- target/s390x/kvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 190400df55..0210b54157 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -418,11 +418,13 @@ static void kvm_s390_reset_vcpu(S390CPU *cpu, unsigned long type) if (kvm_vcpu_ioctl(cs, KVM_S390_VCPU_RESET, type)) { error_report("CPU reset type %ld failed on CPU %i", type, cs->cpu_index); +exit(1); } return; } if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) { error_report("Initial CPU reset failed on CPU %i", cs->cpu_index); +exit(1); } } According to the comment in include/qapi/error.h "Please don't error_setg(&error_fatal, ...), use error_report() and exit(), because that's more obvious." This is the right thing to do. ... and it's a fairly pathological thing to happen either way. Reviewed-by: David Hildenbrand Do we want to have that separate or should I squash it into the reset changes? I' keep it separated. -- Thanks, David / dhildenb
Re: [PATCH v2 2/6] block: truncate: Don't make backing file data visible
On 21.11.19 12:34, Kevin Wolf wrote: > Am 21.11.2019 um 09:59 hat Max Reitz geschrieben: >> On 20.11.19 19:44, Kevin Wolf wrote: >>> When extending the size of an image that has a backing file larger than >>> its old size, make sure that the backing file data doesn't become >>> visible in the guest, but the added area is properly zeroed out. >>> >>> Consider the following scenario where the overlay is shorter than its >>> backing file: >>> >>> base.qcow2: >>> overlay.qcow2: >>> >>> When resizing (extending) overlay.qcow2, the new blocks should not stay >>> unallocated and make the additional As from base.qcow2 visible like >>> before this patch, but zeros should be read. >>> >>> A similar case happens with the various variants of a commit job when an >>> intermediate file is short (- for unallocated): >>> >>> base.qcow2: A-A- >>> mid.qcow2: BB-B >>> top.qcow2: C--C--C- >>> >>> After commit top.qcow2 to mid.qcow2, the following happens: >>> >>> mid.qcow2: CB-C00C0 (correct result) >>> mid.qcow2: CB-C--C- (before this fix) >>> >>> Without the fix, blocks that previously read as zeros on top.qcow2 >>> suddenly turn into A. >>> >>> Signed-off-by: Kevin Wolf >>> --- >>> block/io.c | 32 >>> 1 file changed, 32 insertions(+) >> >> Zeroing the intersection may take some time. So is it right for QMP’s >> block_resize to do this, seeing it is a synchronous operation? > > What else would be right? Returning an error? Going through a deprecation cycle. > Common cases (raw and qcow2 v3 without external data files) are quick > anyway. Well, but quick enough for a fully blocking operation? >> As far as I can tell, jobs actually have the same problem. I don’t >> think mirror or commit have a pause point before truncating, so they >> still block the monitor there, don’t they? > > Do you really need a pause point? They call bdrv_co_truncate() from > inside the job coroutine, so it will yield. I would expect that this > is enough. OK then. > But in fact, all jobs have a pause point before even calling .run(), so > even if that made a difference, it should still be fine. Good. But I believe this is still a problem for block_resize. I don’t see why this needs to be fixed in 4.2-rc3. What’s the problem with going through a proper deprecation cycle other than the fact that we can’t start it in 4.2 because we don’t have a resize job yet? Max signature.asc Description: OpenPGP digital signature
Re: [RFC v4 PATCH 32/49] multi-process: Use separate MMIO communication channel
On Wed, Nov 13, 2019 at 11:14:50AM -0500, Jag Raman wrote: > On 11/11/2019 11:21 AM, Stefan Hajnoczi wrote: > > On Thu, Oct 24, 2019 at 05:09:13AM -0400, Jagannathan Raman wrote: > > > Using a separate communication channel for MMIO helps > > > with improving Performance > > > > Why? > > Typical initiation of IO operations involves multiple MMIO accesses per > IO operation. In some legacy devices like LSI, the completion of the IO > operations is also accomplished by polling on MMIO registers. Therefore, > MMIO traffic can be hefty in some cases and contribute to Performance. > > Having a dedicated channel for MMIO ensures that it doesn't have to > compete with other messages to the remote process, especially when there > are multiple devices emulated by a single remote process. A vCPU doing a polling read on an MMIO register will cause a BAR_READ message to be sent to the remote process. The vCPU thread waits for the response to this message. When there are multiple remote devices each has its own socket, so communication with different remote processes does not interfere. The only scenarios I can think of are: 1. Interference within a single device between vCPUs and/or the QEMU monitor. 2. A single process serving multiple devices that is implemented in a way such that different devices interfere with each other. It sounds like you are saying the problem is #2, but this is still unclear to me. If the remote process can be implemented in a way such that there is no interference when each device has a special MMIO socket, then why can't it be implemented in a way such that there is no interference when each device's main socket is used (each device has it's own!). Maybe I've missed the point. It would be good if you could explain in more detail. Stefan signature.asc Description: PGP signature
Re: [RFC v4 PATCH 00/49] Initial support of multi-process qemu
On Thu, Oct 24, 2019 at 05:08:41AM -0400, Jagannathan Raman wrote: > Started with the presentation in October 2017 made by Marc-Andre (Red Hat) > and Konrad Wilk (Oracle) [1], and continued by Jag's BoF at KVM Forum 2018, > the multi-process project is now a prototype and presented in this patchset. > John & Elena will present the status of this project in KVM Forum 2019. > > This first series enables the emulation of lsi53c895a in a separate process. > > We posted the Proof Of Concept patches [2] before the BoF session in 2018. > Subsequently, we posted RFC v1 [3], RFC v2 [4] and RFC v3 [5] of this series. > > We want to present version 4 of this series, which incorporates the feedback > we received for v3 & adds support for live migrating the remote process. > > Following people contributed to this patchset: > > John G Johnson > Jagannathan Raman > Elena Ufimtseva > Kanth Ghatraju > > For full concept writeup about QEMU disaggregation refer to > docs/devel/qemu-multiprocess.rst. Please refer to > docs/qemu-multiprocess.txt for usage information. > > We are planning on making the following improvements in the future: > - Performance improvements > - Libvirt support > - Enforcement of security policies > - blockdev support > > We welcome all your ideas, concerns, and questions for this patchset. > > Thank you! I've wrapped up for v4. There is more to review in detail but I've posted enough comments so that I'd like to see the next revision before investing more time. The main topics: 1. It's possible to have just one proxy device per bus type (PCI, USB, etc). The proxy device instance can be initialized by communicating with the remote process to inquire about its memory regions, interrupts, etc. This removes the need to hardcode that information into per-device proxy objects, which is tedious and can get out-of-sync with the real device emulation code. This is becoming similar to doing VFIO or muser over a socket... 2. Security and code quality. Missing input validation and resource leaks don't inspire confidence :(. Please run scripts/checkpatch.pl on the code. Stefan signature.asc Description: PGP signature
Re: [PULL for rc3 0/5] a few doc and testing tweaks
On Wed, 20 Nov 2019 at 10:58, Alex Bennée wrote: > > The following changes since commit 39e2821077e6dcf788b7c2a9ef50970ec7995437: > > Update version for v4.2.0-rc2 release (2019-11-19 19:34:10 +) > > are available in the Git repository at: > > https://github.com/stsquad/qemu.git tags/pull-rc3-testing-and-tcg-201119-1 > > for you to fetch changes up to 22c30b2d20e828edadbd992c1d4c4bfd0f3433ba: > > tests/tcg: modify multiarch tests to work with clang (2019-11-20 10:53:31 > +) > > > A few test and doc fixes: > > - tweak DEBUG behaviour for vm-test-build > - rename and update plug docs for versioning > - slim down MAIN_SOFTMMU_TARGETS > Applied, thanks. Please update the changelog at https://wiki.qemu.org/ChangeLog/4.2 for any user-visible changes. -- PMM
Re: [PATCH 03/15] s390x: protvirt: Add diag308 subcodes 8 - 10
On Wed, 20 Nov 2019 06:43:22 -0500 Janosch Frank wrote: > For diag308 subcodes 8 - 10 we have a new ipib of type 5. The ipib > holds the address and length of the secure execution header, as well > as a list of guest components. > > Each component is a block of memory, for example kernel or initrd, > which needs to be decrypted by the Ultravisor in order to run a > protected VM. The secure execution header instructs the Ultravisor on > how to handle the protected VM and its components. > > Subcodes 8 and 9 are similiar to 5 and 6 and subcode 10 will finally > start the protected guest. > > Subcodes 8-10 are not valid in protected mode, we have to do a subcode > 3 and then the 8 and 10 combination for a protected reboot. > > Signed-off-by: Janosch Frank > --- > hw/s390x/ipl.c | 48 ++--- > hw/s390x/ipl.h | 33 +++ > target/s390x/diag.c | 26 ++-- > 3 files changed, 102 insertions(+), 5 deletions(-) > > diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c > index ca544d64c5..a077926f36 100644 > --- a/hw/s390x/ipl.c > +++ b/hw/s390x/ipl.c > @@ -529,15 +529,56 @@ static bool is_virtio_scsi_device(IplParameterBlock > *iplb) > return is_virtio_ccw_device_of_type(iplb, VIRTIO_ID_SCSI); > } > > +int s390_ipl_pv_check_comp(IplParameterBlock *iplb) s390_ipl_pv_check_components() ? > +{ > +int i; > +IPLBlockPV *ipib_pv = &iplb->pv; > + > +if (ipib_pv->num_comp == 0) { > +return -EINVAL; > +} > + > +for (i = 0; i < ipib_pv->num_comp; i++) { > + > +/* Addr must be 4k aligned */ > +if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) { > +return -EINVAL; > +} > + > +/* Tweak prefix is monotonously increasing with each component */ > +if (i < ipib_pv->num_comp - 1 && > +ipib_pv->components[i].tweak_pref > > +ipib_pv->components[i + 1].tweak_pref) { > +return -EINVAL; > +} > +} > +return 1; > +} > + (...) > @@ -185,4 +211,11 @@ static inline bool iplb_valid_fcp(IplParameterBlock > *iplb) > iplb->pbt == S390_IPL_TYPE_FCP; > } > > +static inline bool iplb_valid_se(IplParameterBlock *iplb) iplb_valid_pv() ? > +{ > +return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_PV_LEN && > + iplb->pbt == S390_IPL_TYPE_PV; > +} > + > + > #endif (...) > @@ -105,6 +110,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > s390_ipl_reset_request(cs, S390_RESET_REIPL); > break; > case DIAG308_SET: > +case DIAG308_PV_SET: /* Set SE parms */ PV parms? ('SE' makes me think of 'service element' :) > if (diag308_parm_check(env, r1, addr, ra, false)) { > return; > } > @@ -117,7 +123,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > > cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); > > -if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb)) { > +if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb) && > +!(iplb_valid_se(iplb) && s390_ipl_pv_check_comp(iplb) >= 0)) { > env->regs[r1 + 1] = DIAG_308_RC_INVALID; > goto out; > } > @@ -128,10 +135,15 @@ out: > g_free(iplb); > return; > case DIAG308_STORE: > +case DIAG308_PV_STORE: /* Get SE parms */ Same here. > if (diag308_parm_check(env, r1, addr, ra, true)) { > return; > } > -iplb = s390_ipl_get_iplb(); > +if (subcode == DIAG308_PV_STORE) { > +iplb = s390_ipl_get_iplb_secure(); > +} else { > +iplb = s390_ipl_get_iplb(); > +} > if (iplb) { > cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); > env->regs[r1 + 1] = DIAG_308_RC_OK; > @@ -139,6 +151,16 @@ out: > env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; > } > return; > +break; > +case DIAG308_PV_START: /* SE start */ Also here :) > +iplb = s390_ipl_get_iplb_secure(); > +if (!iplb_valid_se(iplb)) { > +env->regs[r1 + 1] = DIAG_308_RC_NO_PV_CONF; > +return; > +} > + > +s390_ipl_reset_request(cs, S390_RESET_PV); > +break; > default: > s390_program_interrupt(env, PGM_SPECIFICATION, ra); > break; Did not spot anything else.
Re: QEMU HTML documentation now on qemu.org
On Fri, Nov 15, 2019 at 09:05:32AM -0500, G 3 wrote: > > You can now access the latest QEMU HTML documentation built from > > https://wiki.qemu.org/docs/qemu-doc.html > > > This is a welcome start. Could we add version support to the URL? > > What I mean is add the version number to the path of the URL. > Like this: > https://wiki.qemu.org/docs/4.2/qemu-doc.html > > This way users of older versions of QEMU can still access documentation > relevant to his or her version. The current setup only builds qemu.git/master. The final URL is now: https://www.qemu.org/docs/master/qemu-doc.html Mike Roth could add something to the release process that publishes versioned HTML documentation. There probably won't be much activity around this until after QEMU 4.2 has been released. My patches haven't been reviewed/merged because of the code freeze. Stefan signature.asc Description: PGP signature
Re: [qemu-web PATCH] Add device fuzzing blog post
On Tue, Nov 12, 2019 at 12:39:49PM +0100, Thomas Huth wrote: > On 07/11/2019 16.48, Stefan Hajnoczi wrote: > > On Thu, Nov 7, 2019 at 10:43 AM Thomas Huth wrote: > >> > >> - Original Message - > >>> From: "Stefan Hajnoczi" > >>> Sent: Thursday, November 7, 2019 10:11:36 AM > >>> > >>> This blog post covers the device fuzzing GSoC project that Alexander > >>> Olenik did in 2019. > [...] > >> Seems like the images are missing ... can you please attach them? > > > > The commit is available with .png files here: > > https://github.com/stefanha/qemu-web/commit/49efe1b254460a92c6348e1981caf3e1320782f8 > > > > I moved the authorship information into the author: field. > > The article is online now: > > https://www.qemu.org/2019/11/07/device-fuzzing/ Thank you! Stefan signature.asc Description: PGP signature
Re: [PATCH 01/15] s390x: Cleanup cpu resets
On 20/11/2019 12.43, Janosch Frank wrote: > Let's move the resets into one function and switch by type, so we can > use fallthroughs for shared reset actions. > > Signed-off-by: Janosch Frank > --- > hw/s390x/s390-virtio-ccw.c | 3 + > target/s390x/cpu.c | 111 - > 2 files changed, 52 insertions(+), 62 deletions(-) [...] > diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c > index 3abe7e80fd..10d5b915d8 100644 > --- a/target/s390x/cpu.c > +++ b/target/s390x/cpu.c > @@ -82,67 +82,53 @@ static void s390_cpu_load_normal(CPUState *s) > } > #endif > > -/* S390CPUClass::cpu_reset() */ > -static void s390_cpu_reset(CPUState *s) > +enum { > +S390_CPU_RESET_NORMAL, > +S390_CPU_RESET_INITIAL, > +S390_CPU_RESET_CLEAR, > +}; > + > +static void s390_cpu_reset(CPUState *s, uint8_t type) Please give the enum a name and use that instead of uint8_t for "type". Or at least make it an "int". uint8_t is not really appropriate here. > { > S390CPU *cpu = S390_CPU(s); > S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); > CPUS390XState *env = &cpu->env; > > -env->pfault_token = -1UL; > -env->bpbc = false; > scc->parent_reset(s); > cpu->env.sigp_order = 0; > s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); > -} > > -/* S390CPUClass::initial_reset() */ > -static void s390_cpu_initial_reset(CPUState *s) > -{ > -S390CPU *cpu = S390_CPU(s); > -CPUS390XState *env = &cpu->env; > +/* Set initial values after clearing */ > +switch (type) { > +case S390_CPU_RESET_CLEAR: > +/* Fallthrough will clear the rest */ I think you could drop the above comment, since /* Fallthrough */ two lines later should be enough. > +memset(env, 0, offsetof(CPUS390XState, start_initial_reset_fields)); > +/* Fallthrough */ > +case S390_CPU_RESET_INITIAL: > +memset(&env->start_initial_reset_fields, 0, > + offsetof(CPUS390XState, end_reset_fields) - > + offsetof(CPUS390XState, start_initial_reset_fields)); > +/* architectured initial values for CR 0 and 14 */ > +env->cregs[0] = CR0_RESET; > +env->cregs[14] = CR14_RESET; > > -s390_cpu_reset(s); > -/* initial reset does not clear everything! */ > -memset(&env->start_initial_reset_fields, 0, > -offsetof(CPUS390XState, end_reset_fields) - > -offsetof(CPUS390XState, start_initial_reset_fields)); > - > -/* architectured initial values for CR 0 and 14 */ > -env->cregs[0] = CR0_RESET; > -env->cregs[14] = CR14_RESET; > - > -/* architectured initial value for Breaking-Event-Address register */ > -env->gbea = 1; > - > -env->pfault_token = -1UL; > - > -/* tininess for underflow is detected before rounding */ > -set_float_detect_tininess(float_tininess_before_rounding, > - &env->fpu_status); > +/* architectured initial value for Breaking-Event-Address register */ > +env->gbea = 1; > +/* tininess for underflow is detected before rounding */ > +set_float_detect_tininess(float_tininess_before_rounding, > + &env->fpu_status); > +/* Fallthrough */ > +case S390_CPU_RESET_NORMAL: > +env->pfault_token = -1UL; > +env->bpbc = false; > +break; > +} > > /* Reset state inside the kernel that we cannot access yet from QEMU. */ > -if (kvm_enabled()) { > -kvm_s390_reset_vcpu(cpu); > +if (kvm_enabled() && (type == S390_CPU_RESET_CLEAR || > + type == S390_CPU_RESET_INITIAL)) { > +kvm_s390_reset_vcpu(cpu); > } Why don't you simply move that into the switch-case statement, too? [...] Anyway, re-using code is of course a good idea, but I wonder whether it would be nicer to keep most things in place, and then simply chain the functions like this: static void s390_cpu_reset_normal(CPUState *s) { ... } static void s390_cpu_reset_initial(CPUState *s) { ... s390_cpu_reset_normal(s); ... } static void s390_cpu_reset_clear(CPUState *s) { ... s390_cpu_reset_initial() ... } Just my 0.02 €, but at least for me, that's easier to understand than a switch-case statement with fallthroughs inbetween. Thomas
Re: [PATCH 04/15] Header sync protvirt
On Wed, 20 Nov 2019 06:43:23 -0500 Janosch Frank wrote: > Let's sync all the protvirt header changes > > Signed-off-by: Janosch Frank > --- > linux-headers/asm-s390/kvm.h | 3 ++- > linux-headers/linux/kvm.h| 42 > 2 files changed, 44 insertions(+), 1 deletion(-) > > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index 18892d6541..d031051601 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h > @@ -995,6 +995,8 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_ARM_SVE 170 > #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 > #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 > +#define KVM_CAP_S390_PROTECTED 180 > +#define KVM_CAP_S390_VCPU_RESETS 181 Hm, where does this cap come from? I did not see it in the kernel patches. > > #ifdef KVM_CAP_IRQ_ROUTING >
Re: [PATCH v2 6/6] iotests: Test committing to short backing file
21.11.2019 14:39, Kevin Wolf wrote: > Am 21.11.2019 um 11:30 hat Vladimir Sementsov-Ogievskiy geschrieben: >> 21.11.2019 13:28, Vladimir Sementsov-Ogievskiy wrote: >>> 20.11.2019 21:45, Kevin Wolf wrote: Signed-off-by: Kevin Wolf >>> >>> Hmm, allocating 7G will break tests on small disks, for example, >>> on my 2G tmpfs. >>> >>> So, we probably should >>> detect "+qemu-img: Failed to resize underlying file: Could not write zeros >>> for preallocation: No space left on device" >>> errors and skip test. (better skip testcases, but it's not possible for >>> text-comparing tests :( >> >> Or could you just use smaller disks for resize? What is the purpose of such >> a big size? > > I want to exceed a 32 bit byte count for the write_zeroes operation so > that it would break if patch 1 were missing. I guess I could reduce it > to a little over 4 GB, but not less. > > Hm, though that is only for preallocation=off, which shouldn't actually > allocate the space anyway. I could use smaller sizes for falloc and > full, even though I'm not sure if we're still testing these modes with > sizes larger than INT_MAX anywhere. > Yes, preallocation=off works for me with big disk, so this should work. -- Best regards, Vladimir
Re: [PATCH 01/15] s390x: Cleanup cpu resets
On 11/21/19 1:53 PM, Thomas Huth wrote: > On 20/11/2019 12.43, Janosch Frank wrote: >> Let's move the resets into one function and switch by type, so we can >> use fallthroughs for shared reset actions. >> >> Signed-off-by: Janosch Frank >> --- >> hw/s390x/s390-virtio-ccw.c | 3 + >> target/s390x/cpu.c | 111 - >> 2 files changed, 52 insertions(+), 62 deletions(-) > [...] >> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c >> index 3abe7e80fd..10d5b915d8 100644 >> --- a/target/s390x/cpu.c >> +++ b/target/s390x/cpu.c >> @@ -82,67 +82,53 @@ static void s390_cpu_load_normal(CPUState *s) >> } >> #endif >> >> -/* S390CPUClass::cpu_reset() */ >> -static void s390_cpu_reset(CPUState *s) >> +enum { >> +S390_CPU_RESET_NORMAL, >> +S390_CPU_RESET_INITIAL, >> +S390_CPU_RESET_CLEAR, >> +}; >> + >> +static void s390_cpu_reset(CPUState *s, uint8_t type) > > Please give the enum a name and use that instead of uint8_t for "type". > Or at least make it an "int". uint8_t is not really appropriate here. Sure > >> { >> S390CPU *cpu = S390_CPU(s); >> S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); >> CPUS390XState *env = &cpu->env; >> >> -env->pfault_token = -1UL; >> -env->bpbc = false; >> scc->parent_reset(s); >> cpu->env.sigp_order = 0; >> s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); >> -} >> >> -/* S390CPUClass::initial_reset() */ >> -static void s390_cpu_initial_reset(CPUState *s) >> -{ >> -S390CPU *cpu = S390_CPU(s); >> -CPUS390XState *env = &cpu->env; >> +/* Set initial values after clearing */ >> +switch (type) { >> +case S390_CPU_RESET_CLEAR: >> +/* Fallthrough will clear the rest */ > > I think you could drop the above comment, since /* Fallthrough */ two > lines later should be enough. > >> +memset(env, 0, offsetof(CPUS390XState, start_initial_reset_fields)); >> +/* Fallthrough */ >> +case S390_CPU_RESET_INITIAL: >> +memset(&env->start_initial_reset_fields, 0, >> + offsetof(CPUS390XState, end_reset_fields) - >> + offsetof(CPUS390XState, start_initial_reset_fields)); >> +/* architectured initial values for CR 0 and 14 */ >> +env->cregs[0] = CR0_RESET; >> +env->cregs[14] = CR14_RESET; >> >> -s390_cpu_reset(s); >> -/* initial reset does not clear everything! */ >> -memset(&env->start_initial_reset_fields, 0, >> -offsetof(CPUS390XState, end_reset_fields) - >> -offsetof(CPUS390XState, start_initial_reset_fields)); >> - >> -/* architectured initial values for CR 0 and 14 */ >> -env->cregs[0] = CR0_RESET; >> -env->cregs[14] = CR14_RESET; >> - >> -/* architectured initial value for Breaking-Event-Address register */ >> -env->gbea = 1; >> - >> -env->pfault_token = -1UL; >> - >> -/* tininess for underflow is detected before rounding */ >> -set_float_detect_tininess(float_tininess_before_rounding, >> - &env->fpu_status); >> +/* architectured initial value for Breaking-Event-Address register >> */ >> +env->gbea = 1; >> +/* tininess for underflow is detected before rounding */ >> +set_float_detect_tininess(float_tininess_before_rounding, >> + &env->fpu_status); >> +/* Fallthrough */ >> +case S390_CPU_RESET_NORMAL: >> +env->pfault_token = -1UL; >> +env->bpbc = false; >> +break; >> +} >> >> /* Reset state inside the kernel that we cannot access yet from QEMU. */ >> -if (kvm_enabled()) { >> -kvm_s390_reset_vcpu(cpu); >> +if (kvm_enabled() && (type == S390_CPU_RESET_CLEAR || >> + type == S390_CPU_RESET_INITIAL)) { >> +kvm_s390_reset_vcpu(cpu); >> } > > Why don't you simply move that into the switch-case statement, too? There was a reason for that, time to load it from cold storage... > > [...] > > Anyway, re-using code is of course a good idea, but I wonder whether it > would be nicer to keep most things in place, and then simply chain the > functions like this: I tried that and I prefer the version in the patch. > > static void s390_cpu_reset_normal(CPUState *s) > { >... > } > > static void s390_cpu_reset_initial(CPUState *s) > { > ... > s390_cpu_reset_normal(s); > ... > } > > static void s390_cpu_reset_clear(CPUState *s) > { > ... > s390_cpu_reset_initial() > ... > } > > Just my 0.02 €, but at least for me, that's easier to understand than a > switch-case statement with fallthroughs inbetween. > > Thomas > signature.asc Description: OpenPGP digital signature
Re: [PATCH 02/15] s390x: Beautify diag308 handling
On 21/11/2019 12.21, David Hildenbrand wrote: > On 20.11.19 12:43, Janosch Frank wrote: >> Let's improve readability by: >> * Using constants for the subcodes >> * Moving parameter checking into a function >> * Removing subcode > 6 check as the default case catches that >> >> Signed-off-by: Janosch Frank >> --- >> target/s390x/diag.c | 54 +++-- >> 1 file changed, 32 insertions(+), 22 deletions(-) >> >> diff --git a/target/s390x/diag.c b/target/s390x/diag.c >> index 53c2f81f2a..067c667ba7 100644 >> --- a/target/s390x/diag.c >> +++ b/target/s390x/diag.c >> @@ -53,6 +53,29 @@ int handle_diag_288(CPUS390XState *env, uint64_t >> r1, uint64_t r3) >> #define DIAG_308_RC_NO_CONF 0x0102 >> #define DIAG_308_RC_INVALID 0x0402 >> +#define DIAG308_RES_MOD_CLR 0 >> +#define DIAG308_RES_LOAD_NORM 1 >> +#define DIAG308_LOAD_CLEAR 3 >> +#define DIAG308_LOAD_NORMAL_DUMP 4 >> +#define DIAG308_SET 5 >> +#define DIAG308_STORE 6 >> + >> +static int diag308_parm_check(CPUS390XState *env, uint64_t r1, >> uint64_t addr, >> + uintptr_t ra, bool write) >> +{ >> + if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { >> + s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> + return -EINVAL; >> + } >> + if (!address_space_access_valid(&address_space_memory, addr, >> + sizeof(IplParameterBlock), write, >> + MEMTXATTRS_UNSPECIFIED)) { >> + s390_program_interrupt(env, PGM_ADDRESSING, ra); >> + return -EINVAL; >> + } >> + return 0; >> +} >> + >> void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, >> uintptr_t ra) >> { >> CPUState *cs = env_cpu(env); >> @@ -65,30 +88,24 @@ void handle_diag_308(CPUS390XState *env, uint64_t >> r1, uint64_t r3, uintptr_t ra) >> return; >> } >> - if ((subcode & ~0x0ULL) || (subcode > 6)) { >> + if (subcode & ~0x0ULL) { > > Strange, the default case in the switch was basically dead code. Not really, case 2 and case 4 are not handled there. We even had a funny bug some months ago, where a guest could terminate QEMU with one of these codes, see commit 37dbd1f4d4805edcd18d94eb202bb3461b3cd52d. Thomas
Re: [PATCH 04/15] Header sync protvirt
On 11/21/19 1:59 PM, Cornelia Huck wrote: > On Wed, 20 Nov 2019 06:43:23 -0500 > Janosch Frank wrote: > >> Let's sync all the protvirt header changes >> >> Signed-off-by: Janosch Frank >> --- >> linux-headers/asm-s390/kvm.h | 3 ++- >> linux-headers/linux/kvm.h| 42 >> 2 files changed, 44 insertions(+), 1 deletion(-) >> > >> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h >> index 18892d6541..d031051601 100644 >> --- a/linux-headers/linux/kvm.h >> +++ b/linux-headers/linux/kvm.h >> @@ -995,6 +995,8 @@ struct kvm_ppc_resize_hpt { >> #define KVM_CAP_ARM_SVE 170 >> #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 >> #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 >> +#define KVM_CAP_S390_PROTECTED 180 >> +#define KVM_CAP_S390_VCPU_RESETS 181 > > Hm, where does this cap come from? I did not see it in the kernel > patches. It'll be part of the V1 once I send it out. Pierre found the problem while reviewing QEMU, so I fixed it in both repos. > >> >> #ifdef KVM_CAP_IRQ_ROUTING >> > signature.asc Description: OpenPGP digital signature
Re: [PATCH 04/15] Header sync protvirt
On Thu, 21 Nov 2019 14:12:21 +0100 Janosch Frank wrote: > On 11/21/19 1:59 PM, Cornelia Huck wrote: > > On Wed, 20 Nov 2019 06:43:23 -0500 > > Janosch Frank wrote: > > > >> Let's sync all the protvirt header changes > >> > >> Signed-off-by: Janosch Frank > >> --- > >> linux-headers/asm-s390/kvm.h | 3 ++- > >> linux-headers/linux/kvm.h| 42 > >> 2 files changed, 44 insertions(+), 1 deletion(-) > >> > > > >> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > >> index 18892d6541..d031051601 100644 > >> --- a/linux-headers/linux/kvm.h > >> +++ b/linux-headers/linux/kvm.h > >> @@ -995,6 +995,8 @@ struct kvm_ppc_resize_hpt { > >> #define KVM_CAP_ARM_SVE 170 > >> #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 > >> #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 > >> +#define KVM_CAP_S390_PROTECTED 180 > >> +#define KVM_CAP_S390_VCPU_RESETS 181 > > > > Hm, where does this cap come from? I did not see it in the kernel > > patches. > > It'll be part of the V1 once I send it out. > Pierre found the problem while reviewing QEMU, so I fixed it in both repos. Ok, that cap probably makes sense. It was just surprising to find :) pgpvn_P0ZlJYM.pgp Description: OpenPGP digital signature
Re: [PATCH 01/15] s390x: Cleanup cpu resets
On 21/11/2019 14.11, Janosch Frank wrote: > On 11/21/19 1:53 PM, Thomas Huth wrote: >> On 20/11/2019 12.43, Janosch Frank wrote: >>> Let's move the resets into one function and switch by type, so we can >>> use fallthroughs for shared reset actions. [...] >>> +memset(env, 0, offsetof(CPUS390XState, >>> start_initial_reset_fields)); >>> +/* Fallthrough */ >>> +case S390_CPU_RESET_INITIAL: >>> +memset(&env->start_initial_reset_fields, 0, >>> + offsetof(CPUS390XState, end_reset_fields) - >>> + offsetof(CPUS390XState, start_initial_reset_fields)); >>> +/* architectured initial values for CR 0 and 14 */ >>> +env->cregs[0] = CR0_RESET; >>> +env->cregs[14] = CR14_RESET; >>> >>> -s390_cpu_reset(s); >>> -/* initial reset does not clear everything! */ >>> -memset(&env->start_initial_reset_fields, 0, >>> -offsetof(CPUS390XState, end_reset_fields) - >>> -offsetof(CPUS390XState, start_initial_reset_fields)); >>> - >>> -/* architectured initial values for CR 0 and 14 */ >>> -env->cregs[0] = CR0_RESET; >>> -env->cregs[14] = CR14_RESET; >>> - >>> -/* architectured initial value for Breaking-Event-Address register */ >>> -env->gbea = 1; >>> - >>> -env->pfault_token = -1UL; >>> - >>> -/* tininess for underflow is detected before rounding */ >>> -set_float_detect_tininess(float_tininess_before_rounding, >>> - &env->fpu_status); >>> +/* architectured initial value for Breaking-Event-Address register >>> */ >>> +env->gbea = 1; >>> +/* tininess for underflow is detected before rounding */ >>> +set_float_detect_tininess(float_tininess_before_rounding, >>> + &env->fpu_status); >>> +/* Fallthrough */ >>> +case S390_CPU_RESET_NORMAL: >>> +env->pfault_token = -1UL; >>> +env->bpbc = false; >>> +break; >>> +} >>> >>> /* Reset state inside the kernel that we cannot access yet from QEMU. >>> */ >>> -if (kvm_enabled()) { >>> -kvm_s390_reset_vcpu(cpu); >>> +if (kvm_enabled() && (type == S390_CPU_RESET_CLEAR || >>> + type == S390_CPU_RESET_INITIAL)) { >>> +kvm_s390_reset_vcpu(cpu); >>> } >> >> Why don't you simply move that into the switch-case statement, too? > > There was a reason for that, time to load it from cold storage... I just noticed that you rework this in patch 10/15, so it indeed makes sense to keep it outside of the switch-case-statement above... >> [...] >> >> Anyway, re-using code is of course a good idea, but I wonder whether it >> would be nicer to keep most things in place, and then simply chain the >> functions like this: > > I tried that and I prefer the version in the patch. ... and with patch 10 in mind, it indeed also makes more sense to *not* use the chaining that I've suggested. So never mind, your switch with the fallthrough statements is just fine. Thomas
Re: [PATCH 02/15] s390x: Beautify diag308 handling
On 20/11/2019 12.43, Janosch Frank wrote: > Let's improve readability by: > * Using constants for the subcodes > * Moving parameter checking into a function > * Removing subcode > 6 check as the default case catches that > > Signed-off-by: Janosch Frank > --- > target/s390x/diag.c | 54 +++-- > 1 file changed, 32 insertions(+), 22 deletions(-) > > diff --git a/target/s390x/diag.c b/target/s390x/diag.c > index 53c2f81f2a..067c667ba7 100644 > --- a/target/s390x/diag.c > +++ b/target/s390x/diag.c > @@ -53,6 +53,29 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, > uint64_t r3) > #define DIAG_308_RC_NO_CONF 0x0102 > #define DIAG_308_RC_INVALID 0x0402 > > +#define DIAG308_RES_MOD_CLR 0 > +#define DIAG308_RES_LOAD_NORM1 I think I'd also prefer RESET instead of RES here. > +#define DIAG308_LOAD_CLEAR 3 > +#define DIAG308_LOAD_NORMAL_DUMP 4 > +#define DIAG308_SET 5 > +#define DIAG308_STORE6 > + > +static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, > + uintptr_t ra, bool write) > +{ > +if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { > +s390_program_interrupt(env, PGM_SPECIFICATION, ra); > +return -EINVAL; > +} > +if (!address_space_access_valid(&address_space_memory, addr, > +sizeof(IplParameterBlock), write, > +MEMTXATTRS_UNSPECIFIED)) { > +s390_program_interrupt(env, PGM_ADDRESSING, ra); > +return -EINVAL; or maybe -EFAULT ? ;-) > +} > +return 0; > +} > + > void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t > ra) > { > CPUState *cs = env_cpu(env); > @@ -65,30 +88,24 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > return; > } > > -if ((subcode & ~0x0ULL) || (subcode > 6)) { > +if (subcode & ~0x0ULL) { > s390_program_interrupt(env, PGM_SPECIFICATION, ra); > return; > } > > switch (subcode) { > -case 0: > +case DIAG308_RES_MOD_CLR: > s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); > break; > -case 1: > +case DIAG308_RES_LOAD_NORM: > s390_ipl_reset_request(cs, S390_RESET_LOAD_NORMAL); > break; > -case 3: > +case DIAG308_LOAD_CLEAR: > +/* Well we still lack the clearing bit... */ > s390_ipl_reset_request(cs, S390_RESET_REIPL); > break; > -case 5: > -if ((r1 & 1) || (addr & 0x0fffULL)) { > -s390_program_interrupt(env, PGM_SPECIFICATION, ra); > -return; > -} > -if (!address_space_access_valid(&address_space_memory, addr, > -sizeof(IplParameterBlock), false, > -MEMTXATTRS_UNSPECIFIED)) { > -s390_program_interrupt(env, PGM_ADDRESSING, ra); > +case DIAG308_SET: > +if (diag308_parm_check(env, r1, addr, ra, false)) { > return; > } > iplb = g_new0(IplParameterBlock, 1); > @@ -110,15 +127,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > out: > g_free(iplb); > return; > -case 6: > -if ((r1 & 1) || (addr & 0x0fffULL)) { > -s390_program_interrupt(env, PGM_SPECIFICATION, ra); > -return; > -} > -if (!address_space_access_valid(&address_space_memory, addr, > -sizeof(IplParameterBlock), true, > -MEMTXATTRS_UNSPECIFIED)) { > -s390_program_interrupt(env, PGM_ADDRESSING, ra); > +case DIAG308_STORE: > +if (diag308_parm_check(env, r1, addr, ra, true)) { > return; > } > iplb = s390_ipl_get_iplb(); > With RESET instead of RES: Reviewed-by: Thomas Huth
[PATCH v2] target/i386: add VMX features to named CPU models
This allows using "-cpu Haswell,+vmx", which we did not really want to support in QEMU but was produced by Libvirt when using the "host-model" CPU model. This was produced from the output of scripts/kvm/vmxcap using the following very ugly Python script: bits = { 'INS/OUTS instruction information': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_INS_OUTS'], 'IA32_VMX_TRUE_*_CTLS support': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_TRUE_CTLS'], 'External interrupt exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_EXT_INTR_MASK'], 'NMI exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_NMI_EXITING'], 'Virtual NMIs': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VIRTUAL_NMIS'], 'Activate VMX-preemption timer': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VMX_PREEMPTION_TIMER'], 'Process posted interrupts': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_POSTED_INTR'], 'Interrupt window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_INTR_PENDING'], 'Use TSC offsetting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_TSC_OFFSETING'], 'HLT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_HLT_EXITING'], 'INVLPG exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_INVLPG_EXITING'], 'MWAIT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MWAIT_EXITING'], 'RDPMC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDPMC_EXITING'], 'RDTSC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDTSC_EXITING'], 'CR3-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_LOAD_EXITING'], 'CR3-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_STORE_EXITING'], 'CR8-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_LOAD_EXITING'], 'CR8-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_STORE_EXITING'], 'Use TPR shadow': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_TPR_SHADOW'], 'NMI-window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_NMI_PENDING'], 'MOV-DR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MOV_DR_EXITING'], 'Unconditional I/O exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_UNCOND_IO_EXITING'], 'Use I/O bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_IO_BITMAPS'], 'Monitor trap flag': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_TRAP_FLAG'], 'Use MSR bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_MSR_BITMAPS'], 'MONITOR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_EXITING'], 'PAUSE exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_PAUSE_EXITING'], 'Activate secondary control': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS'], 'Virtualize APIC accesses': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES'], 'Enable EPT': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_EPT'], 'Descriptor-table exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_DESC'], 'Enable RDTSCP': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDTSCP'], 'Virtualize x2APIC mode': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE'], 'Enable VPID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VPID'], 'WBINVD exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_WBINVD_EXITING'], 'Unrestricted guest': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST'], 'APIC register emulation': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT'], 'Virtual interrupt delivery': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY'], 'PAUSE-loop exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING'], 'RDRAND exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDRAND_EXITING'], 'Enable INVPCID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_INVPCID'], 'Enable VM functions': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VMFUNC'], 'VMCS shadowing': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_SHADOW_VMCS'], 'RDSEED exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDSEED_EXITING'], 'Enable PML': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_PML'], 'Enable XSAVES/XRSTORS': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_XSAVES'], 'Save debug controls': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_DEBUG_CONTROLS'], 'Load IA32_PERF_GLOBAL_CTRL': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL'], 'Acknowledge interrupt on
Re: [PATCH] target/arm: Fix handling of cortex-m FTYPE flag in EXCRET
On Thu, 21 Nov 2019 at 01:44, Jean-Hugues Deschênes wrote: > > According to the PushStack() pseudocode in the armv7m RM, > bit 4 of the LR should be set to NOT(CONTROL.PFCA) when > an FPU is present. Current implementation is doing it for > armv8, but not for armv7. This patch makes the existing > logic applicable to both code paths. > > Signed-off-by: Jean-Hugues Deschenes > --- > target/arm/m_helper.c | 7 +++ > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c > index f2512e448e..595c49fe2a 100644 > --- a/target/arm/m_helper.c > +++ b/target/arm/m_helper.c > @@ -2173,19 +2173,18 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) > if (env->v7m.secure) { > lr |= R_V7M_EXCRET_S_MASK; > } > -if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK)) { > -lr |= R_V7M_EXCRET_FTYPE_MASK; > -} > } else { > lr = R_V7M_EXCRET_RES1_MASK | > R_V7M_EXCRET_S_MASK | > R_V7M_EXCRET_DCRS_MASK | > -R_V7M_EXCRET_FTYPE_MASK | > R_V7M_EXCRET_ES_MASK; > if (env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK) { > lr |= R_V7M_EXCRET_SPSEL_MASK; > } > } > +if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK)) { > +lr |= R_V7M_EXCRET_FTYPE_MASK; > +} > if (!arm_v7m_is_handler_mode(env)) { > lr |= R_V7M_EXCRET_MODE_MASK; > } > -- > 2.17.1 Thanks, applied to my target-arm.next branch for 4.2. -- PMM
Re: [PATCH 05/15] s390x: protvirt: Sync PV state
On Wed, 20 Nov 2019 06:43:24 -0500 Janosch Frank wrote: > We do not always have the SIE intercept code handy at each place where > we do emulation. Unfortunately emulation for secure guests often > differ slightly from normal emulation and we need to make decisions > based on the protected state of the VCPU. > > Let's sync the protected state and make it available. > > Signed-off-by: Janosch Frank > --- > linux-headers/asm-s390/kvm.h | 1 + > target/s390x/cpu.h | 1 + > target/s390x/kvm.c | 4 > 3 files changed, 6 insertions(+) > > diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h > index 41976d33f0..7c46cf6078 100644 > --- a/linux-headers/asm-s390/kvm.h > +++ b/linux-headers/asm-s390/kvm.h > @@ -231,6 +231,7 @@ struct kvm_guest_debug_arch { > #define KVM_SYNC_GSCB (1UL << 9) > #define KVM_SYNC_BPBC (1UL << 10) > #define KVM_SYNC_ETOKEN (1UL << 11) > +#define KVM_SYNC_PV (1UL << 12) That should go into the previous patch (will be picked up by header sync). > /* length and alignment of the sdnx as a power of two */ > #define SDNXC 8 > #define SDNXL (1UL << SDNXC) > diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h > index 17460ed7b3..a787221772 100644 > --- a/target/s390x/cpu.h > +++ b/target/s390x/cpu.h > @@ -116,6 +116,7 @@ struct CPUS390XState { > > /* Fields up to this point are cleared by a CPU reset */ > struct {} end_reset_fields; > +bool pv; /* protected virtualization */ > > #if !defined(CONFIG_USER_ONLY) > uint32_t core_id; /* PoP "CPU address", same as cpu_index */ > diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c > index c24c869e77..418154ccfe 100644 > --- a/target/s390x/kvm.c > +++ b/target/s390x/kvm.c > @@ -676,6 +676,10 @@ int kvm_arch_get_registers(CPUState *cs) > env->etoken_extension = cs->kvm_run->s.regs.etoken_extension; > } > > +if (can_sync_regs(cs, KVM_SYNC_PV)) { > +env->pv = !!cs->kvm_run->s.regs.pv; > +} > + > /* pfault parameters */ > if (can_sync_regs(cs, KVM_SYNC_PFAULT)) { > env->pfault_token = cs->kvm_run->s.regs.pft; As you add a new field to the cpu state... you probably can't migrate protected guests, so you don't need a new vmstate subsection?
Re: [PATCH v3 0/2] virtio: make seg_max virtqueue size dependent
On Tue, Nov 12, 2019 at 02:13:52PM +0300, Denis Plotnikov wrote: > v3: > * add property to set in machine type [MST] > * add min queue size check [Stefan] > * add avocado based test [Max, Stefan, Eduardo, Cleber] > > v2: > * the standalone patch to make seg_max virtqueue size dependent > * other patches are postponed > > v1: > the initial series > > Denis Plotnikov (2): > virtio: make seg_max virtqueue size dependent > tests: add virtio-scsi and virtio-blk seg_max_adjust test > > hw/block/virtio-blk.c | 9 +- > hw/core/machine.c | 3 + > hw/scsi/vhost-scsi.c | 2 + > hw/scsi/virtio-scsi.c | 10 +- > include/hw/virtio/virtio-blk.h| 1 + > include/hw/virtio/virtio-scsi.h | 1 + > tests/acceptance/virtio_seg_max_adjust.py | 135 ++ > 7 files changed, 159 insertions(+), 2 deletions(-) > create mode 100755 tests/acceptance/virtio_seg_max_adjust.py > > -- > 2.17.0 > > Reviewed-by: Stefan Hajnoczi signature.asc Description: PGP signature
Re: [PATCH v2 3/6] iotests: Add qemu_io_log()
On Wed 20 Nov 2019 07:44:58 PM CET, Kevin Wolf wrote: > Add a function that runs qemu-io and logs the output with the > appropriate filters applied. > > Signed-off-by: Kevin Wolf > Reviewed-by: Eric Blake > Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Berto
Re: [PATCH v2 4/6] iotests: Fix timeout in run_job()
On Wed 20 Nov 2019 07:44:59 PM CET, Kevin Wolf wrote: > run_job() accepts a wait parameter for a timeout, but it doesn't > actually use it. The only thing that is missing is passing it to > events_wait(), so do that now. > > Signed-off-by: Kevin Wolf > Reviewed-by: Eric Blake > Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Berto
Re: [PATCH v2 5/6] iotests: Support job-complete in run_job()
On Wed 20 Nov 2019 07:45:00 PM CET, Kevin Wolf wrote: > Automatically complete jobs that have a 'ready' state and need an > explicit job-complete. Without this, run_job() would hang for such > jobs. > > Signed-off-by: Kevin Wolf > Reviewed-by: Eric Blake > Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Berto
Re: [PATCH v4 06/37] serial: initial qom-ification
On Wed, 20 Nov 2019 at 15:25, Marc-André Lureau wrote: > > Make SerialState a device (the following patches will introduce IO/MM > sysbus serial devices) > > None of the serial_{,mm}_init() callers actually free the returned > value (even if they did, it would be quite harmless), so we can change > the object allocation at will. > > However, the devices that embed SerialState must now have their field > QOM-initialized manually (isa, pci, pci-multi). > > Signed-off-by: Marc-André Lureau > +static void serial_class_init(ObjectClass *klass, void *data) > +{ > +DeviceClass *dc = DEVICE_CLASS(klass); > + > +dc->user_creatable = false; The comment describing user_creatable in qdev-core.h says: * It should never be cleared without a comment explaining why it * is cleared. So we should have a comment here. Otherwise Reviewed-by: Peter Maydell thanks -- PMM
Re: [PATCH v4 18/37] mips: baudbase is 115200 by default
On Wed, 20 Nov 2019 at 15:28, Marc-André Lureau wrote: > > Signed-off-by: Marc-André Lureau > --- > hw/mips/mips_mipssim.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/hw/mips/mips_mipssim.c b/hw/mips/mips_mipssim.c > index bfafa4d7e9..3cd0e6eb33 100644 > --- a/hw/mips/mips_mipssim.c > +++ b/hw/mips/mips_mipssim.c > @@ -223,7 +223,6 @@ mips_mipssim_init(MachineState *machine) > if (serial_hd(0)) { > DeviceState *dev = qdev_create(NULL, TYPE_SERIAL_IO); > > -qdev_prop_set_uint32(DEVICE(dev), "baudbase", 115200); > qdev_prop_set_chr(dev, "chardev", serial_hd(0)); > qdev_set_legacy_instance_id(dev, 0x3f8, 2); > qdev_init_nofail(dev); > -- > 2.24.0 Reviewed-by: Peter Maydell thanks -- PMM
Re: [PATCH v4 19/37] mips: use sysbus_add_io()
On Wed, 20 Nov 2019 at 15:28, Marc-André Lureau wrote: > > Signed-off-by: Marc-André Lureau > --- > hw/mips/mips_mipssim.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/hw/mips/mips_mipssim.c b/hw/mips/mips_mipssim.c > index 3cd0e6eb33..2c2c7f25b2 100644 > --- a/hw/mips/mips_mipssim.c > +++ b/hw/mips/mips_mipssim.c > @@ -227,8 +227,7 @@ mips_mipssim_init(MachineState *machine) > qdev_set_legacy_instance_id(dev, 0x3f8, 2); > qdev_init_nofail(dev); > sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, env->irq[4]); > -memory_region_add_subregion(get_system_io(), 0x3f8, > -&SERIAL_IO(dev)->serial.io); > +sysbus_add_io(SYS_BUS_DEVICE(dev), 0x3f8, > &SERIAL_IO(dev)->serial.io); > } > > if (nd_table[0].used) > -- Reviewed-by: Peter Maydell thanks -- PMM
Re: [PATCH 05/15] s390x: protvirt: Sync PV state
On 11/21/19 2:25 PM, Cornelia Huck wrote: > On Wed, 20 Nov 2019 06:43:24 -0500 > Janosch Frank wrote: > >> We do not always have the SIE intercept code handy at each place where >> we do emulation. Unfortunately emulation for secure guests often >> differ slightly from normal emulation and we need to make decisions >> based on the protected state of the VCPU. >> >> Let's sync the protected state and make it available. >> >> Signed-off-by: Janosch Frank >> --- >> linux-headers/asm-s390/kvm.h | 1 + >> target/s390x/cpu.h | 1 + >> target/s390x/kvm.c | 4 >> 3 files changed, 6 insertions(+) >> >> diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h >> index 41976d33f0..7c46cf6078 100644 >> --- a/linux-headers/asm-s390/kvm.h >> +++ b/linux-headers/asm-s390/kvm.h >> @@ -231,6 +231,7 @@ struct kvm_guest_debug_arch { >> #define KVM_SYNC_GSCB (1UL << 9) >> #define KVM_SYNC_BPBC (1UL << 10) >> #define KVM_SYNC_ETOKEN (1UL << 11) >> +#define KVM_SYNC_PV (1UL << 12) > > That should go into the previous patch (will be picked up by header > sync). Yes, will be fixed in a second > >> /* length and alignment of the sdnx as a power of two */ >> #define SDNXC 8 >> #define SDNXL (1UL << SDNXC) >> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h >> index 17460ed7b3..a787221772 100644 >> --- a/target/s390x/cpu.h >> +++ b/target/s390x/cpu.h >> @@ -116,6 +116,7 @@ struct CPUS390XState { >> >> /* Fields up to this point are cleared by a CPU reset */ >> struct {} end_reset_fields; >> +bool pv; /* protected virtualization */ >> >> #if !defined(CONFIG_USER_ONLY) >> uint32_t core_id; /* PoP "CPU address", same as cpu_index */ >> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c >> index c24c869e77..418154ccfe 100644 >> --- a/target/s390x/kvm.c >> +++ b/target/s390x/kvm.c >> @@ -676,6 +676,10 @@ int kvm_arch_get_registers(CPUState *cs) >> env->etoken_extension = cs->kvm_run->s.regs.etoken_extension; >> } >> >> +if (can_sync_regs(cs, KVM_SYNC_PV)) { >> +env->pv = !!cs->kvm_run->s.regs.pv; >> +} >> + >> /* pfault parameters */ >> if (can_sync_regs(cs, KVM_SYNC_PFAULT)) { >> env->pfault_token = cs->kvm_run->s.regs.pft; > > As you add a new field to the cpu state... you probably can't migrate > protected guests, so you don't need a new vmstate subsection? > We can't migrate currently, but it's on out todo list signature.asc Description: OpenPGP digital signature
Re: [PATCH] vhost-user-fs: remove "vhostfd" property
On Sat, Nov 16, 2019 at 03:20:16PM +0400, Marc-André Lureau wrote: > The property doesn't make much sense for a vhost-user device. > > Signed-off-by: Marc-André Lureau > --- > hw/virtio/vhost-user-fs.c | 1 - > include/hw/virtio/vhost-user-fs.h | 1 - > 2 files changed, 2 deletions(-) It was never implemented either :). Reviewed-by: Stefan Hajnoczi signature.asc Description: PGP signature
Re: [PATCH v4 12/37] serial: start making SerialMM a sysbus device
On Wed, 20 Nov 2019 at 15:27, Marc-André Lureau wrote: > > Memory mapped serial device is in fact a sysbus device. The following > patches will make use of sysbus facilities for resource and > registration. In particular, "serial-mm: use sysbus facilities" will > move internal serial realization to serial_mm_realize callback to > follow qdev best practices. What goes wrong if you just put the realize of smm->serial in the right place to start with ? thanks -- PMM
Re: [PATCH v4 20/37] mips: use sysbus_mmio_get_region() instead of internal fields
On Wed, 20 Nov 2019 at 15:28, Marc-André Lureau wrote: > > Register the memory region with sysbus_init_mmio() and look it up with > sysbus_mmio_get_region() to avoid accessing internal device fields. > > Suggested-by: Peter Maydell > Signed-off-by: Marc-André Lureau > --- > hw/char/serial.c | 1 + > hw/mips/mips_mipssim.c | 3 ++- Reviewed-by: Peter Maydell thanks -- PMM
Re: [PATCH 07/15] s390x: protvirt: Handle diag 308 subcodes 0,1,3,4
On Wed, 20 Nov 2019 06:43:26 -0500 Janosch Frank wrote: > Now that we know the protection state off the cpus, we can start > handling all diag 308 subcodes in the protected state. "As we now have access to the protection state of the cpus, we can implement special handling of diag 308 subcodes for cpus in the protected state." ? > > For subcodes 0 and 1 we need to unshare all pages before continuing, > so the guest doesn't accidently expose data when dumping. > > For subcode 3/4 we tear down the protected VM and reboot into > unprotected mode. We do not provide a secure reboot. > > Before we can do the unshare calls, we need to mark all cpus as > stopped. > > Signed-off-by: Janosch Frank > --- > hw/s390x/s390-virtio-ccw.c | 30 +++--- > target/s390x/diag.c| 4 > 2 files changed, 31 insertions(+), 3 deletions(-) > > diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c > index 7262453616..6fd50b4c42 100644 > --- a/hw/s390x/s390-virtio-ccw.c > +++ b/hw/s390x/s390-virtio-ccw.c > @@ -319,11 +319,26 @@ static inline void s390_do_cpu_ipl(CPUState *cs, > run_on_cpu_data arg) > s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); > } > > +static void s390_pv_prepare_reset(CPUS390XState *env) > +{ > +CPUState *cs; > + > +if (!env->pv) { > +return; > +} > +CPU_FOREACH(cs) { > +s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); > +} > +s390_pv_unshare(); > +s390_pv_perf_clear_reset(); > +} > + > static void s390_machine_reset(MachineState *machine) > { > enum s390_reset reset_type; > CPUState *cs, *t; > S390CPU *cpu; > +CPUS390XState *env; > > /* get the reset parameters, reset them once done */ > s390_ipl_get_reset_request(&cs, &reset_type); > @@ -332,29 +347,38 @@ static void s390_machine_reset(MachineState *machine) > s390_cmma_reset(); > > cpu = S390_CPU(cs); > +env = &cpu->env; > > switch (reset_type) { > case S390_RESET_MODIFIED_CLEAR: /* Subcode 0 */ > +subsystem_reset(); > +s390_crypto_reset(); > +s390_pv_prepare_reset(env); > CPU_FOREACH(t) { > run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); > } > -subsystem_reset(); > -s390_crypto_reset(); This also switches the order in which different parts are reset for normal guests. I presume that order doesn't matter here? > run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > break; > case S390_RESET_LOAD_NORMAL: /* Subcode 1*/ missing blank before */ (introduced in a previous patch) > +subsystem_reset(); > +s390_pv_prepare_reset(env); > CPU_FOREACH(t) { > if (t == cs) { > continue; > } > run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); > } > -subsystem_reset(); > run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); > run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > break; > case S390_RESET_EXTERNAL: Annotate this with the subcode as well? (in the patch introducing it) > case S390_RESET_REIPL: /* Subcode 4 */ > +if (env->pv) { > +CPU_FOREACH(t) { > +s390_pv_vcpu_destroy(t); > +} > +s390_pv_vm_destroy(); > +} > qemu_devices_reset(); > s390_crypto_reset(); > /* configure and start the ipl CPU only */ > diff --git a/target/s390x/diag.c b/target/s390x/diag.c > index 32049bb4ee..db6d79cef3 100644 > --- a/target/s390x/diag.c > +++ b/target/s390x/diag.c > @@ -68,6 +68,10 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, > uint64_t r3) > static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, >uintptr_t ra, bool write) > { > +/* Handled by the Ultravisor */ > +if (env->pv) { > +return 0; > +} > if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { > s390_program_interrupt(env, PGM_SPECIFICATION, ra); > return -EINVAL;
Re: [PATCH] vhost-user-input: use free(elem) instead of g_free(elem)
On Wed, Nov 20, 2019 at 11:48:56AM +, Daniel P. Berrangé wrote: > On Tue, Nov 19, 2019 at 11:16:26AM +, Stefan Hajnoczi wrote: > > The virtqueue element returned by vu_queue_pop() is allocated using > > malloc(3) by virtqueue_alloc_element(). Use the matching free(3) > > function instead of glib's g_free(). > > Just as an FYI, since glib 2.46 g_malloc is hardcoded to use the > system allocator, so it is now guaranteed that g_malloc/malloc > and g_free/free are safely interchangable. I recently got this > clarified in the glib docs: > > https://gitlab.gnome.org/GNOME/glib/merge_requests/1099//diffs > > QEMU mandates 2.48 so we are now safe in that regard > > For readability/sanity sake I'd still suggest matching functions > but it is not a functional danger any more. Even when it was a > risk, that risk only arose if you called GLib's API for installing > a custom allocator callback which QEMU never did, so it was always > a non-issue. You are right, although QEMU did use g_mem_set_vtable(). The custom functions still used malloc() underneath though so it would be safe anyway: commit 98cf48f60aa4999f5b2808569a193a401a390e6a Author: Paolo Bonzini Date: Wed Sep 16 17:38:44 2015 +0200 trace: remove malloc tracing signature.asc Description: PGP signature
Re: [Qemu-devel] [PATCH v2 1/2] net: assert that tx packets have nonzero size
On 191107 1221, Jason Wang wrote: > > On 2019/7/22 下午9:24, Oleinik, Alexander wrote: > > Virtual devices should not try to send zero-sized packets. The caller > > should check the size prior to calling qemu_sendv_packet_async. > > > > Signed-off-by: Alexander Oleinik > > --- > > v2: > >* Improve the comment to explain the rationale for adding the assert. > > net/net.c | 9 + > > 1 file changed, 9 insertions(+) > > > > diff --git a/net/net.c b/net/net.c > > index 7d4098254f..4ad21df36f 100644 > > --- a/net/net.c > > +++ b/net/net.c > > @@ -741,6 +741,15 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender, > > size_t size = iov_size(iov, iovcnt); > > int ret; > > +/* > > + * Since this function returns the size of the sent packets, and a > > return > > + * value of zero indicates that the packet will be sent asynchronously, > > + * there is currently no way to report that a 0-sized packet has been > > sent > > + * successfully. Forbid it for now, and if someone needs this > > functionality > > + * later, the API will require a change. > > + */ > > +assert(size); > > > This probably will make the assertion triggerable from guest. Is this better > to warn and return NET_BUFSIZE + 1 here? Will do. I'll send a v3 out with this change. Thank you > Thanks > > > > + > > if (size > NET_BUFSIZE) { > > return size; > > } >
Re: [PATCH v4 27/37] leon3: use qdev gpio facilities for the PIL
On Wed, 20 Nov 2019 at 15:30, Marc-André Lureau wrote: > > Signed-off-by: Marc-André Lureau > --- > hw/sparc/leon3.c | 6 -- > target/sparc/cpu.h | 1 - > 2 files changed, 4 insertions(+), 3 deletions(-) > > diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c > index cac987373e..1a89d44e57 100644 > --- a/hw/sparc/leon3.c > +++ b/hw/sparc/leon3.c > @@ -230,8 +230,10 @@ static void leon3_generic_hw_init(MachineState *machine) > > /* Allocate IRQ manager */ > dev = qdev_create(NULL, TYPE_GRLIB_IRQMP); > -env->pil_irq = qemu_allocate_irq(leon3_set_pil_in, env, 0); > -qdev_connect_gpio_out_named(dev, "grlib-irq", 0, env->pil_irq); > +qdev_init_gpio_in_named_with_opaque(DEVICE(env), leon3_set_pil_in, > +env, "pil", 1); > +qdev_connect_gpio_out_named(dev, "grlib-irq", 0, > +qdev_get_gpio_in_named(DEVICE(env), "pil", > 0)); > qdev_init_nofail(dev); > sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, LEON3_IRQMP_OFFSET); > env->irq_manager = dev; Reviewed-by: Peter Maydell Creating a gpio pin on some object that isn't yourself looks a bit odd, but all this leon3 code is modifying the CPU object from the outside anyway. Someday we might tidy it up, but not today. thanks -- PMM
Re: [PATCH 02/15] s390x: Beautify diag308 handling
On 11/21/19 2:20 PM, Thomas Huth wrote: > On 20/11/2019 12.43, Janosch Frank wrote: >> Let's improve readability by: >> * Using constants for the subcodes >> * Moving parameter checking into a function >> * Removing subcode > 6 check as the default case catches that >> >> Signed-off-by: Janosch Frank >> --- >> target/s390x/diag.c | 54 +++-- >> 1 file changed, 32 insertions(+), 22 deletions(-) >> >> diff --git a/target/s390x/diag.c b/target/s390x/diag.c >> index 53c2f81f2a..067c667ba7 100644 >> --- a/target/s390x/diag.c >> +++ b/target/s390x/diag.c >> @@ -53,6 +53,29 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, >> uint64_t r3) >> #define DIAG_308_RC_NO_CONF 0x0102 >> #define DIAG_308_RC_INVALID 0x0402 >> >> +#define DIAG308_RES_MOD_CLR 0 >> +#define DIAG308_RES_LOAD_NORM 1 > > I think I'd also prefer RESET instead of RES here. > >> +#define DIAG308_LOAD_CLEAR 3 >> +#define DIAG308_LOAD_NORMAL_DUMP4 >> +#define DIAG308_SET 5 >> +#define DIAG308_STORE 6 >> + >> +static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t >> addr, >> + uintptr_t ra, bool write) >> +{ >> +if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { >> +s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> +return -EINVAL; >> +} >> +if (!address_space_access_valid(&address_space_memory, addr, >> +sizeof(IplParameterBlock), write, >> +MEMTXATTRS_UNSPECIFIED)) { >> +s390_program_interrupt(env, PGM_ADDRESSING, ra); >> +return -EINVAL; > > or maybe -EFAULT ? ;-) Honestly, I'm asking myself why I'm even bothering to return specific error codes when all I check is rc != 0. So I guess I just make it -1 for all errors. > >> +} >> +return 0; >> +} >> + >> void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, >> uintptr_t ra) >> { >> CPUState *cs = env_cpu(env); >> @@ -65,30 +88,24 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, >> uint64_t r3, uintptr_t ra) >> return; >> } >> >> -if ((subcode & ~0x0ULL) || (subcode > 6)) { >> +if (subcode & ~0x0ULL) { >> s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> return; >> } >> >> switch (subcode) { >> -case 0: >> +case DIAG308_RES_MOD_CLR: >> s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); >> break; >> -case 1: >> +case DIAG308_RES_LOAD_NORM: >> s390_ipl_reset_request(cs, S390_RESET_LOAD_NORMAL); >> break; >> -case 3: >> +case DIAG308_LOAD_CLEAR: >> +/* Well we still lack the clearing bit... */ >> s390_ipl_reset_request(cs, S390_RESET_REIPL); >> break; >> -case 5: >> -if ((r1 & 1) || (addr & 0x0fffULL)) { >> -s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> -return; >> -} >> -if (!address_space_access_valid(&address_space_memory, addr, >> -sizeof(IplParameterBlock), false, >> -MEMTXATTRS_UNSPECIFIED)) { >> -s390_program_interrupt(env, PGM_ADDRESSING, ra); >> +case DIAG308_SET: >> +if (diag308_parm_check(env, r1, addr, ra, false)) { >> return; >> } >> iplb = g_new0(IplParameterBlock, 1); >> @@ -110,15 +127,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, >> uint64_t r3, uintptr_t ra) >> out: >> g_free(iplb); >> return; >> -case 6: >> -if ((r1 & 1) || (addr & 0x0fffULL)) { >> -s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> -return; >> -} >> -if (!address_space_access_valid(&address_space_memory, addr, >> -sizeof(IplParameterBlock), true, >> -MEMTXATTRS_UNSPECIFIED)) { >> -s390_program_interrupt(env, PGM_ADDRESSING, ra); >> +case DIAG308_STORE: >> +if (diag308_parm_check(env, r1, addr, ra, true)) { >> return; >> } >> iplb = s390_ipl_get_iplb(); >> > > With RESET instead of RES: > > Reviewed-by: Thomas Huth > signature.asc Description: OpenPGP digital signature
Re: [qemu-web PATCH] Add a blog post about the QEMU-related presentation of KVM Forum 2019
On Thu, Nov 21, 2019 at 12:52:07PM +0100, Thomas Huth wrote: > There have been quite a lot of QEMU-related talks at KVM Forum this > year - let's provide a summary for the people who could not attend. > > Signed-off-by: Thomas Huth > --- > Note: For some talks it's hard to decide whether they really fit the > QEMU blog or not. I've assembled the list below by quickly skimming > through the schedule and the videos, and that's what I came up with ... > If you think any of the other talks should be mentioned here, too, > please let me know. Do you also want to link to the LWN's summary, written by QEMU/KVM contributors? (It has at least 20 mentions of the word "QEMU"): https://lwn.net/Articles/805097/ -- A recap of KVM Forum 2019 Other than that, looks good; FWIW: Reviewed-by: Kashyap Chamarthy > _posts/2019-11-21-kvm-forum.md | 45 ++ > 1 file changed, 45 insertions(+) > create mode 100644 _posts/2019-11-21-kvm-forum.md > > diff --git a/_posts/2019-11-21-kvm-forum.md b/_posts/2019-11-21-kvm-forum.md > new file mode 100644 > index 000..e5adf5d > --- /dev/null > +++ b/_posts/2019-11-21-kvm-forum.md > @@ -0,0 +1,45 @@ > +--- > +layout: post > +title: "Presentations from KVM Forum 2019" > +date: 2019-11-21 12:30:00 +0100 > +author: Thomas Huth > +categories: [presentations, conferences] > +--- > +Last month, the > +[KVM Forum 2019](https://events.linuxfoundation.org/events/kvm-forum-2019/) > +took place in Lyon, France. The conference also featured quite a lot talks > +about QEMU, and now the videos of the presentation are available online. > +So for those who could not attend, here is the list of the QEMU-related > +presentations: > + > +* [QEMU Status Report](https://www.youtube.com/watch?v=6_1QQaXPjd4) > + by Paolo Bonzini > + > +* [The Hype Around the RISC-V > + Hypervisor](https://www.youtube.com/watch?v=2MUka4lKGFU) by Alistair > Francis > + and Anup Patel > + > +* [Reworking the Inter-VM Shared Memory > + Device](https://www.youtube.com/watch?v=TiZrngLUFMA) by Jan Kiszka > + > +* [What's Going On? Taking Advantage of TCG's Total System > + Awareness](https://www.youtube.com/watch?v=wxIF0dvGDuM) by Alex Bennée > + > +* [Towards the Higher Level Debugging with > + QEMU](https://www.youtube.com/watch?v=E2yJL82gJYM) by Pavel Dovgalyuk > + > +* [QEMU-Hexagon: Automatic Translation of the ISA Manual Pseudcode to Tiny > Code > + Instructions of a VLIW > Architecture](https://www.youtube.com/watch?v=3EpnTYBOXCI) > + by Niccolò Izzo and Taylor Simpson > + > +* [Reports of my Bloat Have Been Greatly > + Exaggerated](https://www.youtube.com/watch?v=5TY7m1AneRY) by Paolo Bonzini > + > +* [Multi-process QEMU - Status > Update](https://www.youtube.com/watch?v=lslVYCuk4CQ) > + by John Johnson and Elena Ufimtseva > + > +* [Bring QEMU to Micro Service > World](https://www.youtube.com/watch?v=5hIDwkpXUiw) > + by Xiao Guangrong and Zhang Yulei > + > +More interesting virtualization-related talks can be found in the > +[KVM Forum Youtube > Channel](https://www.youtube.com/channel/UCRCSQmAOh7yzgheq-emy1xA). > -- > 2.23.0 > > -- /kashyap
[PATCH v4 0/5] blockdev: avoid acquiring AioContext lock twice at do_drive_backup and do_blockdev_backup
do_drive_backup() acquires the AioContext lock of the corresponding BlockDriverState. This is not a problem when it's called from qmp_drive_backup(), but drive_backup_prepare() also acquires the lock before calling it. The same things happens with do_blockdev_backup() and blockdev_backup_prepare(). This patch series merges do_drive_backup() with drive_backup_prepare() and do_blockdev_backup() with blockdev_backup_prepare(), and ensures they're only getting called from a transaction context. This way, there's a single code path for both transaction requests and qmp commands, as suggested by Kevin Wolf. We also take this opportunity to ensure we're honoring the context acquisition semantics required by bdrv_try_set_aio_context, as suggested by Max Reitz. --- Changelog: v4: - Unify patches 1-4 and 5-7 to avoid producing broken interim states. (thanks Max Reitz) - Include a fix for iotest 141. (thanks Kevin Wolf) v3: - Rework the whole patch series to fix the issue by consolidating all operations in the transaction model. (thanks Kevin Wolf) v2: - Honor bdrv_try_set_aio_context() context acquisition requirements (thanks Max Reitz). - Release the context at drive_backup_prepare() instead of avoiding re-acquiring it at do_drive_baclup(). (thanks Max Reitz) - Convert a single patch into a two-patch series. --- Sergio Lopez (5): blockdev: fix coding style issues in drive_backup_prepare blockdev: unify qmp_drive_backup and drive-backup transaction paths blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths blockdev: honor bdrv_try_set_aio_context() context requirements iotests: fix 141 after qmp_drive_backup with transactions blockdev.c | 349 ++--- tests/qemu-iotests/141.out | 2 + 2 files changed, 173 insertions(+), 178 deletions(-) -- 2.23.0
[PATCH v4 2/5] blockdev: unify qmp_drive_backup and drive-backup transaction paths
Issuing a drive-backup from qmp_drive_backup takes a slightly different path than when it's issued from a transaction. In the code, this is manifested as some redundancy between do_drive_backup() and drive_backup_prepare(). This change unifies both paths, merging do_drive_backup() and drive_backup_prepare(), and changing qmp_drive_backup() to create a transaction instead of calling do_backup_common() direcly. As a side-effect, now qmp_drive_backup() is executed inside a drained section, as it happens when creating a drive-backup transaction. This change is visible from the user's perspective, as the job gets paused and immediately resumed before starting the actual work. Signed-off-by: Sergio Lopez --- blockdev.c | 224 - 1 file changed, 100 insertions(+), 124 deletions(-) diff --git a/blockdev.c b/blockdev.c index 553e315972..5e85fc042e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { BlockJob *job; } DriveBackupState; -static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -Error **errp); +static BlockJob *do_backup_common(BackupCommon *backup, + BlockDriverState *bs, + BlockDriverState *target_bs, + AioContext *aio_context, + JobTxn *txn, Error **errp); static void drive_backup_prepare(BlkActionState *common, Error **errp) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); -BlockDriverState *bs; DriveBackup *backup; +BlockDriverState *bs; +BlockDriverState *target_bs; +BlockDriverState *source = NULL; AioContext *aio_context; +QDict *options; Error *local_err = NULL; +int flags; +int64_t size; +bool set_backing_hd = false; assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->u.drive_backup.data; +if (!backup->has_mode) { +backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +} + bs = bdrv_lookup_bs(backup->device, backup->device, errp); if (!bs) { return; } +if (!bs->drv) { +error_setg(errp, "Device has no medium"); +return; +} + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); /* Paired with .clean() */ bdrv_drained_begin(bs); -state->bs = bs; +if (!backup->has_format) { +backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? + NULL : (char *) bs->drv->format_name; +} + +/* Early check to avoid creating target */ +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +goto out; +} + +flags = bs->open_flags | BDRV_O_RDWR; + +/* + * See if we have a backing HD we can use to create our new image + * on top of. + */ +if (backup->sync == MIRROR_SYNC_MODE_TOP) { +source = backing_bs(bs); +if (!source) { +backup->sync = MIRROR_SYNC_MODE_FULL; +} +} +if (backup->sync == MIRROR_SYNC_MODE_NONE) { +source = bs; +flags |= BDRV_O_NO_BACKING; +set_backing_hd = true; +} + +size = bdrv_getlength(bs); +if (size < 0) { +error_setg_errno(errp, -size, "bdrv_getlength failed"); +goto out; +} + +if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +assert(backup->format); +if (source) { +bdrv_refresh_filename(source); +bdrv_img_create(backup->target, backup->format, source->filename, +source->drv->format_name, NULL, +size, flags, false, &local_err); +} else { +bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, +size, flags, false, &local_err); +} +} -state->job = do_drive_backup(backup, common->block_job_txn, &local_err); if (local_err) { error_propagate(errp, local_err); goto out; } +options = qdict_new(); +qdict_put_str(options, "discard", "unmap"); +qdict_put_str(options, "detect-zeroes", "unmap"); +if (backup->format) { +qdict_put_str(options, "driver", backup->format); +} + +target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +if (!target_bs) { +goto out; +} + +if (set_backing_hd) { +bdrv_set_backing_hd(target_bs, source, &local_err); +if (local_err) { +goto unref; +} +} + +state->bs = bs; + +state->job = do_backup_common(qapi_DriveBackup_base(backup), + bs, target_bs, aio_context, + common->block_job_txn, errp); + +unref: +bdrv_unref(target_bs); out: aio_context_release(aio_context); } @@ -3587,126 +3676,13 @
[PATCH v4 1/5] blockdev: fix coding style issues in drive_backup_prepare
Fix a couple of minor coding style issues in drive_backup_prepare. Signed-off-by: Sergio Lopez --- blockdev.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/blockdev.c b/blockdev.c index 8e029e9c01..553e315972 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, if (!backup->has_format) { backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? - NULL : (char*) bs->drv->format_name; + NULL : (char *) bs->drv->format_name; } /* Early check to avoid creating target */ @@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, flags = bs->open_flags | BDRV_O_RDWR; -/* See if we have a backing HD we can use to create our new image - * on top of. */ +/* + * See if we have a backing HD we can use to create our new image + * on top of. + */ if (backup->sync == MIRROR_SYNC_MODE_TOP) { source = backing_bs(bs); if (!source) { -- 2.23.0
[PATCH v4 5/5] iotests: fix 141 after qmp_drive_backup with transactions
qmp_drive_backup now creates and starts a transactions, which implies that the job will transition to pause and running twice. Fix test 141 to be aware of this change. Signed-off-by: Sergio Lopez --- tests/qemu-iotests/141.out | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 3645675ce8..263b680bdf 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} -- 2.23.0
Re: [qemu-web PATCH] Add a blog post about the QEMU-related presentation of KVM Forum 2019
On 11/21/19 5:52 AM, Thomas Huth wrote: There have been quite a lot of QEMU-related talks at KVM Forum this year - let's provide a summary for the people who could not attend. Signed-off-by: Thomas Huth --- Note: For some talks it's hard to decide whether they really fit the QEMU blog or not. I've assembled the list below by quickly skimming through the schedule and the videos, and that's what I came up with ... If you think any of the other talks should be mentioned here, too, please let me know. Perhaps: [Making the Most of NBD](https://www.youtube.com/watch?v=PMa6KFX9AxM) by Eric Blake and Richard Jones which mentions optimizations to 'qemu-img convert' made possible through NBD protocol extensions -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
[PATCH v4 3/5] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths
Issuing a blockdev-backup from qmp_blockdev_backup takes a slightly different path than when it's issued from a transaction. In the code, this is manifested as some redundancy between do_blockdev_backup() and blockdev_backup_prepare(). This change unifies both paths, merging do_blockdev_backup() and blockdev_backup_prepare(), and changing qmp_blockdev_backup() to create a transaction instead of calling do_backup_common() direcly. As a side-effect, now qmp_blockdev_backup() is executed inside a drained section, as it happens when creating a blockdev-backup transaction. This change is visible from the user's perspective, as the job gets paused and immediately resumed before starting the actual work. Signed-off-by: Sergio Lopez --- blockdev.c | 60 -- 1 file changed, 13 insertions(+), 47 deletions(-) diff --git a/blockdev.c b/blockdev.c index 5e85fc042e..152a0f7454 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { BlockJob *job; } BlockdevBackupState; -static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -Error **errp); - static void blockdev_backup_prepare(BlkActionState *common, Error **errp) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); BlockdevBackup *backup; -BlockDriverState *bs, *target; +BlockDriverState *bs; +BlockDriverState *target_bs; AioContext *aio_context; -Error *local_err = NULL; assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); backup = common->action->u.blockdev_backup.data; @@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) return; } -target = bdrv_lookup_bs(backup->target, backup->target, errp); -if (!target) { +target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); +if (!target_bs) { return; } @@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) /* Paired with .clean() */ bdrv_drained_begin(state->bs); -state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); -if (local_err) { -error_propagate(errp, local_err); -goto out; -} +state->job = do_backup_common(qapi_BlockdevBackup_base(backup), + bs, target_bs, aio_context, + common->block_job_txn, errp); -out: aio_context_release(aio_context); } @@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) return bdrv_get_xdbg_block_graph(errp); } -BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, - Error **errp) +void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) { -BlockDriverState *bs; -BlockDriverState *target_bs; -AioContext *aio_context; -BlockJob *job; - -bs = bdrv_lookup_bs(backup->device, backup->device, errp); -if (!bs) { -return NULL; -} - -target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -if (!target_bs) { -return NULL; -} - -aio_context = bdrv_get_aio_context(bs); -aio_context_acquire(aio_context); - -job = do_backup_common(qapi_BlockdevBackup_base(backup), - bs, target_bs, aio_context, txn, errp); - -aio_context_release(aio_context); -return job; -} - -void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) -{ -BlockJob *job; -job = do_blockdev_backup(arg, NULL, errp); -if (job) { -job_start(&job->job); -} +TransactionAction action = { +.type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, +.u.blockdev_backup.data = backup, +}; +blockdev_do_action(&action, errp); } /* Parameter check and block job starting for drive mirroring. -- 2.23.0
Re: [PATCH v4 21/37] sm501: make SerialMM a child, export chardev property
On Wed, 20 Nov 2019 at 15:31, Marc-André Lureau wrote: > > Embed the SerialMM sybus device, and re-export its "chardev" property. > That way, we can get rid of PROP_PTR "chr-state" and better track > devices relationship. > > Signed-off-by: Marc-André Lureau > --- > hw/display/sm501.c | 33 - > hw/sh4/r2d.c | 2 +- > 2 files changed, 25 insertions(+), 10 deletions(-) > > diff --git a/hw/display/sm501.c b/hw/display/sm501.c > index 1f33c87e65..c4445b28f9 100644 > --- a/hw/display/sm501.c > +++ b/hw/display/sm501.c > @@ -1930,13 +1930,14 @@ typedef struct { > SM501State state; > uint32_t vram_size; > uint32_t base; > -void *chr_state; > +SerialMM serial; > } SM501SysBusState; > > static void sm501_realize_sysbus(DeviceState *dev, Error **errp) > { > SM501SysBusState *s = SYSBUS_SM501(dev); > SysBusDevice *sbd = SYS_BUS_DEVICE(dev); > +SerialState *ss = &s->serial.serial; > DeviceState *usb_dev; > > sm501_init(&s->state, dev, s->vram_size); > @@ -1958,17 +1959,19 @@ static void sm501_realize_sysbus(DeviceState *dev, > Error **errp) > sysbus_pass_irq(sbd, SYS_BUS_DEVICE(usb_dev)); > > /* bridge to serial emulation module */ > -if (s->chr_state) { > -serial_mm_init(&s->state.mmio_region, SM501_UART0, 2, > - NULL, /* TODO : chain irq to IRL */ > - 115200, s->chr_state, DEVICE_LITTLE_ENDIAN); > +/* FIXME: SM501_UART0 is always mapped, no need to check for the backend > */ > +if (qemu_chr_fe_backend_connected(&ss->chr)) { > +MemoryRegion *mr; > +qdev_init_nofail(DEVICE(&s->serial)); > +mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->serial), 0); > +memory_region_add_subregion(&s->state.mmio_region, SM501_UART0, mr); > +/* TODO : chain irq to IRL */ > } I don't really understand what the FIXME is trying to tell me here. If we don't need to check for the backend, why is the code checking for it ? It means we have to fish around inside the SerialMM's implementation, which seems odd. Only mapping the UART registers if there happens to be a backend connected also doesn't conceptually seem like the right behaviour, because the registers should always exist. Since commit 12051d82f004024d5d the chardev mid-layer has correctly handled the backend not being connected (ie having a NULL chardev), so there's no longer any need for board/device code to special case the lack of a chardev. > } > > static Property sm501_sysbus_properties[] = { > DEFINE_PROP_UINT32("vram-size", SM501SysBusState, vram_size, 0), > DEFINE_PROP_UINT32("base", SM501SysBusState, base, 0), > -DEFINE_PROP_PTR("chr-state", SM501SysBusState, chr_state), > DEFINE_PROP_END_OF_LIST(), > }; > > @@ -1999,9 +2002,20 @@ static void sm501_sysbus_class_init(ObjectClass > *klass, void *data) > dc->props = sm501_sysbus_properties; > dc->reset = sm501_reset_sysbus; > dc->vmsd = &vmstate_sm501_sysbus; > -/* Note: pointer property "chr-state" may remain null, thus > - * no need for dc->user_creatable = false; > - */ > +} > + > +static void sm501_sysbus_init(Object *o) > +{ > +SM501SysBusState *sm501 = SYSBUS_SM501(o); > +SerialMM *smm = &sm501->serial; > + > +sysbus_init_child_obj(o, "serial", smm, sizeof(SerialMM), > TYPE_SERIAL_MM); > +qdev_set_legacy_instance_id(DEVICE(smm), SM501_UART0, 2); The only board we use the sm501 sysbus device is the sh4 r2d board, and we don't care about migration compatibility there (indeed I would be unsurprised to find that it doesn't even work ;-)) So I think we can reasonably not set the legacy-instance-ID and just declare in the commit message that this is a migration compat break for that board. > +qdev_prop_set_uint8(DEVICE(smm), "regshift", 2); > +qdev_prop_set_uint8(DEVICE(smm), "endianness", DEVICE_LITTLE_ENDIAN); > + > +object_property_add_alias(o, "chardev", > + OBJECT(smm), "chardev", &error_abort); > } > > static const TypeInfo sm501_sysbus_info = { > @@ -2009,6 +2023,7 @@ static const TypeInfo sm501_sysbus_info = { > .parent= TYPE_SYS_BUS_DEVICE, > .instance_size = sizeof(SM501SysBusState), > .class_init= sm501_sysbus_class_init, > +.instance_init = sm501_sysbus_init, > }; > > #define TYPE_PCI_SM501 "sm501" > diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c > index ee0840f380..72bb5285cc 100644 > --- a/hw/sh4/r2d.c > +++ b/hw/sh4/r2d.c > @@ -272,7 +272,7 @@ static void r2d_init(MachineState *machine) > busdev = SYS_BUS_DEVICE(dev); > qdev_prop_set_uint32(dev, "vram-size", SM501_VRAM_SIZE); > qdev_prop_set_uint32(dev, "base", 0x1000); > -qdev_prop_set_ptr(dev, "chr-state", serial_hd(2)); > +qdev_prop_set_chr(dev, "chardev", serial_hd(2)); > qdev_init_nofail(dev); > sysbus_mmio_map(busdev, 0, 0x1000); > sysbus_mmio_map(busdev, 1, 0x13e000
[PATCH v4 4/5] blockdev: honor bdrv_try_set_aio_context() context requirements
bdrv_try_set_aio_context() requires that the old context is held, and the new context is not held. Fix all the occurrences where it's not done this way. Suggested-by: Max Reitz Signed-off-by: Sergio Lopez --- blockdev.c | 67 ++ 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/blockdev.c b/blockdev.c index 152a0f7454..b0647d8d33 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, DO_UPCAST(ExternalSnapshotState, common, common); TransactionAction *action = common->action; AioContext *aio_context; +AioContext *old_context; int ret; /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar @@ -1675,11 +1676,20 @@ static void external_snapshot_prepare(BlkActionState *common, goto out; } +/* Honor bdrv_try_set_aio_context() context acquisition requirements. */ +old_context = bdrv_get_aio_context(state->new_bs); +aio_context_release(aio_context); +aio_context_acquire(old_context); + ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); if (ret < 0) { -goto out; +aio_context_release(old_context); +return; } +aio_context_release(old_context); +aio_context_acquire(aio_context); + /* This removes our old bs and adds the new bs. This is an operation that * can fail, so we need to do it in .prepare; undoing it for abort is * always possible. */ @@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) BlockDriverState *target_bs; BlockDriverState *source = NULL; AioContext *aio_context; +AioContext *old_context; QDict *options; Error *local_err = NULL; int flags; int64_t size; bool set_backing_hd = false; +int ret; assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->u.drive_backup.data; @@ -1868,6 +1880,20 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) goto out; } +/* Honor bdrv_try_set_aio_context() context acquisition requirements. */ +old_context = bdrv_get_aio_context(target_bs); +aio_context_release(aio_context); +aio_context_acquire(old_context); + +ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +if (ret < 0) { +aio_context_release(old_context); +return; + } + +aio_context_release(old_context); +aio_context_acquire(aio_context); + if (set_backing_hd) { bdrv_set_backing_hd(target_bs, source, &local_err); if (local_err) { @@ -1947,6 +1973,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) BlockDriverState *bs; BlockDriverState *target_bs; AioContext *aio_context; +AioContext *old_context; +int ret; assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); backup = common->action->u.blockdev_backup.data; @@ -1961,7 +1989,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) return; } +/* Honor bdrv_try_set_aio_context() context acquisition requirements. */ aio_context = bdrv_get_aio_context(bs); +old_context = bdrv_get_aio_context(target_bs); +aio_context_acquire(old_context); + +ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +if (ret < 0) { +aio_context_release(old_context); +return; +} + +aio_context_release(old_context); aio_context_acquire(aio_context); state->bs = bs; @@ -3562,7 +3601,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, BlockJob *job = NULL; BdrvDirtyBitmap *bmap = NULL; int job_flags = JOB_DEFAULT; -int ret; if (!backup->has_speed) { backup->speed = 0; @@ -3586,11 +3624,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, backup->compress = false; } -ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -if (ret < 0) { -return NULL; -} - if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { /* done before desugaring 'incremental' to print the right message */ @@ -3825,6 +3858,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) BlockDriverState *bs; BlockDriverState *source, *target_bs; AioContext *aio_context; +AioContext *old_context; BlockMirrorBackingMode backing_mode; Error *local_err = NULL; QDict *options = NULL; @@ -3937,12 +3971,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) (arg->mode == NEW_IMAGE_MODE_EXISTING || !bdrv_has_zero_init(target_bs))); + +/* Honor bdrv_try_set_aio_context() context acquisition requirements. */ +old_context = bdrv_ge
Re: [PATCH 08/15] s390x: protvirt: KVM intercept changes
On Wed, 20 Nov 2019 06:43:27 -0500 Janosch Frank wrote: > Secure guests no longer intercept with code 4 for an instruction > interception. Instead they have codes 104 and 108 for secure > instruction interception and secure instruction notification > respectively. > > The 104 mirrors the 4, but the 108 is a notification, that something > happened and the hypervisor might need to adjust its tracking data to > that fact. An example for that is the set prefix notification > interception, where KVM only reads the new prefix, but does not update > the prefix in the state description. > > Signed-off-by: Janosch Frank > --- > target/s390x/kvm.c | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c > index 418154ccfe..58251c0229 100644 > --- a/target/s390x/kvm.c > +++ b/target/s390x/kvm.c > @@ -115,6 +115,8 @@ > #define ICPT_CPU_STOP 0x28 > #define ICPT_OPEREXC0x2c > #define ICPT_IO 0x40 > +#define ICPT_PV_INSTR 0x68 > +#define ICPT_PV_INSTR_NOT 0x6c _NOTIF ? > > #define NR_LOCAL_IRQS 32 > /* > @@ -151,6 +153,7 @@ static int cap_s390_irq; > static int cap_ri; > static int cap_gs; > static int cap_hpage_1m; > +static int cap_protvirt; > > static int active_cmma; > > @@ -336,6 +339,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) > cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); > cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); > cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); > +cap_protvirt = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); You don't seem to do anything with this yet? > > if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) > || !kvm_check_extension(s, KVM_CAP_S390_COW)) { > @@ -1664,6 +1668,8 @@ static int handle_intercept(S390CPU *cpu) > (long)cs->kvm_run->psw_addr); > switch (icpt_code) { > case ICPT_INSTRUCTION: > +case ICPT_PV_INSTR: > +case ICPT_PV_INSTR_NOT: > r = handle_instruction(cpu, run); Doesn't handle_instruction() want to know whether it got a request for emulation vs a notification? > break; > case ICPT_PROGRAM:
Re: [PATCH 07/15] s390x: protvirt: Handle diag 308 subcodes 0,1,3,4
On 11/21/19 2:50 PM, Cornelia Huck wrote: > On Wed, 20 Nov 2019 06:43:26 -0500 > Janosch Frank wrote: > >> Now that we know the protection state off the cpus, we can start >> handling all diag 308 subcodes in the protected state. > > "As we now have access to the protection state of the cpus, we can > implement special handling of diag 308 subcodes for cpus in the > protected state." > > ? Sure > >> >> For subcodes 0 and 1 we need to unshare all pages before continuing, >> so the guest doesn't accidently expose data when dumping. >> >> For subcode 3/4 we tear down the protected VM and reboot into >> unprotected mode. We do not provide a secure reboot. >> >> Before we can do the unshare calls, we need to mark all cpus as >> stopped. >> >> Signed-off-by: Janosch Frank >> --- >> hw/s390x/s390-virtio-ccw.c | 30 +++--- >> target/s390x/diag.c| 4 >> 2 files changed, 31 insertions(+), 3 deletions(-) >> >> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c >> index 7262453616..6fd50b4c42 100644 >> --- a/hw/s390x/s390-virtio-ccw.c >> +++ b/hw/s390x/s390-virtio-ccw.c >> @@ -319,11 +319,26 @@ static inline void s390_do_cpu_ipl(CPUState *cs, >> run_on_cpu_data arg) >> s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); >> } >> >> +static void s390_pv_prepare_reset(CPUS390XState *env) >> +{ >> +CPUState *cs; >> + >> +if (!env->pv) { >> +return; >> +} >> +CPU_FOREACH(cs) { >> +s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); >> +} >> +s390_pv_unshare(); >> +s390_pv_perf_clear_reset(); >> +} >> + >> static void s390_machine_reset(MachineState *machine) >> { >> enum s390_reset reset_type; >> CPUState *cs, *t; >> S390CPU *cpu; >> +CPUS390XState *env; >> >> /* get the reset parameters, reset them once done */ >> s390_ipl_get_reset_request(&cs, &reset_type); >> @@ -332,29 +347,38 @@ static void s390_machine_reset(MachineState *machine) >> s390_cmma_reset(); >> >> cpu = S390_CPU(cs); >> +env = &cpu->env; >> >> switch (reset_type) { >> case S390_RESET_MODIFIED_CLEAR: /* Subcode 0 */ >> +subsystem_reset(); >> +s390_crypto_reset(); >> +s390_pv_prepare_reset(env); >> CPU_FOREACH(t) { >> run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); >> } >> -subsystem_reset(); >> -s390_crypto_reset(); > > This also switches the order in which different parts are reset for > normal guests. I presume that order doesn't matter here? I don't think that the order is specified in the POP, it is however specified for protected VMs... > >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> break; >> case S390_RESET_LOAD_NORMAL: /* Subcode 1*/ > > missing blank before */ (introduced in a previous patch) Will fix > > >> +subsystem_reset(); >> +s390_pv_prepare_reset(env); >> CPU_FOREACH(t) { >> if (t == cs) { >> continue; >> } >> run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); >> } >> -subsystem_reset(); >> run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> break; >> case S390_RESET_EXTERNAL: > > Annotate this with the subcode as well? (in the patch introducing it) Sure > >> case S390_RESET_REIPL: /* Subcode 4 */ >> +if (env->pv) { >> +CPU_FOREACH(t) { >> +s390_pv_vcpu_destroy(t); >> +} >> +s390_pv_vm_destroy(); >> +} >> qemu_devices_reset(); >> s390_crypto_reset(); >> /* configure and start the ipl CPU only */ >> diff --git a/target/s390x/diag.c b/target/s390x/diag.c >> index 32049bb4ee..db6d79cef3 100644 >> --- a/target/s390x/diag.c >> +++ b/target/s390x/diag.c >> @@ -68,6 +68,10 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, >> uint64_t r3) >> static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t >> addr, >>uintptr_t ra, bool write) >> { >> +/* Handled by the Ultravisor */ >> +if (env->pv) { >> +return 0; >> +} >> if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { >> s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> return -EINVAL; > signature.asc Description: OpenPGP digital signature
Re: [PATCH 07/15] s390x: protvirt: Handle diag 308 subcodes 0,1,3,4
On 11/21/19 2:50 PM, Cornelia Huck wrote: > On Wed, 20 Nov 2019 06:43:26 -0500 > Janosch Frank wrote: > >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> break; >> case S390_RESET_LOAD_NORMAL: /* Subcode 1*/ > > missing blank before */ (introduced in a previous patch) > > >> +subsystem_reset(); >> +s390_pv_prepare_reset(env); >> CPU_FOREACH(t) { >> if (t == cs) { >> continue; >> } >> run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); >> } >> -subsystem_reset(); >> run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> break; >> case S390_RESET_EXTERNAL: > > Annotate this with the subcode as well? (in the patch introducing it) I think this has no diag 308 subcode and is triggered by qemu signature.asc Description: OpenPGP digital signature
Re: [PATCH for-4.2? v2 0/6] block: Fix resize (extending) of short overlays
On Wed, Nov 20, 2019 at 07:44:55PM +0100, Kevin Wolf wrote: > See patch 2 for the description of the bug fixed. > > v2: > - Switched order of bs->total_sectors update and zero write [Vladimir] > - Fixed coding style [Vladimir] > - Changed the commit message to contain what was in the cover letter > - Test all preallocation modes > - Test allocation status with qemu-io 'map' [Vladimir] > > Kevin Wolf (6): > block: bdrv_co_do_pwrite_zeroes: 64 bit 'bytes' parameter > block: truncate: Don't make backing file data visible > iotests: Add qemu_io_log() > iotests: Fix timeout in run_job() > iotests: Support job-complete in run_job() > iotests: Test committing to short backing file > > block/io.c| 38 +- > tests/qemu-iotests/274| 141 + > tests/qemu-iotests/274.out| 227 ++ > tests/qemu-iotests/group | 1 + > tests/qemu-iotests/iotests.py | 11 +- > 5 files changed, 413 insertions(+), 5 deletions(-) > create mode 100755 tests/qemu-iotests/274 > create mode 100644 tests/qemu-iotests/274.out > > -- > 2.20.1 > > Reviewed-by: Stefan Hajnoczi signature.asc Description: PGP signature
Re: PCI memory sync question (kvm,dpdk,e1000,packet stalled)
On Wed, Nov 20, 2019 at 08:36:32PM +0300, ASM wrote: > I trying solve the problem, with packets stopping (e1000,tap,kvm). > My studies led to the following: > 1. From flatview_write_continue() I see, what e1000 writes the number > "7" to the STAT register. > 2. The driver from target OS reads STAT register with number "7" and > writes to the register the number "0". > 3. From flatview_write_continue() (I make edits): > memcpy(ptr, buf, l); > new1=ptr[0xc]; > usleep(100); > new2=ptr[0xc]; > invalidate_and_set_dirty(mr, addr1, l); > new3=ptr[0xc]; > printf("Old: %i, new1, %i, new2: %i, new3: %i\n", old,new1,new2,new3); > > I see what memory in first printf is "7", but after usleep() is "0". > Do I understand correctly that this should not be? Or RCU lock > suggests the ability to the multiple writers? > > The problem is that qemu(e1000) writes the number 7, after which > target(dpdk driver) reads 7, on the basis of this it writes the number > 0, but as a result (extremely rarely), the value STATUS still remains > 7. Therefore, packet processing is interrupted. This behavior is > observed only on kvm (it is not observed on tcg). > > Please help with advice or ideas. Hi Leonid, Could you be seeing weird behavior with KVM due to MMIO write coalescing? static void e1000_mmio_setup(E1000State *d) { int i; const uint32_t excluded_regs[] = { E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS, E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE }; memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d, "e1000-mmio", PNPMMIO_SIZE); memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]); for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++) memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4, excluded_regs[i+1] - excluded_regs[i] - 4); memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE); } MMIO write coalescing means that QEMU doesn't see the register writes immediately. Instead kvm.ko records them into a ring buffer and QEMU processes the ring when the next ioctl(KVM_RUN) exit occurs. See Linux Documentation/virt/kvm/api.txt "4.116 KVM_(UN)REGISTER_COALESCED_MMIO" for more details. I don't really understand your printf debugging explanation. It would help to see the DPDK code and the exact printf() output. Also, is DPDK accessing the e1000 device from more than 1 vCPU? Stefan signature.asc Description: PGP signature
Re: [PATCH for-4.2 2/2] i386: Add -noTSX aliases for hle=off, rtm=off CPU models
On Wed, Nov 20, 2019 at 01:49:12PM -0300, Eduardo Habkost wrote: > We have been trying to avoid adding new aliases for CPU model > versions, but in the case of changes in defaults introduced by > the TAA mitigation patches, the aliases might help avoid user > confusion when applying host software updates. > > Signed-off-by: Eduardo Habkost Tested-by: Kashyap Chamarthy https://lists.gnu.org/archive/html/qemu-devel/2019-11/msg03501.html - - - Should we (can do it, if you already don't have a patch WIP for it) also update this file to reflect this? https://git.qemu.org/?p=qemu.git;a=blob;f=docs/qemu-cpu-models.texi While at it ... I wonder if it's worth making a separte doc (versioned-cpu-models.rst) explaining the versioned CPU models, usage, etc. There was a very useful discussion between you and Dan Berrangé on this list (Message-Id: <20190625050008.12789-5-ehabk...@redhat.com>, the first version of the thread: "[PATCH 4/6] i386: Infrastructure for versioned CPU models"). Could potentially incorporate some of that content. > --- > target/i386/cpu.c | 5 + > 1 file changed, 5 insertions(+) [...] -- /kashyap
Re: [PATCH 09/15] s390x: protvirt: SCLP interpretation
On Wed, 20 Nov 2019 06:43:28 -0500 Janosch Frank wrote: > SCLP for a protected guest is done over the SIDAD, so we need to use > the s390_cpu_virt_mem_* functions to access the SIDAD instead of guest > memory when reading/writing SCBs. > > To not confuse the sclp emulation, we set 0x42000 as the address, but > ignore it for reading/writing the SCCB. > > Signed-off-by: Janosch Frank > --- > hw/s390x/sclp.c | 16 > include/hw/s390x/sclp.h | 2 ++ > target/s390x/kvm.c | 8 +++- > 3 files changed, 25 insertions(+), 1 deletion(-) > > diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c > index f57ce7b739..00d08adc7f 100644 > --- a/hw/s390x/sclp.c > +++ b/hw/s390x/sclp.c > @@ -193,6 +193,22 @@ static void sclp_execute(SCLPDevice *sclp, SCCB *sccb, > uint32_t code) > } > } > > +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, > +uint32_t code) > +{ > +SCLPDevice *sclp = get_sclp_device(); > +SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); > +SCCB work_sccb; > +hwaddr sccb_len = sizeof(SCCB); > + > +s390_cpu_virt_mem_read(env_archcpu(env), 0, 0, &work_sccb, sccb_len); > +sclp_c->execute(sclp, &work_sccb, code); > +s390_cpu_virt_mem_write(env_archcpu(env), 0, 0, &work_sccb, > +be16_to_cpu(work_sccb.h.length)); > +sclp_c->service_interrupt(sclp, sccb); > +return 0; > +} > + > int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) > { > SCLPDevice *sclp = get_sclp_device(); > diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h > index c54413b78c..c0a3faa37d 100644 > --- a/include/hw/s390x/sclp.h > +++ b/include/hw/s390x/sclp.h > @@ -217,5 +217,7 @@ void s390_sclp_init(void); > void sclp_service_interrupt(uint32_t sccb); > void raise_irq_cpu_hotplug(void); > int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code); > +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, > +uint32_t code); > > #endif > diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c > index 58251c0229..0f2458b553 100644 > --- a/target/s390x/kvm.c > +++ b/target/s390x/kvm.c > @@ -1172,7 +1172,13 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct > kvm_run *run, > sccb = env->regs[ipbh0 & 0xf]; > code = env->regs[(ipbh0 & 0xf0) >> 4]; > > -r = sclp_service_call(env, sccb, code); > +if (run->s390_sieic.icptcode == ICPT_PV_INSTR || > +run->s390_sieic.icptcode == ICPT_PV_INSTR_NOT) { > +r = sclp_service_call_protected(env, 0x42000, code); I fear that confuses the reader instead of the emulation :) Especially as you end up passing that value to sclp_c->service_interrupt()... > +} else { > +r = sclp_service_call(env, sccb, code); > +} > + > if (r < 0) { > kvm_s390_program_interrupt(cpu, -r); > } else {
Re: [PATCH v6] Implement backend program convention command for vhost-user-blk
On Thu, Nov 21, 2019 at 09:58:26AM +0800, Micky Yun Chan wrote: > diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json > index da6aaf51c8..d25c3a957f 100644 > --- a/docs/interop/vhost-user.json > +++ b/docs/interop/vhost-user.json > @@ -54,6 +54,37 @@ >] > } > > +## > +# @VHostUserBackendBlockFeature: > +# > +# List of vhost user "block" features. > +# > +# @read-only: The --read-only command line option is supported. > +# @blk-file: The --blk-file command line option is supported. Please also update docs/interop/vhost-user.rst. That document duplicates the option documentation and it would be good to keep it up-to-date. > +# > +# Since: 4.0 QEMU 4.2 is close to being released and the codebase is frozen. The next release will be 5.0. Please change this to: Since: 5.0 > +## > +{ > + 'enum': 'VHostUserBackendBlockFeature', > + 'data': [ 'read-only', 'blk-file' ] > +} > + > +## > +# @VHostUserBackendCapabilitiesBlock: > +# > +# Capabilities reported by vhost user "block" backends > +# > +# @features: list of supported features. > +# > +# Since: 4.0 Same here. signature.asc Description: PGP signature
Re: [PATCH 07/15] s390x: protvirt: Handle diag 308 subcodes 0,1,3,4
On Thu, 21 Nov 2019 15:04:31 +0100 Janosch Frank wrote: > On 11/21/19 2:50 PM, Cornelia Huck wrote: > > On Wed, 20 Nov 2019 06:43:26 -0500 > > Janosch Frank wrote: > > > > >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > >> break; > >> case S390_RESET_LOAD_NORMAL: /* Subcode 1*/ > > > > missing blank before */ (introduced in a previous patch) > > > > > >> +subsystem_reset(); > >> +s390_pv_prepare_reset(env); > >> CPU_FOREACH(t) { > >> if (t == cs) { > >> continue; > >> } > >> run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); > >> } > >> -subsystem_reset(); > >> run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); > >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > >> break; > >> case S390_RESET_EXTERNAL: > > > > Annotate this with the subcode as well? (in the patch introducing it) > > I think this has no diag 308 subcode and is triggered by qemu -ENOCOFFE But even more reason to annotate this with the fact that this is triggered by QEMU :) pgp6k0jMr5wLx.pgp Description: OpenPGP digital signature
[PATCH v3] target/i386: add VMX features to named CPU models
This allows using "-cpu Haswell,+vmx", which we did not really want to support in QEMU but was produced by Libvirt when using the "host-model" CPU model. This was produced from the output of scripts/kvm/vmxcap using the following very ugly Python script: bits = { 'INS/OUTS instruction information': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_INS_OUTS'], 'IA32_VMX_TRUE_*_CTLS support': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_TRUE_CTLS'], 'External interrupt exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_EXT_INTR_MASK'], 'NMI exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_NMI_EXITING'], 'Virtual NMIs': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VIRTUAL_NMIS'], 'Activate VMX-preemption timer': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VMX_PREEMPTION_TIMER'], 'Process posted interrupts': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_POSTED_INTR'], 'Interrupt window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_INTR_PENDING'], 'Use TSC offsetting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_TSC_OFFSETING'], 'HLT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_HLT_EXITING'], 'INVLPG exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_INVLPG_EXITING'], 'MWAIT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MWAIT_EXITING'], 'RDPMC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDPMC_EXITING'], 'RDTSC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDTSC_EXITING'], 'CR3-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_LOAD_EXITING'], 'CR3-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_STORE_EXITING'], 'CR8-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_LOAD_EXITING'], 'CR8-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_STORE_EXITING'], 'Use TPR shadow': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_TPR_SHADOW'], 'NMI-window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_NMI_PENDING'], 'MOV-DR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MOV_DR_EXITING'], 'Unconditional I/O exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_UNCOND_IO_EXITING'], 'Use I/O bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_IO_BITMAPS'], 'Monitor trap flag': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_TRAP_FLAG'], 'Use MSR bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_MSR_BITMAPS'], 'MONITOR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_EXITING'], 'PAUSE exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_PAUSE_EXITING'], 'Activate secondary control': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS'], 'Virtualize APIC accesses': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES'], 'Enable EPT': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_EPT'], 'Descriptor-table exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_DESC'], 'Enable RDTSCP': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDTSCP'], 'Virtualize x2APIC mode': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE'], 'Enable VPID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VPID'], 'WBINVD exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_WBINVD_EXITING'], 'Unrestricted guest': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST'], 'APIC register emulation': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT'], 'Virtual interrupt delivery': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY'], 'PAUSE-loop exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING'], 'RDRAND exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDRAND_EXITING'], 'Enable INVPCID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_INVPCID'], 'Enable VM functions': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VMFUNC'], 'VMCS shadowing': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_SHADOW_VMCS'], 'RDSEED exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDSEED_EXITING'], 'Enable PML': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_PML'], 'Enable XSAVES/XRSTORS': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_XSAVES'], 'Save debug controls': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_DEBUG_CONTROLS'], 'Load IA32_PERF_GLOBAL_CTRL': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL'], 'Acknowledge interrupt on
Re: [PATCH 07/15] s390x: protvirt: Handle diag 308 subcodes 0,1,3,4
On 11/21/19 3:17 PM, Cornelia Huck wrote: > On Thu, 21 Nov 2019 15:04:31 +0100 > Janosch Frank wrote: > >> On 11/21/19 2:50 PM, Cornelia Huck wrote: >>> On Wed, 20 Nov 2019 06:43:26 -0500 >>> Janosch Frank wrote: >> >>> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); break; case S390_RESET_LOAD_NORMAL: /* Subcode 1*/ >>> >>> missing blank before */ (introduced in a previous patch) >>> >>> +subsystem_reset(); +s390_pv_prepare_reset(env); CPU_FOREACH(t) { if (t == cs) { continue; } run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); } -subsystem_reset(); run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); break; case S390_RESET_EXTERNAL: >>> >>> Annotate this with the subcode as well? (in the patch introducing it) >> >> I think this has no diag 308 subcode and is triggered by qemu > > -ENOCOFFE > > But even more reason to annotate this with the fact that this is > triggered by QEMU :) > You're too late with that idea :) I just split out the reordering and the annotation into a new commit: https://github.com/frankjaa/qemu/commit/8c53d5c8a6bbcc53496c7a2877c7cbffc435b708 signature.asc Description: OpenPGP digital signature
Re: [PATCH 06/15] s390x: protvirt: Support unpack facility
On 11/21/19 12:27 PM, David Hildenbrand wrote: > On 20.11.19 12:43, Janosch Frank wrote: >> @@ -357,6 +353,35 @@ static void s390_machine_reset(MachineState *machine) >> run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> break; >> +case S390_RESET_EXTERNAL: >> +case S390_RESET_REIPL: /* Subcode 4 */ >> +qemu_devices_reset(); >> +s390_crypto_reset(); >> +/* configure and start the ipl CPU only */ >> +run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); >> +break; > > Is there a way to modify this patch to not change unrelated code that > heavily? Makes it harder to review. https://github.com/frankjaa/qemu/commit/8c53d5c8a6bbcc53496c7a2877c7cbffc435b708 And please trim your emails. > >> +case S390_RESET_PV: /* Subcode 10 */ >> +subsystem_reset(); >> +s390_crypto_reset(); >> + >> +CPU_FOREACH(t) { >> +run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); >> +} >> + >> +/* Create SE VM */ >> +s390_pv_vm_create(); >> +CPU_FOREACH(t) { >> +s390_pv_vcpu_create(t); >> +} >> + >> +/* Set SE header and unpack */ >> +s390_ipl_prepare_pv_header(); >> +/* Decrypt image */ >> +s390_ipl_pv_unpack(); >> +/* Verify integrity */ >> +s390_pv_verify(); >> +s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); >> +break; >> default: >> g_assert_not_reached(); >> } >> diff --git a/target/s390x/cpu_features_def.inc.h >> b/target/s390x/cpu_features_def.inc.h >> index 31dff0d84e..60db28351d 100644 >> --- a/target/s390x/cpu_features_def.inc.h >> +++ b/target/s390x/cpu_features_def.inc.h >> @@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, >> "Deflate-conversion facility ( >> DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, >> "Vector-Packed-Decimal-Enhancement Facility") >> DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, >> "Message-security-assist-extension-9 facility (excluding subfunctions)") >> DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") >> +DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") >> >> /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to >> byte-80) */ >> DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: >> Guest-storage-limit-suppression facility") >> > > signature.asc Description: OpenPGP digital signature
Re: [PATCH 09/15] s390x: protvirt: SCLP interpretation
On 11/21/19 3:11 PM, Cornelia Huck wrote: > On Wed, 20 Nov 2019 06:43:28 -0500 > Janosch Frank wrote: > >> SCLP for a protected guest is done over the SIDAD, so we need to use >> the s390_cpu_virt_mem_* functions to access the SIDAD instead of guest >> memory when reading/writing SCBs. >> >> To not confuse the sclp emulation, we set 0x42000 as the address, but >> ignore it for reading/writing the SCCB. >> >> Signed-off-by: Janosch Frank >> --- >> hw/s390x/sclp.c | 16 >> include/hw/s390x/sclp.h | 2 ++ >> target/s390x/kvm.c | 8 +++- >> 3 files changed, 25 insertions(+), 1 deletion(-) >> >> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c >> index f57ce7b739..00d08adc7f 100644 >> --- a/hw/s390x/sclp.c >> +++ b/hw/s390x/sclp.c >> @@ -193,6 +193,22 @@ static void sclp_execute(SCLPDevice *sclp, SCCB *sccb, >> uint32_t code) >> } >> } >> >> +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, >> +uint32_t code) >> +{ >> +SCLPDevice *sclp = get_sclp_device(); >> +SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); >> +SCCB work_sccb; >> +hwaddr sccb_len = sizeof(SCCB); >> + >> +s390_cpu_virt_mem_read(env_archcpu(env), 0, 0, &work_sccb, sccb_len); >> +sclp_c->execute(sclp, &work_sccb, code); >> +s390_cpu_virt_mem_write(env_archcpu(env), 0, 0, &work_sccb, >> +be16_to_cpu(work_sccb.h.length)); >> +sclp_c->service_interrupt(sclp, sccb); >> +return 0; >> +} >> + >> int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) >> { >> SCLPDevice *sclp = get_sclp_device(); >> diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h >> index c54413b78c..c0a3faa37d 100644 >> --- a/include/hw/s390x/sclp.h >> +++ b/include/hw/s390x/sclp.h >> @@ -217,5 +217,7 @@ void s390_sclp_init(void); >> void sclp_service_interrupt(uint32_t sccb); >> void raise_irq_cpu_hotplug(void); >> int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code); >> +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, >> +uint32_t code); >> >> #endif >> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c >> index 58251c0229..0f2458b553 100644 >> --- a/target/s390x/kvm.c >> +++ b/target/s390x/kvm.c >> @@ -1172,7 +1172,13 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct >> kvm_run *run, >> sccb = env->regs[ipbh0 & 0xf]; >> code = env->regs[(ipbh0 & 0xf0) >> 4]; >> >> -r = sclp_service_call(env, sccb, code); >> +if (run->s390_sieic.icptcode == ICPT_PV_INSTR || >> +run->s390_sieic.icptcode == ICPT_PV_INSTR_NOT) { >> +r = sclp_service_call_protected(env, 0x42000, code); > > I fear that confuses the reader instead of the emulation :) > > Especially as you end up passing that value to > sclp_c->service_interrupt()... Pierre has some more opinions on that, so I'll let him present his planned changes to this patch :) > >> +} else { >> +r = sclp_service_call(env, sccb, code); >> +} >> + >> if (r < 0) { >> kvm_s390_program_interrupt(cpu, -r); >> } else { > signature.asc Description: OpenPGP digital signature
Re: [PATCH 06/15] s390x: protvirt: Support unpack facility
On 21.11.19 15:25, Janosch Frank wrote: On 11/21/19 12:27 PM, David Hildenbrand wrote: On 20.11.19 12:43, Janosch Frank wrote: @@ -357,6 +353,35 @@ static void s390_machine_reset(MachineState *machine) run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); break; +case S390_RESET_EXTERNAL: +case S390_RESET_REIPL: /* Subcode 4 */ +qemu_devices_reset(); +s390_crypto_reset(); +/* configure and start the ipl CPU only */ +run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); +break; Is there a way to modify this patch to not change unrelated code that heavily? Makes it harder to review. https://github.com/frankjaa/qemu/commit/8c53d5c8a6bbcc53496c7a2877c7cbffc435b708 And please trim your emails. If you use Thunderbird I suggest QuoteCollapse ... because nobody got time for that ;) -- Thanks, David / dhildenb
Re: [PATCH 08/15] s390x: protvirt: KVM intercept changes
On 11/21/19 3:07 PM, Cornelia Huck wrote: > On Wed, 20 Nov 2019 06:43:27 -0500 > Janosch Frank wrote: > >> Secure guests no longer intercept with code 4 for an instruction >> interception. Instead they have codes 104 and 108 for secure >> instruction interception and secure instruction notification >> respectively. >> >> The 104 mirrors the 4, but the 108 is a notification, that something >> happened and the hypervisor might need to adjust its tracking data to >> that fact. An example for that is the set prefix notification >> interception, where KVM only reads the new prefix, but does not update >> the prefix in the state description. >> >> Signed-off-by: Janosch Frank >> --- >> target/s390x/kvm.c | 6 ++ >> 1 file changed, 6 insertions(+) >> >> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c >> index 418154ccfe..58251c0229 100644 >> --- a/target/s390x/kvm.c >> +++ b/target/s390x/kvm.c >> @@ -115,6 +115,8 @@ >> #define ICPT_CPU_STOP 0x28 >> #define ICPT_OPEREXC0x2c >> #define ICPT_IO 0x40 >> +#define ICPT_PV_INSTR 0x68 >> +#define ICPT_PV_INSTR_NOT 0x6c > > _NOTIF ? Yeah, forgot about that > >> >> #define NR_LOCAL_IRQS 32 >> /* >> @@ -151,6 +153,7 @@ static int cap_s390_irq; >> static int cap_ri; >> static int cap_gs; >> static int cap_hpage_1m; >> +static int cap_protvirt; >> >> static int active_cmma; >> >> @@ -336,6 +339,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) >> cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); >> cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); >> cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); >> +cap_protvirt = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); > > You don't seem to do anything with this yet? No, I'm still a bit in the dark about how we want to tie protvirt into qemu. > >> >> if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) >> || !kvm_check_extension(s, KVM_CAP_S390_COW)) { >> @@ -1664,6 +1668,8 @@ static int handle_intercept(S390CPU *cpu) >> (long)cs->kvm_run->psw_addr); >> switch (icpt_code) { >> case ICPT_INSTRUCTION: >> +case ICPT_PV_INSTR: >> +case ICPT_PV_INSTR_NOT: >> r = handle_instruction(cpu, run); > > Doesn't handle_instruction() want to know whether it got a request for > emulation vs a notification? Currently not, the sclp patch looks at the vcpu run icptcode to figure out what's going on. > >> break; >> case ICPT_PROGRAM: > > signature.asc Description: OpenPGP digital signature
Re: [PATCH 06/15] s390x: protvirt: Support unpack facility
On 21.11.19 15:28, David Hildenbrand wrote: >> And please trim your emails. >> > > If you use Thunderbird I suggest QuoteCollapse ... because nobody got time > for that ;) neat.
Re: [PATCH] RISC-V: virt: This is a "sifive,test1" test finisher
On Sun, 10 Nov 2019 13:10:33 PST (-0800), da...@gibson.dropbear.id.au wrote: On Fri, Nov 08, 2019 at 10:13:16AM -0800, Palmer Dabbelt wrote: On Fri, 08 Nov 2019 10:04:47 PST (-0800), Peter Maydell wrote: > On Fri, 8 Nov 2019 at 17:15, Alistair Francis wrote: > > > > On Fri, Nov 8, 2019 at 9:05 AM Palmer Dabbelt wrote: > > > > > > The test finisher implements the reset command, which means it's a > > > "sifive,test1" device. This is a backwards compatible change, so it's > > > also a "sifive,test0" device. I copied the odd idiom for adding a > > > two-string compatible field from the ARM virt board. > > > > > > Fixes: 9a2551ed6f ("riscv: sifive_test: Add reset functionality") > > > Signed-off-by: Palmer Dabbelt > > > Signed-off-by: Palmer Dabbelt > > > --- > > > hw/riscv/virt.c | 5 - > > > 1 file changed, 4 insertions(+), 1 deletion(-) > > > > > > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c > > > index 23f340df19..74f2dce81c 100644 > > > --- a/hw/riscv/virt.c > > > +++ b/hw/riscv/virt.c > > > @@ -359,7 +359,10 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, > > > nodename = g_strdup_printf("/test@%lx", > > > (long)memmap[VIRT_TEST].base); > > > qemu_fdt_add_subnode(fdt, nodename); > > > -qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,test0"); > > > +{ > > > +const char compat[] = "sifive,test1\0sifive,test0"; > > > > Does this really work? Why not use qemu_fdt_setprop_cells()? > > > > Alistair > > > > > +qemu_fdt_setprop(fdt, nodename, "compatible", compat, sizeof(compat)); > > > +} > > qemu_fdt_setprop_cells() is for "set this property to > contain this list of 32-bit integers" (and it does a byteswap > of each 32-bit value from host to BE). That's not what > you want for a string (or a string list, which is what > we have here). > > Cc'ing David Gibson who's our device tree expert to see if there's > a nicer way to write this. Oddly, given that it's used in the > ubiquitous 'compatible' prop, the dtc Documentation/manual.txt > doesn't say anything about properties being able to be > 'string lists', only 'strings', '32 bit numbers', 'lists of > 32-bit numbers' and 'byte sequences'. You have to dig through > the header file comments to deduce that a string list is > represented by a string with embedded NULs separating > each list item. I copied this from hw/arm/virt.c, but messed up. There they use const char compat[] = "arm,armv8-timer\0arm,armv7-timer"; qemu_fdt_setprop(vms->fdt, "/timer", "compatible", compat, sizeof(compat)); I'm not sure what you're saying is messed up. AFAICT, this matches the code you have above, and both should be correct. Sorry, I must have been hallucinating. For some reason I though I wrote qemu_fdt_setprop_string(... compat). I'd like to take this for 4.2 if possible, but I don't think I have a reviewed-by (I just got my email set up on my Google computer, so it might be messy for a bit). I'm happy to submit the cleaner valist version after 4.2, as per Peter's suggestion. Alistair: are you OK with this? I'll send a v2, but I'd be happy to add some sort of setprop_stringlist function. Maybe we just indicate the length with two '\0's? IIRC that's how other similar-looking data structures are encoded. -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
Re: [PATCH 06/15] s390x: protvirt: Support unpack facility
On 11/21/19 3:31 PM, Christian Borntraeger wrote: > > > On 21.11.19 15:28, David Hildenbrand wrote: >>> And please trim your emails. >>> >> >> If you use Thunderbird I suggest QuoteCollapse ... because nobody got time >> for that ;) > > neat. > Yeah, seems like I'm already too old-school for fancy addons at my young age ;-) signature.asc Description: OpenPGP digital signature
Re: [PATCH v2 2/6] block: truncate: Don't make backing file data visible
Am 21.11.2019 um 13:21 hat Max Reitz geschrieben: > On 21.11.19 12:34, Kevin Wolf wrote: > > Am 21.11.2019 um 09:59 hat Max Reitz geschrieben: > >> On 20.11.19 19:44, Kevin Wolf wrote: > >>> When extending the size of an image that has a backing file larger than > >>> its old size, make sure that the backing file data doesn't become > >>> visible in the guest, but the added area is properly zeroed out. > >>> > >>> Consider the following scenario where the overlay is shorter than its > >>> backing file: > >>> > >>> base.qcow2: > >>> overlay.qcow2: > >>> > >>> When resizing (extending) overlay.qcow2, the new blocks should not stay > >>> unallocated and make the additional As from base.qcow2 visible like > >>> before this patch, but zeros should be read. > >>> > >>> A similar case happens with the various variants of a commit job when an > >>> intermediate file is short (- for unallocated): > >>> > >>> base.qcow2: A-A- > >>> mid.qcow2: BB-B > >>> top.qcow2: C--C--C- > >>> > >>> After commit top.qcow2 to mid.qcow2, the following happens: > >>> > >>> mid.qcow2: CB-C00C0 (correct result) > >>> mid.qcow2: CB-C--C- (before this fix) > >>> > >>> Without the fix, blocks that previously read as zeros on top.qcow2 > >>> suddenly turn into A. > >>> > >>> Signed-off-by: Kevin Wolf > >>> --- > >>> block/io.c | 32 > >>> 1 file changed, 32 insertions(+) > >> > >> Zeroing the intersection may take some time. So is it right for QMP’s > >> block_resize to do this, seeing it is a synchronous operation? > > > > What else would be right? Returning an error? > > Going through a deprecation cycle. And keeping the buggy behaviour for two more releases? > > Common cases (raw and qcow2 v3 without external data files) are quick > > anyway. > > Well, but quick enough for a fully blocking operation? For raw definitely yes, because raw doesn't have backing files, so the code will never run. For qcow2, block_resize can already block for a relatively long time while metadata tables are resized, clusters are discarded etc. I just don't really see the difference in quality between that and allocating some zero clusters in a corner case like having a short overlay. Would you feel more comfortable if we set BDRV_REQ_NO_FALLBACK and return an error if we can't zero out the area? We would have to advertise that flag in bs->supported_zero_flags for qcow2 then (but probably we should do that anyway?) > >> As far as I can tell, jobs actually have the same problem. I don’t > >> think mirror or commit have a pause point before truncating, so they > >> still block the monitor there, don’t they? > > > > Do you really need a pause point? They call bdrv_co_truncate() from > > inside the job coroutine, so it will yield. I would expect that this > > is enough. > > OK then. > > > But in fact, all jobs have a pause point before even calling .run(), so > > even if that made a difference, it should still be fine. > > Good. > > But I believe this is still a problem for block_resize. I don’t see why > this needs to be fixed in 4.2-rc3. What’s the problem with going > through a proper deprecation cycle other than the fact that we can’t > start it in 4.2 because we don’t have a resize job yet? That the behaviour is wrong. For commit it's an image corruptor. For resize, I'll admit that it's just wrong behaviour that is probably harmless in most cases, but it's still wrong behaviour. It would be a corruptor in the same way as commit if it allowed resizing intermediate nodes, but I _think_ the old-style op blockers still forbid this. We'd have to double-check this if we leave things broken for block_resize. Anyway, so are you suggesting adding another parameter to bdrv_co_truncate() that enables wrong, but quicker semantics, and that would only be used by block_resize? Kevin signature.asc Description: PGP signature
Re: [PATCH 03/15] s390x: protvirt: Add diag308 subcodes 8 - 10
On 20/11/2019 12.43, Janosch Frank wrote: > For diag308 subcodes 8 - 10 we have a new ipib of type 5. The ipib > holds the address and length of the secure execution header, as well > as a list of guest components. > > Each component is a block of memory, for example kernel or initrd, > which needs to be decrypted by the Ultravisor in order to run a > protected VM. The secure execution header instructs the Ultravisor on > how to handle the protected VM and its components. > > Subcodes 8 and 9 are similiar to 5 and 6 and subcode 10 will finally > start the protected guest. > > Subcodes 8-10 are not valid in protected mode, we have to do a subcode > 3 and then the 8 and 10 combination for a protected reboot. > > Signed-off-by: Janosch Frank > --- [...] > diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h > index d4813105db..7b8a493509 100644 > --- a/hw/s390x/ipl.h > +++ b/hw/s390x/ipl.h > @@ -15,6 +15,24 @@ > #include "cpu.h" > #include "hw/qdev-core.h" > > +struct IPLBlockPVComp { > +uint64_t tweak_pref; > +uint64_t addr; > +uint64_t size; > +} QEMU_PACKED; > +typedef struct IPLBlockPVComp IPLBlockPVComp; > + > +struct IPLBlockPV { > +uint8_t reserved[84]; > +uint8_t reserved67[3]; What does the "67" mean here? > +uint8_t version; > +uint32_t num_comp; > +uint64_t pv_header_addr; > +uint64_t pv_header_len; > +struct IPLBlockPVComp components[]; > +} QEMU_PACKED; > +typedef struct IPLBlockPV IPLBlockPV; Given the fact that we had quite some headaches with QEMU_PACKED structs in the past already, and the structs seem to be naturally aligned ... what about dropping the QEMU_PACKED here and using QEMU_BUILD_BUG() to assert that the struct has the correct size? [...] > @@ -185,4 +211,11 @@ static inline bool iplb_valid_fcp(IplParameterBlock > *iplb) > iplb->pbt == S390_IPL_TYPE_FCP; > } > > +static inline bool iplb_valid_se(IplParameterBlock *iplb) > +{ > +return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_PV_LEN && > + iplb->pbt == S390_IPL_TYPE_PV; > +} > + > + Drop one empty line? > #endif > diff --git a/target/s390x/diag.c b/target/s390x/diag.c > index 067c667ba7..32049bb4ee 100644 > --- a/target/s390x/diag.c > +++ b/target/s390x/diag.c > @@ -52,6 +52,8 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, > uint64_t r3) > #define DIAG_308_RC_OK 0x0001 > #define DIAG_308_RC_NO_CONF 0x0102 > #define DIAG_308_RC_INVALID 0x0402 > +#define DIAG_308_RC_NO_PV_CONF 0x0a02 > +#define DIAG_308_RC_INV_FOR_PV 0x0b02 > > #define DIAG308_RES_MOD_CLR 0 > #define DIAG308_RES_LOAD_NORM1 > @@ -59,6 +61,9 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, > uint64_t r3) > #define DIAG308_LOAD_NORMAL_DUMP 4 > #define DIAG308_SET 5 > #define DIAG308_STORE6 > +#define DIAG308_PV_SET 8 > +#define DIAG308_PV_STORE 9 > +#define DIAG308_PV_START 10 > > static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, >uintptr_t ra, bool write) > @@ -105,6 +110,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > s390_ipl_reset_request(cs, S390_RESET_REIPL); > break; > case DIAG308_SET: > +case DIAG308_PV_SET: /* Set SE parms */ > if (diag308_parm_check(env, r1, addr, ra, false)) { > return; > } > @@ -117,7 +123,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > > cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); > > -if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb)) { > +if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb) && > +!(iplb_valid_se(iplb) && s390_ipl_pv_check_comp(iplb) >= 0)) { > env->regs[r1 + 1] = DIAG_308_RC_INVALID; > goto out; > } > @@ -128,10 +135,15 @@ out: > g_free(iplb); > return; > case DIAG308_STORE: > +case DIAG308_PV_STORE: /* Get SE parms */ > if (diag308_parm_check(env, r1, addr, ra, true)) { > return; > } > -iplb = s390_ipl_get_iplb(); > +if (subcode == DIAG308_PV_STORE) { > +iplb = s390_ipl_get_iplb_secure(); > +} else { > +iplb = s390_ipl_get_iplb(); > +} > if (iplb) { > cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); > env->regs[r1 + 1] = DIAG_308_RC_OK; > @@ -139,6 +151,16 @@ out: > env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; > } > return; > +break; To return or to break, that's the question... ... please choose one of the two. > +case DIAG308_PV_START: /* SE start */ > +iplb = s390_ipl_get_iplb_secure(); > +if (!iplb_valid_se(iplb)) { > +env->regs[r1 +
Re: [PATCH 05/15] s390x: protvirt: Sync PV state
On 20/11/2019 12.43, Janosch Frank wrote: > We do not always have the SIE intercept code handy at each place where > we do emulation. Unfortunately emulation for secure guests often > differ slightly from normal emulation and we need to make decisions > based on the protected state of the VCPU. > > Let's sync the protected state and make it available. > > Signed-off-by: Janosch Frank > --- > linux-headers/asm-s390/kvm.h | 1 + > target/s390x/cpu.h | 1 + > target/s390x/kvm.c | 4 > 3 files changed, 6 insertions(+) > > diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h > index 41976d33f0..7c46cf6078 100644 > --- a/linux-headers/asm-s390/kvm.h > +++ b/linux-headers/asm-s390/kvm.h > @@ -231,6 +231,7 @@ struct kvm_guest_debug_arch { > #define KVM_SYNC_GSCB (1UL << 9) > #define KVM_SYNC_BPBC (1UL << 10) > #define KVM_SYNC_ETOKEN (1UL << 11) > +#define KVM_SYNC_PV (1UL << 12) > /* length and alignment of the sdnx as a power of two */ > #define SDNXC 8 > #define SDNXL (1UL << SDNXC) > diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h > index 17460ed7b3..a787221772 100644 > --- a/target/s390x/cpu.h > +++ b/target/s390x/cpu.h > @@ -116,6 +116,7 @@ struct CPUS390XState { > > /* Fields up to this point are cleared by a CPU reset */ > struct {} end_reset_fields; > +bool pv; /* protected virtualization */ > > #if !defined(CONFIG_USER_ONLY) > uint32_t core_id; /* PoP "CPU address", same as cpu_index */ > diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c > index c24c869e77..418154ccfe 100644 > --- a/target/s390x/kvm.c > +++ b/target/s390x/kvm.c > @@ -676,6 +676,10 @@ int kvm_arch_get_registers(CPUState *cs) > env->etoken_extension = cs->kvm_run->s.regs.etoken_extension; > } > > +if (can_sync_regs(cs, KVM_SYNC_PV)) { > +env->pv = !!cs->kvm_run->s.regs.pv; > +} Does this really need to be sync'ed each time during kvm_arch_get_registers()? I mean, this is not a state that continuously changes ... so when I guest enters PV mode, I assume that we have to do some stuff in QEMU anyway, so we could set the "pv = true" in that case. And during reset, we could clear it again. Or do I miss something? Thomas
Re: [PATCH] Fix incorrect int->float conversions caught by clang -Wimplicit-int-float-conversion
Richard Henderson writes: > On 11/20/19 6:30 PM, Fangrui Song wrote: >> On 2019-11-20, Juan Quintela wrote: >>> Markus Armbruster wrote: Fangrui Song writes: [...] > diff --git a/util/cutils.c b/util/cutils.c > index fd591cadf0..2b4484c015 100644 > --- a/util/cutils.c > +++ b/util/cutils.c > @@ -239,10 +239,10 @@ static int do_strtosz(const char *nptr, const char > **end, > goto out; > } > /* > - * Values >= 0xfc00 overflow uint64_t after their trip > + * Values > nextafter(0x1p64, 0) overflow uint64_t after their trip > * through double (53 bits of precision). > */ > - if ((val * mul >= 0xfc00) || val < 0) { > + if ((val * mul > nextafter(0x1p64, 0)) || val < 0) { > retval = -ERANGE; > goto out; > } >>> >>> This comment was really bad (it says the same that the code). >>> On the other hand, I can *kind of* understand what does 0x>> f's here>. >>> >>> But I am at a complete loss about what value is: >>> >>> nextafter(0x1p64, 0). >>> >>> Can we put what value is that instead? >> >> It is a C99 hexadecimal floating-point literal. >> 0x1p64 represents hex fraction 1.0 scaled by 2**64, that is 2**64. >> >> We can write this as `val * mul > 0xf800p0`, but I feel that >> counting the number of f's is error-prone and is not fun. >> >> (We cannot use val * mul >= 0x1p64. >> If FLT_EVAL_METHOD == 2, the intermediate computation val * mul will be >> performed at long double precision, val * mul may not by representable >> by a double and will overflow as (double)0x1p64.) > > I agree about not spelling out the f's, or the 0x800 at the end. That's > something that the compiler can do for us, resolving this standard library > function at compile-time. > > We just need a better comment. Perhaps: > > /* > * Values near UINT64_MAX overflow to 2**64 when converting > * to double precision. Compare against the maximum representable > * double precision value below 2**64, computed as "the next value > * after 2**64 (0x1p64) in the direction of 0". > */ Yes, please.
Re: [PATCH v2 1/1] ide: check DMA transfer size in ide_dma_cb() to prevent qemu DoS from quests
Am 14.11.2019 um 18:25 hat Alexander Popov geschrieben: > The commit a718978ed58a from July 2015 introduced the assertion which > implies that the size of successful DMA transfers handled in ide_dma_cb() > should be multiple of 512 (the size of a sector). But guest systems can > initiate DMA transfers that don't fit this requirement. > > PoC for Linux that uses SCSI_IOCTL_SEND_COMMAND to perform such an ATA > command and crash qemu: > > #include > #include > #include > #include > #include > #include > #include > #include > #include > #include > > #define CMD_SIZE 2048 > > struct scsi_ioctl_cmd_6 { > unsigned int inlen; > unsigned int outlen; > unsigned char cmd[6]; > unsigned char data[]; > }; > > int main(void) > { > intptr_t fd = 0; > struct scsi_ioctl_cmd_6 *cmd = NULL; > > cmd = malloc(CMD_SIZE); > if (!cmd) { > perror("[-] malloc"); > return 1; > } > > memset(cmd, 0, CMD_SIZE); > cmd->inlen = 1337; > cmd->cmd[0] = READ_6; > > fd = open("/dev/sg0", O_RDONLY); > if (fd == -1) { > perror("[-] opening sg"); > return 1; > } > > printf("[+] sg0 is opened\n"); > > printf("[.] qemu should break here:\n"); > fflush(stdout); > ioctl(fd, SCSI_IOCTL_SEND_COMMAND, cmd); > printf("[-] qemu didn't break\n"); > > free(cmd); > > return 1; > } It would be nicer to turn the reproducer into a test case for tests/ide-test.c. > For fixing that let's check the number of bytes prepared for the transfer > by the prepare_buf() handler. If it is not a multiple of 512 then end > the DMA transfer with an error. > > That also fixes the I/O stall in guests after a DMA transfer request > for less than the size of a sector. > > Signed-off-by: Alexander Popov This patch makes ide-test fail: TESTcheck-qtest-x86_64: tests/ide-test ** ERROR:tests/ide-test.c:469:test_bmdma_short_prdt: assertion failed (status == 0): (0x0004 == 0x) ERROR - Bail out! ERROR:tests/ide-test.c:469:test_bmdma_short_prdt: assertion failed (status == 0): (0x0004 == 0x) > diff --git a/hw/ide/core.c b/hw/ide/core.c > index 754ff4dc34..85aac614f0 100644 > --- a/hw/ide/core.c > +++ b/hw/ide/core.c > @@ -849,6 +849,7 @@ static void ide_dma_cb(void *opaque, int ret) > int64_t sector_num; > uint64_t offset; > bool stay_active = false; > +int32_t prepared = 0; > > if (ret == -EINVAL) { > ide_dma_error(s); > @@ -892,12 +893,10 @@ static void ide_dma_cb(void *opaque, int ret) > n = s->nsector; > s->io_buffer_index = 0; > s->io_buffer_size = n * 512; > -if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) > { > -/* The PRDs were too short. Reset the Active bit, but don't raise an > - * interrupt. */ > -s->status = READY_STAT | SEEK_STAT; > -dma_buf_commit(s, 0); > -goto eot; > +prepared = s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size); > +if (prepared % 512) { > +ide_dma_error(s); Which I assume is because you changed the error mode here compared to the old version. I'm not sure offhand what the correct behaviour is for non-aligned values > 512. I think we actually have two cases here: Either a short or a long PRD. The commit message should explain this with spec references and a test case should be added for both cases. Kevin
Re: [Qemu-devel] [PATCH v4 03/14] qapi: Introduce default values for struct members
Max Reitz writes: > With this change, it is possible to give default values for struct > members, as follows: > > What you had to do so far: > > # @member: Some description, defaults to 42. > { 'struct': 'Foo', > 'data': { '*member': 'int' } } > > What you can do now: > > { 'struct': 'Foo', > 'data': { '*member': { 'type': 'int', 'default': 42 } } The '*' is redundant in this form. Can anyone think of reasons for keeping it anyway? Against? > On the C side, this change would remove Foo.has_member, because > Foo.member is always valid now. The input visitor deals with setting > it. (Naturally, this means that such defaults are useful only for input > parameters.) > > At least three things are left unimplemented: > > First, support for alternate data types. This is because supporting > them would mean having to allocate the object in the input visitor, and > then potentially going through multiple levels of nested types. In any > case, it would have been difficult and I do not think there is need for > such support at this point. > > Second, support for null. The most important reason for this is that > introspection already uses "'default': null" for "no default, but this > field is optional". The second reason is that without support for > alternate data types, there is not really a point in supporting null. > > Third, full support for default lists. This has a similar reason to the > lack of support for alternate data types: Allocating a default list is > not trivial -- unless the list is empty, which is exactly what we have > support for. > > Signed-off-by: Max Reitz
Re: [PATCH 08/15] s390x: protvirt: KVM intercept changes
On 20/11/2019 12.43, Janosch Frank wrote: > Secure guests no longer intercept with code 4 for an instruction > interception. Instead they have codes 104 and 108 for secure > instruction interception and secure instruction notification > respectively. > > The 104 mirrors the 4, but the 108 is a notification, that something > happened and the hypervisor might need to adjust its tracking data to > that fact. An example for that is the set prefix notification > interception, where KVM only reads the new prefix, but does not update > the prefix in the state description. > > Signed-off-by: Janosch Frank > --- > target/s390x/kvm.c | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c > index 418154ccfe..58251c0229 100644 > --- a/target/s390x/kvm.c > +++ b/target/s390x/kvm.c > @@ -115,6 +115,8 @@ > #define ICPT_CPU_STOP 0x28 > #define ICPT_OPEREXC0x2c > #define ICPT_IO 0x40 > +#define ICPT_PV_INSTR 0x68 > +#define ICPT_PV_INSTR_NOT 0x6c > > #define NR_LOCAL_IRQS 32 > /* > @@ -151,6 +153,7 @@ static int cap_s390_irq; > static int cap_ri; > static int cap_gs; > static int cap_hpage_1m; > +static int cap_protvirt; > > static int active_cmma; > > @@ -336,6 +339,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) > cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); > cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); > cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); > +cap_protvirt = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); > > if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) > || !kvm_check_extension(s, KVM_CAP_S390_COW)) { > @@ -1664,6 +1668,8 @@ static int handle_intercept(S390CPU *cpu) > (long)cs->kvm_run->psw_addr); > switch (icpt_code) { > case ICPT_INSTRUCTION: > +case ICPT_PV_INSTR: > +case ICPT_PV_INSTR_NOT: > r = handle_instruction(cpu, run); Even if this works by default, my gut feeling tells me that it would be safer and cleaner to have a separate handler for this... Otherwise we might get surprising results if future machine generations intercept/notify for more or different instructions, I guess? However, it's just a gut feeling ... I really don't have much experience with this PV stuff yet ... what do the others here think? Thomas
Re: [Qemu-devel] [PATCH v4 04/14] qapi: Allow optional discriminators
Max Reitz writes: > Optional discriminators are fine, as long as there is a default value. > > Signed-off-by: Max Reitz > --- > scripts/qapi/common.py | 14 -- > 1 file changed, 12 insertions(+), 2 deletions(-) > > diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py > index 8c57d0c67a..203623795b 100644 > --- a/scripts/qapi/common.py > +++ b/scripts/qapi/common.py > @@ -1052,11 +1052,21 @@ def check_union(expr, info): > base_members = find_base_members(base) > assert base_members is not None > > -# The value of member 'discriminator' must name a non-optional > -# member of the base struct. > +# The value of member 'discriminator' must name a member of > +# the base struct. (Optional members are allowed, but the > +# discriminator name must not start with '*', so keep > +# allow_optional=False.) > check_name(info, "Discriminator of flat union '%s'" % name, > discriminator) > + > discriminator_value = base_members.get(discriminator) > +if not discriminator_value: > +discriminator_value = base_members.get('*' + discriminator) > +if discriminator_value and 'default' not in discriminator_value: > +raise QAPISemError(info, > +"Optional discriminator '%s' has no default value" % > +discriminator) > + > if not discriminator_value: > raise QAPISemError(info, > "Discriminator '%s' is not a member of base " Needs test coverage and doc update. Oh, looks like later patches provide. Please consider squashing. Doc updates and tests often make code changes easier to understand.
Re: [PATCH v4 00/14] block: Try to create well-typed json:{} filenames
Markus Armbruster writes: > Max Reitz writes: > >> On 13.09.19 13:49, Max Reitz wrote: >>> Another gentle ping. >> >> And another. > > Conflicts with the refactoring merged in commit 69717d0f890. Please > accept my apologies for the inconvenience caused by the excessive delay. > > I'll try to review anyway. I reviewed the proposed changes to the QAPI schema language, and they look reasonable enough to justify a rebase. Thanks!
Re: [Qemu-devel] [PATCH v4 03/14] qapi: Introduce default values for struct members
On 11/21/19 9:07 AM, Markus Armbruster wrote: Max Reitz writes: With this change, it is possible to give default values for struct members, as follows: What you had to do so far: # @member: Some description, defaults to 42. { 'struct': 'Foo', 'data': { '*member': 'int' } } What you can do now: { 'struct': 'Foo', 'data': { '*member': { 'type': 'int', 'default': 42 } } The '*' is redundant in this form. Can anyone think of reasons for keeping it anyway? Against? Is there ever a reason to allow an optional member but without a 'default' value? Or can we just blindly state that if 'default' is not present, that is the same as 'default':0/'default':null? Or, applying your statement further, 'data': { '*a':'int', '*b':'str' } is shorthand for: 'data': { 'a': { 'type':'int', 'default':0 }, 'b': { 'type':'str', 'default':null } } So I could live with permitting '*' only in the shorthand form, and declaring that it is incompatible with longhand form because the existence of a 'default' key in longhand form is evidence that the member is therefore optional. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v2 2/6] block: truncate: Don't make backing file data visible
On 21.11.19 15:33, Kevin Wolf wrote: > Am 21.11.2019 um 13:21 hat Max Reitz geschrieben: >> On 21.11.19 12:34, Kevin Wolf wrote: >>> Am 21.11.2019 um 09:59 hat Max Reitz geschrieben: On 20.11.19 19:44, Kevin Wolf wrote: > When extending the size of an image that has a backing file larger than > its old size, make sure that the backing file data doesn't become > visible in the guest, but the added area is properly zeroed out. > > Consider the following scenario where the overlay is shorter than its > backing file: > > base.qcow2: > overlay.qcow2: > > When resizing (extending) overlay.qcow2, the new blocks should not stay > unallocated and make the additional As from base.qcow2 visible like > before this patch, but zeros should be read. > > A similar case happens with the various variants of a commit job when an > intermediate file is short (- for unallocated): > > base.qcow2: A-A- > mid.qcow2: BB-B > top.qcow2: C--C--C- > > After commit top.qcow2 to mid.qcow2, the following happens: > > mid.qcow2: CB-C00C0 (correct result) > mid.qcow2: CB-C--C- (before this fix) > > Without the fix, blocks that previously read as zeros on top.qcow2 > suddenly turn into A. > > Signed-off-by: Kevin Wolf > --- > block/io.c | 32 > 1 file changed, 32 insertions(+) Zeroing the intersection may take some time. So is it right for QMP’s block_resize to do this, seeing it is a synchronous operation? >>> >>> What else would be right? Returning an error? >> >> Going through a deprecation cycle. > > And keeping the buggy behaviour for two more releases? This sounds like you don’t care about adding another bug when it means fixing this bug. I do care. And so all I was saying is that it seemed problematic to me to fix the problem in this way, because clearly this would make block_resize block the monitor in some cases and clearly that would be a problem, if not a bug. (And that’s precisely what can be said about the current block_resize behavior of revealing previous backing file data: It is a problem, but I wouldn’t call it a full-on bug. It just seems to me that nobody has ever really thought about it.) Also, I don’t see this as a really pressing issue for block_resize at least, because this isn’t a recent regression or anything. It was just the behavior we had, I believe everyone knew it, we just never thought about whether it really is the best kind of behavior. So, yes, I’m in absolutely no hurry to fix this for block_resize. (Commit is a different story, but then again commit is a job already, so it doesn’t suffer from the blocking issue.) But of course this wasn’t all that you’re saying, you give a very good point next. >>> Common cases (raw and qcow2 v3 without external data files) are quick >>> anyway. >> >> Well, but quick enough for a fully blocking operation?> For qcow2, >> block_resize can already block for a relatively long time > while metadata tables are resized, clusters are discarded etc. I just > don't really see the difference in quality between that and allocating > some zero clusters in a corner case like having a short overlay. Discarding cropped clusters when shrinking is a good point. I didn’t think of that. So block_resize already has the very problem I was afraid you’d introduce, because of course discarding isn’t very different from quick-zeroing. > Would you feel more comfortable if we set BDRV_REQ_NO_FALLBACK and > return an error if we can't zero out the area? We would have to > advertise that flag in bs->supported_zero_flags for qcow2 then (but > probably we should do that anyway?) Hm. I don’t feel that bad about disallowing this edge case (growing a shrunken overlay) for potentially all non-qcow2v3 formats. I don’t know how bad it would be for users other than block_resize, though. (And I suppose we want a resize job anyway then, and that would work for those cases?) As far as I can tell, jobs actually have the same problem. I don’t think mirror or commit have a pause point before truncating, so they still block the monitor there, don’t they? >>> >>> Do you really need a pause point? They call bdrv_co_truncate() from >>> inside the job coroutine, so it will yield. I would expect that this >>> is enough. >> >> OK then. >> >>> But in fact, all jobs have a pause point before even calling .run(), so >>> even if that made a difference, it should still be fine. >> >> Good. >> >> But I believe this is still a problem for block_resize. I don’t see why >> this needs to be fixed in 4.2-rc3. What’s the problem with going >> through a proper deprecation cycle other than the fact that we can’t >> start it in 4.2 because we don’t have a resize job yet? > > That the behaviour is wrong. I know a couple of wrong behaviors
Re: [PATCH v1 1/1] hw/arm: versal: Add the CRP as unimplemented
Hi Peter, Can we consider this patch for 4.2? Thanks, Edgar On Fri, Nov 15, 2019 at 04:47:34PM +0100, Edgar E. Iglesias wrote: > From: "Edgar E. Iglesias" > > Add the CRP as unimplemented thus avoiding bus errors when > guests access these registers. > > Signed-off-by: Edgar E. Iglesias > --- > hw/arm/xlnx-versal.c | 2 ++ > include/hw/arm/xlnx-versal.h | 3 +++ > 2 files changed, 5 insertions(+) > > diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c > index 98163eb1aa..8b3d8d85b8 100644 > --- a/hw/arm/xlnx-versal.c > +++ b/hw/arm/xlnx-versal.c > @@ -257,6 +257,8 @@ static void versal_unimp(Versal *s) > MM_CRL, MM_CRL_SIZE); > versal_unimp_area(s, "crf", &s->mr_ps, > MM_FPD_CRF, MM_FPD_CRF_SIZE); > +versal_unimp_area(s, "crp", &s->mr_ps, > +MM_PMC_CRP, MM_PMC_CRP_SIZE); > versal_unimp_area(s, "iou-scntr", &s->mr_ps, > MM_IOU_SCNTR, MM_IOU_SCNTR_SIZE); > versal_unimp_area(s, "iou-scntr-seucre", &s->mr_ps, > diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h > index 14405c1465..d844c4ffe4 100644 > --- a/include/hw/arm/xlnx-versal.h > +++ b/include/hw/arm/xlnx-versal.h > @@ -119,4 +119,7 @@ typedef struct Versal { > #define MM_IOU_SCNTRS_SIZE 0x1 > #define MM_FPD_CRF 0xfd1aU > #define MM_FPD_CRF_SIZE 0x14 > + > +#define MM_PMC_CRP 0xf126U > +#define MM_PMC_CRP_SIZE 0x1 > #endif > -- > 2.20.1 >
Re: [Qemu-devel] [PATCH 0/2] hw/timer/m48t59: Convert to trace events
Philippe Mathieu-Daudé writes: > ping? Did this fall through the cracks? > On 6/26/19 2:20 PM, Philippe Mathieu-Daudé wrote: >> Another trivial cleanup series. >> >> Philippe Mathieu-Daudé (2): >> MAINTAINERS: Add missing m48t59 files to the PReP section >> hw/timer/m48t59: Convert debug printf()s to trace events >> >> MAINTAINERS| 2 ++ >> hw/timer/m48t59-internal.h | 5 - >> hw/timer/m48t59.c | 11 +-- >> hw/timer/trace-events | 6 ++ >> 4 files changed, 13 insertions(+), 11 deletions(-) >>
Re: [PATCH v1 1/1] hw/arm: versal: Add the CRP as unimplemented
On Thu, 21 Nov 2019 at 15:28, Edgar E. Iglesias wrote: > > Hi Peter, > > Can we consider this patch for 4.2? Sure, it looks pretty safe. I've applied it to my queue for rc3. thanks -- PMM
Re: [PATCH] ppc/pnv: Create BMC devices at machine init
On Wed, Nov 20, 2019 at 09:05:07AM +1100, David Gibson wrote: > On Tue, Nov 19, 2019 at 07:49:33AM +0100, Cédric Le Goater wrote: > > On 19/11/2019 01:52, David Gibson wrote: > > > On Mon, Nov 18, 2019 at 10:22:22AM +0100, Cédric Le Goater wrote: > > >> The BMC of the OpenPOWER systems monitors the machine state using > > >> sensors, controls the power and controls the access to the PNOR flash > > >> device containing the firmware image required to boot the host. > > >> > > >> QEMU models the power cycle process, access to the sensors and access > > >> to the PNOR device. But, for these features to be available, the QEMU > > >> PowerNV machine needs two extras devices on the command line, an IPMI > > >> BT device for communication and a BMC backend device: > > >> > > >> -device ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10 > > >> > > >> The BMC properties are then defined accordingly in the device tree and > > >> OPAL self adapts. If a BMC device and an IPMI BT device are not > > >> available, OPAL does not try to communicate with the BMC in any > > >> manner. This is not how real systems behave. > > >> > > >> To be closer to the default behavior, create an IPMI BMC simulator > > >> device and an IPMI BT device at machine initialization time. We loose > > >> the ability to define an external BMC device but there are benefits: > > >> > > >> - a better match with real systems, > > >> - a better test coverage of the OPAL code, > > >> - system powerdown and reset commands that work, > > >> - a QEMU device tree compliant with the specifications (*). > > >> > > >> (*) Still needs a MBOX device. > > >> > > >> Signed-off-by: Cédric Le Goater > > > > > > This doesn't apply to ppc-for-5.0 for me. I'm not sure which change > > > in there it's conflicting with, but there seems to be something. > > > > Sorry I should have been more precise. > > > > > > This is because we need an IPMI patch to be merged first in > > Corey's tree : > > > > ipmi: Add support to customize OEM functions > > http://patchwork.ozlabs.org/patch/1185187/ > > > > and another one merged in the PPC tree: > > > > ppc/pnv: Add HIOMAP commands > > http://patchwork.ozlabs.org/patch/1185185/ > > > > > > David, if Corey agrees, I think it would be simpler if you took > > them all. > > Ok. Corey, could I get an Acked-by from you? Sure Acked-by: Corey Minyard > > -- > David Gibson | I'll have my music baroque, and my code > david AT gibson.dropbear.id.au| minimalist, thank you. NOT _the_ > _other_ > | _way_ _around_! > http://www.ozlabs.org/~dgibson
Re: [qemu-web PATCH] Add a blog post about the QEMU-related presentation of KVM Forum 2019
Thomas Huth writes: > There have been quite a lot of QEMU-related talks at KVM Forum this > year - let's provide a summary for the people who could not attend. > > Signed-off-by: Thomas Huth > --- > Note: For some talks it's hard to decide whether they really fit the > QEMU blog or not. I've assembled the list below by quickly skimming > through the schedule and the videos, and that's what I came up with ... > If you think any of the other talks should be mentioned here, too, > please let me know. Looks good to me: Reviewed-by: Alex Bennée > > _posts/2019-11-21-kvm-forum.md | 45 ++ > 1 file changed, 45 insertions(+) > create mode 100644 _posts/2019-11-21-kvm-forum.md > > diff --git a/_posts/2019-11-21-kvm-forum.md b/_posts/2019-11-21-kvm-forum.md > new file mode 100644 > index 000..e5adf5d > --- /dev/null > +++ b/_posts/2019-11-21-kvm-forum.md > @@ -0,0 +1,45 @@ > +--- > +layout: post > +title: "Presentations from KVM Forum 2019" > +date: 2019-11-21 12:30:00 +0100 > +author: Thomas Huth > +categories: [presentations, conferences] > +--- > +Last month, the > +[KVM Forum 2019](https://events.linuxfoundation.org/events/kvm-forum-2019/) > +took place in Lyon, France. The conference also featured quite a lot talks > +about QEMU, and now the videos of the presentation are available online. > +So for those who could not attend, here is the list of the QEMU-related > +presentations: > + > +* [QEMU Status Report](https://www.youtube.com/watch?v=6_1QQaXPjd4) > + by Paolo Bonzini > + > +* [The Hype Around the RISC-V > + Hypervisor](https://www.youtube.com/watch?v=2MUka4lKGFU) by Alistair > Francis > + and Anup Patel > + > +* [Reworking the Inter-VM Shared Memory > + Device](https://www.youtube.com/watch?v=TiZrngLUFMA) by Jan Kiszka > + > +* [What's Going On? Taking Advantage of TCG's Total System > + Awareness](https://www.youtube.com/watch?v=wxIF0dvGDuM) by Alex Bennée > + > +* [Towards the Higher Level Debugging with > + QEMU](https://www.youtube.com/watch?v=E2yJL82gJYM) by Pavel Dovgalyuk > + > +* [QEMU-Hexagon: Automatic Translation of the ISA Manual Pseudcode to Tiny > Code > + Instructions of a VLIW > Architecture](https://www.youtube.com/watch?v=3EpnTYBOXCI) > + by Niccolò Izzo and Taylor Simpson > + > +* [Reports of my Bloat Have Been Greatly > + Exaggerated](https://www.youtube.com/watch?v=5TY7m1AneRY) by Paolo Bonzini > + > +* [Multi-process QEMU - Status > Update](https://www.youtube.com/watch?v=lslVYCuk4CQ) > + by John Johnson and Elena Ufimtseva > + > +* [Bring QEMU to Micro Service > World](https://www.youtube.com/watch?v=5hIDwkpXUiw) > + by Xiao Guangrong and Zhang Yulei > + > +More interesting virtualization-related talks can be found in the > +[KVM Forum Youtube > Channel](https://www.youtube.com/channel/UCRCSQmAOh7yzgheq-emy1xA). -- Alex Bennée
Re: [PATCH] ppc/pnv: Create BMC devices at machine init
On Mon, Nov 18, 2019 at 10:22:22AM +0100, Cédric Le Goater wrote: > The BMC of the OpenPOWER systems monitors the machine state using > sensors, controls the power and controls the access to the PNOR flash > device containing the firmware image required to boot the host. > > QEMU models the power cycle process, access to the sensors and access > to the PNOR device. But, for these features to be available, the QEMU > PowerNV machine needs two extras devices on the command line, an IPMI > BT device for communication and a BMC backend device: > > -device ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10 With these changes, the above line will no longer be necessary, right? Minor nit inline ... > > The BMC properties are then defined accordingly in the device tree and > OPAL self adapts. If a BMC device and an IPMI BT device are not > available, OPAL does not try to communicate with the BMC in any > manner. This is not how real systems behave. > > To be closer to the default behavior, create an IPMI BMC simulator > device and an IPMI BT device at machine initialization time. We loose > the ability to define an external BMC device but there are benefits: > > - a better match with real systems, > - a better test coverage of the OPAL code, > - system powerdown and reset commands that work, > - a QEMU device tree compliant with the specifications (*). > > (*) Still needs a MBOX device. > > Signed-off-by: Cédric Le Goater > --- > include/hw/ppc/pnv.h | 2 +- > hw/ppc/pnv.c | 33 - > hw/ppc/pnv_bmc.c | 20 +--- > 3 files changed, 34 insertions(+), 21 deletions(-) > > diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h > index 07c56c05ad30..90f1343ed07c 100644 > --- a/include/hw/ppc/pnv.h > +++ b/include/hw/ppc/pnv.h > @@ -198,7 +198,7 @@ static inline bool pnv_is_power9(PnvMachineState *pnv) > */ > void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt); > void pnv_bmc_powerdown(IPMIBmc *bmc); > -int pnv_bmc_hiomap(IPMIBmc *bmc); > +IPMIBmc *pnv_bmc_create(void); > > /* > * POWER8 MMIO base addresses > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c > index c3ac0d6d5b4a..2117d879895c 100644 > --- a/hw/ppc/pnv.c > +++ b/hw/ppc/pnv.c > @@ -551,27 +551,10 @@ static void pnv_powerdown_notify(Notifier *n, void > *opaque) > > static void pnv_reset(MachineState *machine) > { > -PnvMachineState *pnv = PNV_MACHINE(machine); > void *fdt; > -Object *obj; > > qemu_devices_reset(); > > -/* > - * OpenPOWER systems have a BMC, which can be defined on the > - * command line with: > - * > - * -device ipmi-bmc-sim,id=bmc0 > - * > - * This is the internal simulator but it could also be an external > - * BMC. > - */ > -obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL); > -if (obj) { > -pnv->bmc = IPMI_BMC(obj); > -pnv_bmc_hiomap(pnv->bmc); > -} > - > fdt = pnv_dt_create(machine); > > /* Pack resulting tree */ > @@ -629,6 +612,16 @@ static bool pnv_match_cpu(const char *default_type, > const char *cpu_type) > return ppc_default->pvr_match(ppc_default, ppc->pvr); > } > > +static void ipmi_bt_init(ISABus *bus, IPMIBmc *bmc, uint32_t irq) All the other names have pnv_ in front of them, could you add that to this, too? -corey > +{ > +Object *obj; > + > +obj = OBJECT(isa_create(bus, "isa-ipmi-bt")); > +object_property_set_link(obj, OBJECT(bmc), "bmc", &error_fatal); > +object_property_set_int(obj, irq, "irq", &error_fatal); > +object_property_set_bool(obj, true, "realized", &error_fatal); > +} > + > static void pnv_init(MachineState *machine) > { > PnvMachineState *pnv = PNV_MACHINE(machine); > @@ -751,6 +744,9 @@ static void pnv_init(MachineState *machine) > } > g_free(chip_typename); > > +/* Create the machine BMC simulator */ > +pnv->bmc = pnv_bmc_create(); > + > /* Instantiate ISA bus on chip 0 */ > pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal); > > @@ -760,6 +756,9 @@ static void pnv_init(MachineState *machine) > /* Create an RTC ISA device too */ > mc146818_rtc_init(pnv->isa_bus, 2000, NULL); > > +/* Create the IPMI BT device for communication with the BMC */ > +ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10); > + > /* > * OpenPOWER systems use a IPMI SEL Event message to notify the > * host to powerdown > diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c > index aa5c89586c63..07fa1e1c7e45 100644 > --- a/hw/ppc/pnv_bmc.c > +++ b/hw/ppc/pnv_bmc.c > @@ -17,6 +17,8 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu-common.h" > +#include "qapi/error.h" > #include "target/ppc/cpu.h" > #include "qemu/log.h" > #include "hw/ipmi/ipmi.h" > @@ -211,8 +213,20 @@ static const IPMINetfn hiomap_netfn = { > .cmd_handlers = hiomap_cmds > }; > > -int pnv_bmc_hiomap(IPMIBmc *bmc) > +/* > + * Instantiate the machine BMC. Powe
[PULL 4/4] i386: Add -noTSX aliases for hle=off, rtm=off CPU models
From: Eduardo Habkost We have been trying to avoid adding new aliases for CPU model versions, but in the case of changes in defaults introduced by the TAA mitigation patches, the aliases might help avoid user confusion when applying host software updates. Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 5 + 1 file changed, 5 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 37c023f..730fb28 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2904,6 +2904,7 @@ static X86CPUDefinition builtin_x86_defs[] = { }, { .version = 3, +.alias = "Skylake-Client-noTSX-IBRS", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, @@ -3025,6 +3026,7 @@ static X86CPUDefinition builtin_x86_defs[] = { }, { .version = 3, +.alias = "Skylake-Server-noTSX-IBRS", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, @@ -3145,6 +3147,7 @@ static X86CPUDefinition builtin_x86_defs[] = { }, }, { .version = 3, + .alias = "Cascadelake-Server-noTSX", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, @@ -3257,6 +3260,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, +.alias = "Icelake-Client-noTSX", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, @@ -3373,6 +3377,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, +.alias = "Icelake-Server-noTSX", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, -- 1.8.3.1
[PULL 0/4] x86 updates for QEMU 4.2-rc
The following changes since commit 39e2821077e6dcf788b7c2a9ef50970ec7995437: Update version for v4.2.0-rc2 release (2019-11-19 19:34:10 +) are available in the git repository at: git://github.com/bonzini/qemu.git tags/for-upstream for you to fetch changes up to 02fa60d10137ed2ef17534718d7467e0d2170142: i386: Add -noTSX aliases for hle=off, rtm=off CPU models (2019-11-21 16:35:05 +0100) * x86 updates for Intel errata (myself, Eduardo) * the big ugly list of x86 VMX features, which was targeted for 5.0 but caused a Libvirt regression (myself) The diffstat is scary, but 90% of the patches are adding initializers to the CPU model definitions. Eduardo Habkost (2): i386: Add new versions of Skylake/Cascadelake/Icelake without TSX i386: Add -noTSX aliases for hle=off, rtm=off CPU models Paolo Bonzini (2): target/i386: add VMX features to named CPU models target/i386: add support for MSR_IA32_TSX_CTRL target/i386/cpu.c | 759 +- target/i386/cpu.h | 5 + target/i386/kvm.c | 13 + target/i386/machine.c | 20 ++ 4 files changed, 796 insertions(+), 1 deletion(-) -- 1.8.3.1
[PULL 2/4] target/i386: add support for MSR_IA32_TSX_CTRL
The MSR_IA32_TSX_CTRL MSR can be used to hide TSX (also known as the Trusty Side-channel Extension). By virtualizing the MSR, KVM guests can disable TSX and avoid paying the price of mitigating TSX-based attacks on microarchitectural side channels. Reviewed-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 5 + target/i386/kvm.c | 13 + target/i386/machine.c | 20 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 056874f..9cd9adf 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1204,7 +1204,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = MSR_FEATURE_WORD, .feat_names = { "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", -"ssb-no", "mds-no", "pschange-mc-no", NULL, +"ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", "taa-no", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 5352c9f..cde2a16 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -349,7 +349,11 @@ typedef enum X86Seg { #define MSR_VIRT_SSBD 0xc001011f #define MSR_IA32_PRED_CMD 0x49 #define MSR_IA32_CORE_CAPABILITY0xcf + #define MSR_IA32_ARCH_CAPABILITIES 0x10a +#define ARCH_CAP_TSX_CTRL_MSR (1<<7) + +#define MSR_IA32_TSX_CTRL 0x122 #define MSR_IA32_TSCDEADLINE0x6e0 #define FEATURE_CONTROL_LOCKED(1<<0) @@ -1449,6 +1453,7 @@ typedef struct CPUX86State { uint64_t msr_smi_count; uint32_t pkru; +uint32_t tsx_ctrl; uint64_t spec_ctrl; uint64_t virt_ssbd; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index bfd09bd..bf16556 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -97,6 +97,7 @@ static bool has_msr_hv_reenlightenment; static bool has_msr_xss; static bool has_msr_umwait; static bool has_msr_spec_ctrl; +static bool has_msr_tsx_ctrl; static bool has_msr_virt_ssbd; static bool has_msr_smi_count; static bool has_msr_arch_capabs; @@ -2036,6 +2037,9 @@ static int kvm_get_supported_msrs(KVMState *s) case MSR_IA32_SPEC_CTRL: has_msr_spec_ctrl = true; break; +case MSR_IA32_TSX_CTRL: +has_msr_tsx_ctrl = true; +break; case MSR_VIRT_SSBD: has_msr_virt_ssbd = true; break; @@ -2694,6 +2698,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (has_msr_spec_ctrl) { kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl); } +if (has_msr_tsx_ctrl) { +kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, env->tsx_ctrl); +} if (has_msr_virt_ssbd) { kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd); } @@ -3110,6 +3117,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_spec_ctrl) { kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0); } +if (has_msr_tsx_ctrl) { +kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, 0); +} if (has_msr_virt_ssbd) { kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0); } @@ -3502,6 +3512,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_SPEC_CTRL: env->spec_ctrl = msrs[i].data; break; +case MSR_IA32_TSX_CTRL: +env->tsx_ctrl = msrs[i].data; +break; case MSR_VIRT_SSBD: env->virt_ssbd = msrs[i].data; break; diff --git a/target/i386/machine.c b/target/i386/machine.c index 7bdeb78..2699eed 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1293,6 +1293,25 @@ static const VMStateDescription vmstate_efer32 = { }; #endif +static bool msr_tsx_ctrl_needed(void *opaque) +{ +X86CPU *cpu = opaque; +CPUX86State *env = &cpu->env; + +return env->features[FEAT_ARCH_CAPABILITIES] & ARCH_CAP_TSX_CTRL_MSR; +} + +static const VMStateDescription vmstate_msr_tsx_ctrl = { +.name = "cpu/msr_tsx_ctrl", +.version_id = 1, +.minimum_version_id = 1, +.needed = msr_tsx_ctrl_needed, +.fields = (VMStateField[]) { +VMSTATE_UINT32(env.tsx_ctrl, X86CPU), +VMSTATE_END_OF_LIST() +} +}; + VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1427,6 +1446,7 @@ VMStateDescription vmstate_x86_cpu = { #ifdef CONFIG_KVM &vmstate_nested_state, #endif +&vmstate_msr_tsx_ctrl, NULL } }; -- 1.8.3.1
[PULL 3/4] i386: Add new versions of Skylake/Cascadelake/Icelake without TSX
From: Eduardo Habkost One of the mitigation methods for TAA[1] is to disable TSX support on the host system. Linux added a mechanism to disable TSX globally through the kernel command line, and many Linux distributions now default to tsx=off. This makes existing CPU models that have HLE and RTM enabled not usable anymore. Add new versions of all CPU models that have the HLE and RTM features enabled, that can be used when TSX is disabled in the host system. References: [1] TAA, TSX asynchronous Abort: https://software.intel.com/security-software-guidance/insights/deep-dive-intel-transactional-synchronization-extensions-intel-tsx-asynchronous-abort https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 47 +++ 1 file changed, 47 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9cd9adf..37c023f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2902,6 +2902,14 @@ static X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, +{ +.version = 3, +.props = (PropValue[]) { +{ "hle", "off" }, +{ "rtm", "off" }, +{ /* end of list */ } +} +}, { /* end of list */ } } }, @@ -3015,6 +3023,14 @@ static X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, +{ +.version = 3, +.props = (PropValue[]) { +{ "hle", "off" }, +{ "rtm", "off" }, +{ /* end of list */ } +} +}, { /* end of list */ } } }, @@ -3128,6 +3144,13 @@ static X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, +{ .version = 3, + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { /* end of list */ } + }, +}, { /* end of list */ } } }, @@ -3230,6 +3253,18 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x8008, .model_id = "Intel Core Processor (Icelake)", +.versions = (X86CPUVersionDefinition[]) { +{ .version = 1 }, +{ +.version = 2, +.props = (PropValue[]) { +{ "hle", "off" }, +{ "rtm", "off" }, +{ /* end of list */ } +}, +}, +{ /* end of list */ } +} }, { .name = "Icelake-Server", @@ -3334,6 +3369,18 @@ static X86CPUDefinition builtin_x86_defs[] = { VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, .xlevel = 0x8008, .model_id = "Intel Xeon Processor (Icelake)", +.versions = (X86CPUVersionDefinition[]) { +{ .version = 1 }, +{ +.version = 2, +.props = (PropValue[]) { +{ "hle", "off" }, +{ "rtm", "off" }, +{ /* end of list */ } +}, +}, +{ /* end of list */ } +} }, { .name = "Denverton", -- 1.8.3.1
[Bug 1848556] Re: qemu-img check failing on remote image in Eoan
This was tonight first accepted and then immediately rejected as it was surpassed by a security fix. => Rebased and uploaded 1:4.0+dfsg-0ubuntu9.2 to eoan-unapproved again. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1848556 Title: qemu-img check failing on remote image in Eoan Status in QEMU: Fix Released Status in qemu package in Ubuntu: Fix Released Status in qemu source package in Eoan: Triaged Status in qemu source package in Focal: Fix Released Bug description: Ubuntu SRU Template: [Impact] * There is fallout due to changes in libcurl that affect qemu and might lead to a hang. * Fix by backporting the upstream fix [Test Case] * If you have network just run $ qemu-img check http://10.193.37.117/cloud/eoan-server-cloudimg-amd64.img * Without network, install apache2, and get a complex qemu file (like a cloud image) onto the system. Then access the file via apache http but not localhost (that would work) [Regression Potential] * The change is local to the libcurl usage of qemu, so that could be affected. But then this is what has been found to not work here, so I'd expect not too much trouble. But if so then in the curl usage (which means disks on http) [Other Info] * n/a --- The "qemu-img check" function is failing on remote (HTTP-hosted) images, beginning with Ubuntu 19.10 (qemu-utils version 1:4.0+dfsg- 0ubuntu9). With previous versions, through Ubuntu 19.04/qemu-utils version 1:3.1+dfsg-2ubuntu3.5, the following worked: $ /usr/bin/qemu-img check http://10.193.37.117/cloud/eoan-server-cloudimg-amd64.img No errors were found on the image. 19778/36032 = 54.89% allocated, 90.34% fragmented, 89.90% compressed clusters Image end offset: 514064384 The 10.193.37.117 server holds an Apache server that hosts the cloud images on a LAN. Beginning with Ubuntu 19.10/qemu-utils 1:4.0+dfsg- 0ubuntu9, the same command never returns. (I've left it for up to an hour with no change.) I'm able to wget the image from the same server and installation on which qemu-img check fails. I've tried several .img files on the server, ranging from Bionic to Eoan, with the same results with all of them. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1848556/+subscriptions
[PULL 1/4] target/i386: add VMX features to named CPU models
This allows using "-cpu Haswell,+vmx", which we did not really want to support in QEMU but was produced by Libvirt when using the "host-model" CPU model. Without this patch, no VMX feature is _actually_ supported (only the basic instruction set extensions are) and KVM fails to load in the guest. This was produced from the output of scripts/kvm/vmxcap using the following very ugly Python script: bits = { 'INS/OUTS instruction information': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_INS_OUTS'], 'IA32_VMX_TRUE_*_CTLS support': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_TRUE_CTLS'], 'External interrupt exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_EXT_INTR_MASK'], 'NMI exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_NMI_EXITING'], 'Virtual NMIs': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VIRTUAL_NMIS'], 'Activate VMX-preemption timer': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VMX_PREEMPTION_TIMER'], 'Process posted interrupts': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_POSTED_INTR'], 'Interrupt window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_INTR_PENDING'], 'Use TSC offsetting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_TSC_OFFSETING'], 'HLT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_HLT_EXITING'], 'INVLPG exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_INVLPG_EXITING'], 'MWAIT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MWAIT_EXITING'], 'RDPMC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDPMC_EXITING'], 'RDTSC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDTSC_EXITING'], 'CR3-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_LOAD_EXITING'], 'CR3-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_STORE_EXITING'], 'CR8-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_LOAD_EXITING'], 'CR8-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_STORE_EXITING'], 'Use TPR shadow': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_TPR_SHADOW'], 'NMI-window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_NMI_PENDING'], 'MOV-DR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MOV_DR_EXITING'], 'Unconditional I/O exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_UNCOND_IO_EXITING'], 'Use I/O bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_IO_BITMAPS'], 'Monitor trap flag': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_TRAP_FLAG'], 'Use MSR bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_MSR_BITMAPS'], 'MONITOR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_EXITING'], 'PAUSE exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_PAUSE_EXITING'], 'Activate secondary control': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS'], 'Virtualize APIC accesses': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES'], 'Enable EPT': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_EPT'], 'Descriptor-table exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_DESC'], 'Enable RDTSCP': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDTSCP'], 'Virtualize x2APIC mode': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE'], 'Enable VPID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VPID'], 'WBINVD exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_WBINVD_EXITING'], 'Unrestricted guest': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST'], 'APIC register emulation': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT'], 'Virtual interrupt delivery': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY'], 'PAUSE-loop exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING'], 'RDRAND exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDRAND_EXITING'], 'Enable INVPCID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_INVPCID'], 'Enable VM functions': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VMFUNC'], 'VMCS shadowing': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_SHADOW_VMCS'], 'RDSEED exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDSEED_EXITING'], 'Enable PML': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_PML'], 'Enable XSAVES/XRSTORS': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_XSAVES'], 'Save debug controls': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_DEBUG_CONTROLS'],
Re: [Qemu-devel] [PATCH v8 00/11] Measure Tiny Code Generation Quality
vandersonmr writes: > This patch is part of Google Summer of Code (GSoC) 2019. > More about the project can be found in: > https://wiki.qemu.org/Internships/ProjectIdeas/TCGCodeQuality > > The goal of this patch is to add infrastructure to collect > execution and JIT statistics during the emulation with accel/TCG. > The statistics are stored in TBStatistic structures (TBStats) > with each TB having its respective TBStats. > > We added -d tb_stats and HMP tb_stats commands to allow the control > of this statistics collection. And info tb, tbs, and coverset commands > were also added to allow dumping and exploring all this information > while emulating. > > Collecting these statistics and information is useful to understand > qemu performance and to help to add the support for traces to QEMU. Are you still pursuing this?
Re: [qemu-web PATCH] Add a blog post about the QEMU-related presentation of KVM Forum 2019
On 21/11/19 14:59, Eric Blake wrote: > On 11/21/19 5:52 AM, Thomas Huth wrote: >> There have been quite a lot of QEMU-related talks at KVM Forum this >> year - let's provide a summary for the people who could not attend. >> >> Signed-off-by: Thomas Huth >> --- >> Note: For some talks it's hard to decide whether they really fit the >> QEMU blog or not. I've assembled the list below by quickly skimming >> through the schedule and the videos, and that's what I came up with ... >> If you think any of the other talks should be mentioned here, too, >> please let me know. > > Perhaps: > > [Making the Most of NBD](https://www.youtube.com/watch?v=PMa6KFX9AxM) by > Eric Blake and Richard Jones > > which mentions optimizations to 'qemu-img convert' made possible through > NBD protocol extensions Agreed. Also I would remove the RISC-V talk, as I was going to write a blog post about it too. Paolo
Re: [PATCH] ppc/pnv: Create BMC devices at machine init
On 21/11/2019 16:36, Corey Minyard wrote: > On Mon, Nov 18, 2019 at 10:22:22AM +0100, Cédric Le Goater wrote: >> The BMC of the OpenPOWER systems monitors the machine state using >> sensors, controls the power and controls the access to the PNOR flash >> device containing the firmware image required to boot the host. >> >> QEMU models the power cycle process, access to the sensors and access >> to the PNOR device. But, for these features to be available, the QEMU >> PowerNV machine needs two extras devices on the command line, an IPMI >> BT device for communication and a BMC backend device: >> >> -device ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10 > > With these changes, the above line will no longer be necessary, right? Yes. I would have liked to find a way to introduce some kind of lazy binding between the two devices and still be able to change the IPMI backend on the command line but it seemed too complex. > Minor nit inline ... > >> >> The BMC properties are then defined accordingly in the device tree and >> OPAL self adapts. If a BMC device and an IPMI BT device are not >> available, OPAL does not try to communicate with the BMC in any >> manner. This is not how real systems behave. >> >> To be closer to the default behavior, create an IPMI BMC simulator >> device and an IPMI BT device at machine initialization time. We loose >> the ability to define an external BMC device but there are benefits: >> >> - a better match with real systems, >> - a better test coverage of the OPAL code, >> - system powerdown and reset commands that work, >> - a QEMU device tree compliant with the specifications (*). >> >> (*) Still needs a MBOX device. >> >> Signed-off-by: Cédric Le Goater >> --- >> include/hw/ppc/pnv.h | 2 +- >> hw/ppc/pnv.c | 33 - >> hw/ppc/pnv_bmc.c | 20 +--- >> 3 files changed, 34 insertions(+), 21 deletions(-) >> >> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h >> index 07c56c05ad30..90f1343ed07c 100644 >> --- a/include/hw/ppc/pnv.h >> +++ b/include/hw/ppc/pnv.h >> @@ -198,7 +198,7 @@ static inline bool pnv_is_power9(PnvMachineState *pnv) >> */ >> void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt); >> void pnv_bmc_powerdown(IPMIBmc *bmc); >> -int pnv_bmc_hiomap(IPMIBmc *bmc); >> +IPMIBmc *pnv_bmc_create(void); >> >> /* >> * POWER8 MMIO base addresses >> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c >> index c3ac0d6d5b4a..2117d879895c 100644 >> --- a/hw/ppc/pnv.c >> +++ b/hw/ppc/pnv.c >> @@ -551,27 +551,10 @@ static void pnv_powerdown_notify(Notifier *n, void >> *opaque) >> >> static void pnv_reset(MachineState *machine) >> { >> -PnvMachineState *pnv = PNV_MACHINE(machine); >> void *fdt; >> -Object *obj; >> >> qemu_devices_reset(); >> >> -/* >> - * OpenPOWER systems have a BMC, which can be defined on the >> - * command line with: >> - * >> - * -device ipmi-bmc-sim,id=bmc0 >> - * >> - * This is the internal simulator but it could also be an external >> - * BMC. >> - */ >> -obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL); >> -if (obj) { >> -pnv->bmc = IPMI_BMC(obj); >> -pnv_bmc_hiomap(pnv->bmc); >> -} >> - >> fdt = pnv_dt_create(machine); >> >> /* Pack resulting tree */ >> @@ -629,6 +612,16 @@ static bool pnv_match_cpu(const char *default_type, >> const char *cpu_type) >> return ppc_default->pvr_match(ppc_default, ppc->pvr); >> } >> >> +static void ipmi_bt_init(ISABus *bus, IPMIBmc *bmc, uint32_t irq) > > All the other names have pnv_ in front of them, could you add that to > this, too? Sure. I will resend a v2 with this change and your acked-by. C. > > -corey > >> +{ >> +Object *obj; >> + >> +obj = OBJECT(isa_create(bus, "isa-ipmi-bt")); >> +object_property_set_link(obj, OBJECT(bmc), "bmc", &error_fatal); >> +object_property_set_int(obj, irq, "irq", &error_fatal); >> +object_property_set_bool(obj, true, "realized", &error_fatal); >> +} >> + >> static void pnv_init(MachineState *machine) >> { >> PnvMachineState *pnv = PNV_MACHINE(machine); >> @@ -751,6 +744,9 @@ static void pnv_init(MachineState *machine) >> } >> g_free(chip_typename); >> >> +/* Create the machine BMC simulator */ >> +pnv->bmc = pnv_bmc_create(); >> + >> /* Instantiate ISA bus on chip 0 */ >> pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal); >> >> @@ -760,6 +756,9 @@ static void pnv_init(MachineState *machine) >> /* Create an RTC ISA device too */ >> mc146818_rtc_init(pnv->isa_bus, 2000, NULL); >> >> +/* Create the IPMI BT device for communication with the BMC */ >> +ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10); >> + >> /* >> * OpenPOWER systems use a IPMI SEL Event message to notify the >> * host to powerdown >> diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c >> index aa5c89586c63..07fa1e1c7e45 10
Re: [Qemu-devel] [PATCH 0/2] hw/timer/m48t59: Convert to trace events
On 11/21/19 4:28 PM, Markus Armbruster wrote: Philippe Mathieu-Daudé writes: ping? Did this fall through the cracks? Certainly =) Thanks for noticing. This now need a (trivial) rebase. I'll respin for 5.0. On 6/26/19 2:20 PM, Philippe Mathieu-Daudé wrote: Another trivial cleanup series. Philippe Mathieu-Daudé (2): MAINTAINERS: Add missing m48t59 files to the PReP section hw/timer/m48t59: Convert debug printf()s to trace events MAINTAINERS| 2 ++ hw/timer/m48t59-internal.h | 5 - hw/timer/m48t59.c | 11 +-- hw/timer/trace-events | 6 ++ 4 files changed, 13 insertions(+), 11 deletions(-)
[PATCH v2] ppc/pnv: Create BMC devices at machine init
The BMC of the OpenPOWER systems monitors the machine state using sensors, controls the power and controls the access to the PNOR flash device containing the firmware image required to boot the host. QEMU models the power cycle process, access to the sensors and access to the PNOR device. But, for these features to be available, the QEMU PowerNV machine needs two extras devices on the command line, an IPMI BT device for communication and a BMC backend device: -device ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10 The BMC properties are then defined accordingly in the device tree and OPAL self adapts. If a BMC device and an IPMI BT device are not available, OPAL does not try to communicate with the BMC in any manner. This is not how real systems behave. To be closer to the default behavior, create an IPMI BMC simulator device and an IPMI BT device at machine initialization time. We loose the ability to define an external BMC device but there are benefits: - a better match with real systems, - a better test coverage of the OPAL code, - system powerdown and reset commands that work, - a QEMU device tree compliant with the specifications (*). (*) Still needs a MBOX device. Signed-off-by: Cédric Le Goater --- Changes since v1: - renamed ipmi_bt_init() to pnv_ipmi_bt_init() include/hw/ppc/pnv.h | 2 +- hw/ppc/pnv.c | 33 - hw/ppc/pnv_bmc.c | 20 +--- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 07c56c05ad30..90f1343ed07c 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -198,7 +198,7 @@ static inline bool pnv_is_power9(PnvMachineState *pnv) */ void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt); void pnv_bmc_powerdown(IPMIBmc *bmc); -int pnv_bmc_hiomap(IPMIBmc *bmc); +IPMIBmc *pnv_bmc_create(void); /* * POWER8 MMIO base addresses diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index c3ac0d6d5b4a..f0adb06c8d65 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -551,27 +551,10 @@ static void pnv_powerdown_notify(Notifier *n, void *opaque) static void pnv_reset(MachineState *machine) { -PnvMachineState *pnv = PNV_MACHINE(machine); void *fdt; -Object *obj; qemu_devices_reset(); -/* - * OpenPOWER systems have a BMC, which can be defined on the - * command line with: - * - * -device ipmi-bmc-sim,id=bmc0 - * - * This is the internal simulator but it could also be an external - * BMC. - */ -obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL); -if (obj) { -pnv->bmc = IPMI_BMC(obj); -pnv_bmc_hiomap(pnv->bmc); -} - fdt = pnv_dt_create(machine); /* Pack resulting tree */ @@ -629,6 +612,16 @@ static bool pnv_match_cpu(const char *default_type, const char *cpu_type) return ppc_default->pvr_match(ppc_default, ppc->pvr); } +static void pnv_ipmi_bt_init(ISABus *bus, IPMIBmc *bmc, uint32_t irq) +{ +Object *obj; + +obj = OBJECT(isa_create(bus, "isa-ipmi-bt")); +object_property_set_link(obj, OBJECT(bmc), "bmc", &error_fatal); +object_property_set_int(obj, irq, "irq", &error_fatal); +object_property_set_bool(obj, true, "realized", &error_fatal); +} + static void pnv_init(MachineState *machine) { PnvMachineState *pnv = PNV_MACHINE(machine); @@ -751,6 +744,9 @@ static void pnv_init(MachineState *machine) } g_free(chip_typename); +/* Create the machine BMC simulator */ +pnv->bmc = pnv_bmc_create(); + /* Instantiate ISA bus on chip 0 */ pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal); @@ -760,6 +756,9 @@ static void pnv_init(MachineState *machine) /* Create an RTC ISA device too */ mc146818_rtc_init(pnv->isa_bus, 2000, NULL); +/* Create the IPMI BT device for communication with the BMC */ +pnv_ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10); + /* * OpenPOWER systems use a IPMI SEL Event message to notify the * host to powerdown diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c index aa5c89586c63..07fa1e1c7e45 100644 --- a/hw/ppc/pnv_bmc.c +++ b/hw/ppc/pnv_bmc.c @@ -17,6 +17,8 @@ */ #include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" #include "target/ppc/cpu.h" #include "qemu/log.h" #include "hw/ipmi/ipmi.h" @@ -211,8 +213,20 @@ static const IPMINetfn hiomap_netfn = { .cmd_handlers = hiomap_cmds }; -int pnv_bmc_hiomap(IPMIBmc *bmc) +/* + * Instantiate the machine BMC. PowerNV uses the QEMU internal + * simulator but it could also be external. + */ +IPMIBmc *pnv_bmc_create(void) { -return ipmi_sim_register_netfn(IPMI_BMC_SIMULATOR(bmc), - IPMI_NETFN_OEM, &hiomap_netfn); +Object *obj; + +obj = object_new(TYPE_IPMI_BMC_SIMULATOR); +object_property_set_bool(obj, true, "realized", &error_fatal); + +/* Install the HIOMAP protocol handlers to access the PNOR */ +