[PATCH] hw/nvme: fix attachment of private namespaces

2025-04-08 Thread Klaus Jensen
From: Klaus Jensen 

Fix a regression where private namespaces get attached to
the wrong controller.

Keep track of the original controller "owner" of private namespaces, and
only attach if this matches on controller enablement.

Fixes: 6ccca4b6bb9f ("hw/nvme: rework csi handling")
Reported-by: Alan Adamson 
Suggested-by: Alan Adamson 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 7 ++-
 hw/nvme/ns.c | 4 
 hw/nvme/nvme.h   | 3 +++
 hw/nvme/subsys.c | 9 +
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 
518d02dc66706e2d2e86f1705db52188a97a67fc..d6b77d4fbc9def4639d53074c93f35ca882c4a02
 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7755,7 +7755,11 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
 NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
 
-if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+if (!ns || (!ns->params.shared && ns->ctrl != n)) {
+continue;
+}
+
+if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
 if (!ns->attached || ns->params.shared) {
 nvme_attach_ns(n, ns);
 }
@@ -8988,6 +8992,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 if (n->namespace.blkconf.blk) {
 ns = &n->namespace;
 ns->params.nsid = 1;
+ns->ctrl = n;
 
 if (nvme_ns_setup(ns, errp)) {
 return;
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 
98c1e75a5d29627351f1aa741da3625c984a2d40..4ab8ba74f51b346a50419869b6f4a7f4b2d0e9c2
 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -763,6 +763,10 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 
 ns->id_ns.endgid = cpu_to_le16(0x1);
 ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
+
+if (!ns->params.shared) {
+ns->ctrl = n;
+}
 }
 
 static const Property nvme_ns_props[] = {
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 
6f782ba18826d3ff8db7198d3a29c7654262bb7b..b5c9378ea4e524abacced613fbc4ce5a404350c0
 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -268,6 +268,9 @@ typedef struct NvmeNamespace {
 NvmeSubsystem *subsys;
 NvmeEnduranceGroup *endgrp;
 
+/* NULL for shared namespaces; set to specific controller if private */
+NvmeCtrl *ctrl;
+
 struct {
 uint32_t err_rec;
 } features;
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 
2ae56f12a596198e93a118428579301f8c8275d8..b617ac3892a32efebcaedca837eff59104dcc751
 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -56,7 +56,7 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 {
 NvmeSubsystem *subsys = n->subsys;
 NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
-int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
+int cntlid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
 
 if (pci_is_vf(&n->parent_obj)) {
 cntlid = le16_to_cpu(sctrl->scid);
@@ -92,13 +92,6 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 
 subsys->ctrls[cntlid] = n;
 
-for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
-NvmeNamespace *ns = subsys->namespaces[nsid];
-if (ns && ns->params.shared && !ns->params.detached) {
-nvme_attach_ns(n, ns);
-}
-}
-
 return cntlid;
 }
 

---
base-commit: dfaecc04c46d298e9ee81bd0ca96d8754f1c27ed
change-id: 20250408-fix-private-ns-19b2bdf62696

Best regards,
-- 
Klaus Jensen 




Re: [PATCH] Fix data races in test-bdrv-drain test

2025-04-08 Thread Kevin Wolf
Am 02.04.2025 um 12:21 hat Vitalii Mordan geschrieben:
> This patch addresses potential data races involving access to Job fields
> in the test-bdrv-drain test.
> 
> Fixes: 7253220de4 ("test-bdrv-drain: Test drain vs. block jobs")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2900
> Signed-off-by: Vitalii Mordan 

Considering that we're nearing the end of the code freeze for 10.0, I
fixed up a few trivial problems myself instead of asking for a v2 (see
diff below).

Thanks, applied to the block branch.

Kevin

diff --git a/include/qemu/job.h b/include/qemu/job.h
index f27551a9ad..a5a04155ea 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -520,8 +520,6 @@ bool job_is_internal(Job *job);
  */
 bool job_is_cancelled(Job *job);

-bool job_is_paused(Job *job);
-
 /* Same as job_is_cancelled(), but called with job lock held. */
 bool job_is_cancelled_locked(Job *job);

@@ -547,6 +545,9 @@ bool job_is_ready(Job *job);
 /* Same as job_is_ready(), but called with job lock held. */
 bool job_is_ready_locked(Job *job);

+/** Returns whether the job is paused. Called with job_mutex *not* held. */
+bool job_is_paused(Job *job);
+
 /**
  * Request @job to pause at the next pause point. Must be paired with
  * job_resume(). If the job is supposed to be resumed by user action, call
diff --git a/job.c b/job.c
index d9b2dd8532..0653bc2ba6 100644
--- a/job.c
+++ b/job.c
@@ -253,8 +253,8 @@ bool job_is_cancelled_locked(Job *job)

 bool job_is_paused(Job *job)
 {
-   JOB_LOCK_GUARD();
-   return job->paused;
+JOB_LOCK_GUARD();
+return job->paused;
 }

 bool job_is_cancelled(Job *job)
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 65041c9230..290cd2a70e 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -632,6 +632,8 @@ typedef struct TestBlockJob {
 BlockDriverState *bs;
 int run_ret;
 int prepare_ret;
+
+/* Accessed with atomics */
 bool running;
 bool should_complete;
 } TestBlockJob;
@@ -799,7 +801,7 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type,
 WITH_JOB_LOCK_GUARD() {
 g_assert_cmpint(job->job.pause_count, ==, 0);
 g_assert_false(job->job.paused);
-g_assert_true(tjob->running);
+g_assert_true(qatomic_read(&tjob->running));
 g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 }

@@ -1411,10 +1413,12 @@ static void test_set_aio_context(void)

 typedef struct TestDropBackingBlockJob {
 BlockJob common;
-bool should_complete;
 bool *did_complete;
 BlockDriverState *detach_also;
 BlockDriverState *bs;
+
+/* Accessed with atomics */
+bool should_complete;
 } TestDropBackingBlockJob;

 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp)
@@ -1557,8 +1561,10 @@ static void test_blockjob_commit_by_drained_end(void)

 typedef struct TestSimpleBlockJob {
 BlockJob common;
-bool should_complete;
 bool *did_complete;
+
+/* Accessed with atomics */
+bool should_complete;
 } TestSimpleBlockJob;

 static int coroutine_fn test_simple_job_run(Job *job, Error **errp)




Re: hw/nvme: Issue with multiple controllers behind a subsystem

2025-04-08 Thread Klaus Jensen
On Apr  4 10:52, alan.adam...@oracle.com wrote:
> I'm running into an issue with the latest qemu-nvme with v10.0.0-rc2 with
> regards to multiple controllers behind a subsystem.  When I setup a
> subsystem with 2 controllers, each with a private/non-shared namespace, the
> two private/non-shared namespaces all get attached to one of the
> controllers.
> 
> I'm sending out diffs that resolve the problem but would like to get some
> feedback before sending a formal patch.
> 

Hi Alan,

Thanks for reporting this! This is definitely a regression caused by the
csi refactoring I did.

A few comments below, but I'd like to try to get this into 10.0. The timeline
is tight, so I'll send out a patch with my suggestions below.

> @@ -7751,17 +7752,23 @@ static int nvme_start_ctrl(NvmeCtrl *n)
> 
>  nvme_set_timestamp(n, 0ULL);
> 
> -    /* verify that the command sets of attached namespaces are supported */
> -    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> -    NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
> +    if (n->subsys) {
> +    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> +    NvmeNamespace *ns = n->subsys->namespaces[i].namespace;
> 
> -    if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
> -    if (!ns->attached || ns->params.shared) {
> -    nvme_attach_ns(n, ns);
> +    if (!ns) {
> +    continue;
>  }
> +    if (!(n->subsys->namespaces[i].ctrl == n) && !ns->params.shared) {
> +    continue;
> +    }
> +    if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
> +    if (!ns->attached || ns->params.shared) {
> +    nvme_attach_ns(n, ns);
> +    }
> +    }
>  }
>  }
> -
>  nvme_update_dsm_limits(n, NULL);
> 
>  return 0;

Yeah, this is where things went wrong.

> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index 6f782ba18826..bea3b96a6dfa 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -97,6 +97,11 @@ typedef struct NvmeEnduranceGroup {
>  } fdp;
>  } NvmeEnduranceGroup;
> 
> +typedef struct Namespaces {
> +    NvmeCtrl   *ctrl;
> +    NvmeNamespace  *namespace;
> +} Namespaces;

Let's just add an NvmeCtrl* in struct NvmeNamespace. If set, the
namespace is private.

> diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
> index 2ae56f12a596..d5751564c05c 100644
> --- a/hw/nvme/subsys.c
> +++ b/hw/nvme/subsys.c
> @@ -92,13 +92,19 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
> 
>  subsys->ctrls[cntlid] = n;
> 
> -    for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
> -    NvmeNamespace *ns = subsys->namespaces[nsid];
> -    if (ns && ns->params.shared && !ns->params.detached) {
> -    nvme_attach_ns(n, ns);
> +    for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
> +    NvmeNamespace *ns = subsys->namespaces[nsid].namespace;
> +
> +    if (!ns) {
> +    continue;
> +    }
> +    if (!(subsys->namespaces[nsid].ctrl == n) && !ns->params.shared) {
> +    continue;
>  }
> +    if (ns->params.shared && !ns->params.detached) {
> +    nvme_attach_ns(n, ns);
> +    }
>  }
> -
>  return cntlid;
>  }

The code here that attaches namespaces when the controller is registered
actually needs to go away. This is a leftover.


signature.asc
Description: PGP signature


Re: [PATCH v2 2/2] qapi/block-core: derpecate some block-job- APIs

2025-04-08 Thread Markus Armbruster
Typo in subject, make it "deprecate".

Vladimir Sementsov-Ogievskiy  writes:

> For change, pause, resume, complete, dismiss and finalize actions
> corresponding job- and block-job commands are almost equal. The
> difference is in find_block_job_locked() vs find_job_locked()
> functions. What's different?
>
> 1. find_block_job_locked() checks whether the found job is a block-job.
>    This is OK when moving to the more generic API; no need to document
>    this change.
>
> 2. find_block_job_locked() reports DeviceNotActive on failure, whereas
>    find_job_locked() reports GenericError. So, let's document this
>    difference in deprecated.txt. Still, for dismiss and finalize, errors
>    are not documented at all, so be silent in deprecated.txt as well.
>
> ACKed-by: Peter Krempa 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

Reviewed-by: Markus Armbruster 




Re: [PATCH v6] hw/misc/vmfwupdate: Introduce hypervisor fw-cfg interface support

2025-04-08 Thread Gerd Hoffman
  Hi,

> Which means we are back to the single firmware image.  I think it makes
> sense to continue supporting classic rom images (which can also be
> loaded via -bios).  Any use case which needs more fine-grained control
> must use igvm.  We can use format bits in both capabilities and control
> fields to indicate what the hypervisor supports and what has been
> uploaded to the firmware image region.  See interface header file draft
> below.

Updated draft.  Idea is to go all-in on IGVM and support IGVM only.  We
keep the format bit, but more to make things future-proof (have the
option to support other formats should the need arise at some point in
the future) and not because we plan to support multiple formats today.

So we are down to this:

--- cut here 

/*
 * igvm only vmfwupdate interface rewrite
 */

struct vmfwupdate {
    // VMM capabilities, see VMFWUPDATE_CAP_*, read-only.
    uint64_t capabilities;
    // control bits, see VMFWUPDATE_CTL_*
    uint64_t control;

    // address and size of the firmware update image.  Will be cleared on
    // firmware update and reset.
    uint64_t fw_image_addr;
    // 32 bits wide: a 16-bit size would cap the image at 64 KiB - 1.
    // Widening keeps the field offset and sizeof(struct vmfwupdate)
    // unchanged, since the bytes were previously tail padding.
    uint32_t fw_image_size;
};

// --- format bits, used by both 'capabilities' and 'control' ---
// igvm
// Uses 1ULL: shifting a plain int by 32 is undefined behaviour, and these
// bits live in the 64-bit 'capabilities' / 'control' fields.
#define VMFWUPDATE_FORMAT_IGVM   (1ULL << 32)

// --- 'control' field bits ---
// disable vmfwupdate interface
#define VMFWUPDATE_CTL_DISABLE   (1ULL << 0)

--- cut here 

All other details will be offloaded to IGVM.  We will need some IGVM
format updates for that:

 * Add a parameter to specify the location of the payload (i.e.
   the UKI, or some container format in case we want pass on more
   than just one efi binary).
 * Add a page types for db/dbx signature databases where we can
   store either the signing key or the authenticode hash of the
   payload.

take care,
  Gerd




[PULL 09/11] qapi/qapi-schema: Address the introduction's bit rot

2025-04-08 Thread Markus Armbruster
Cut the crap that stopped making sense years ago.  Adjust the
remainder.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-10-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 qapi/qapi-schema.json | 27 +++
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 0d027d5017..7bc600bb76 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -3,37 +3,24 @@
 ##
 # = Introduction
 #
-# This document describes all commands currently supported by QMP.
+# This manual describes the commands and events supported by the QEMU
+# Monitor Protocol (QMP).
 #
 # For locating a particular item, please see the `qapi-qmp-index`.
 #
-# Most of the time their usage is exactly the same as in the user
-# Monitor, this means that any other document which also describe
-# commands (the manpage, QEMU's manual, etc) can and should be
-# consulted.
-#
-# QMP has two types of commands: regular and query commands.  Regular
-# commands usually change the Virtual Machine's state someway, while
-# query commands just return information.  The sections below are
-# divided accordingly.
-#
-# It's important to observe that all communication examples are
-# formatted in a reader-friendly way, so that they're easier to
-# understand.  However, in real protocol usage, they're emitted as a
-# single line.
-#
-# Also, the following notation is used to denote data flow:
-#
-# Example:
+# The following notation is used in examples:
 #
 # .. qmp-example::
 #
 #   -> ... text sent by client (commands) ...
 #   <- ... text sent by server (command responses and events) ...
 #
+# Example text is formatted for readability.  However, in real
+# protocol usage, it's commonly emitted as a single line.
+#
 # Please refer to the
 # :doc:`QEMU Machine Protocol Specification `
-# for detailed information on the Server command and response formats.
+# for the general format of commands, responses, and events.
 ##
 
 { 'include': 'pragma.json' }
-- 
2.48.1




[PATCH v6 2/4] virtio_net: Add the check for vdpa's mac address

2025-04-08 Thread Cindy Lu
When using a VDPA device, it is important to ensure that the MAC
address is correctly set. The MAC address in the hardware should
match the MAC address from the QEMU command line. This is a recommended
configuration and will allow the system to boot.

Signed-off-by: Cindy Lu 
---
 hw/net/virtio-net.c | 40 +++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 340c6b6422..94ee21d1fc 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3751,12 +3751,43 @@ static bool failover_hide_primary_device(DeviceListener 
*listener,
 /* failover_primary_hidden is set during feature negotiation */
 return qatomic_read(&n->failover_primary_hidden);
 }
+static bool virtio_net_check_vdpa_mac(NetClientState *nc, VirtIONet *n,
+  MACAddr *cmdline_mac, Error **errp)
+{
+struct virtio_net_config hwcfg = {};
+static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
+
+vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&hwcfg, ETH_ALEN);
+
+/* For VDPA device following situations are acceptable: */
 
+if (memcmp(&hwcfg.mac, &zero, sizeof(MACAddr)) != 0) {
+/*
+ * 1. The hardware MAC address is the same as the QEMU command line MAC
+ *   address, and both of them are not 0.
+ */
+if ((memcmp(&hwcfg.mac, cmdline_mac, sizeof(MACAddr)) == 0)) {
+return true;
+}
+}
+error_setg(errp,
+   "vDPA device's MAC address %02x:%02x:%02x:%02x:%02x:%02x "
+   "is not the same as the QEMU command line MAC address "
+   "%02x:%02x:%02x:%02x:%02x:%02x,"
+   "Initialization failed.",
+   hwcfg.mac[0], hwcfg.mac[1], hwcfg.mac[2], hwcfg.mac[3],
+   hwcfg.mac[4], hwcfg.mac[5], cmdline_mac->a[0], 
cmdline_mac->a[1],
+   cmdline_mac->a[2], cmdline_mac->a[3], cmdline_mac->a[4],
+   cmdline_mac->a[5]);
+
+return false;
+}
 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VirtIONet *n = VIRTIO_NET(dev);
 NetClientState *nc;
+MACAddr macaddr_cmdline;
 int i;
 
 if (n->net_conf.mtu) {
@@ -3864,6 +3895,7 @@ static void virtio_net_device_realize(DeviceState *dev, 
Error **errp)
 virtio_net_add_queue(n, 0);
 
 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
+memcpy(&macaddr_cmdline, &n->nic_conf.macaddr, sizeof(n->mac));
 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
 n->status = VIRTIO_NET_S_LINK_UP;
@@ -3910,7 +3942,13 @@ static void virtio_net_device_realize(DeviceState *dev, 
Error **errp)
 nc = qemu_get_queue(n->nic);
 nc->rxfilter_notify_enabled = 1;
 
-   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+if (nc->peer && (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA)) {
+if (nc->peer->check_mac) {
+if (!virtio_net_check_vdpa_mac(nc, n, &macaddr_cmdline, errp)) {
+virtio_cleanup(vdev);
+return;
+}
+}
 struct virtio_net_config netcfg = {};
 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
 vhost_net_set_config(get_vhost_net(nc->peer),
-- 
2.45.0




Re: [PATCH 00/16] Add Multi-Core Debug (MCD) API support

2025-04-08 Thread Markus Armbruster
Mario Fleischmann  writes:

> Apologies for the line wrapping in yesterday's answer. Should be fixed now.
>
> On 08.04.2025 09:00, Markus Armbruster wrote:
>> Mario Fleischmann  writes:
>> 
>>> Thanks a lot for the response, I really appreciate your time.
>>>
>>> On 07.04.2025 14:33, Markus Armbruster wrote:
>>>
 Mario Fleischmann  writes:

> This patch series introduces support for the Multi-Core Debug (MCD) API, a
> commonly used debug interface by emulators. The MCD API, defined through a
> header file, consists of 54 functions for implementing debug and trace.
> However, since it is a header-file-only interface, MCD does not specify a
> communication protocol. We get around this limitation by following a 
> remote
> procedure call approach using QMP. The client stub corresponding to this
> implementation can be found at https://gitlab.com/lauterbach/mcdrefsrv
>
> This series is the successor to:
> "[PATCH v5 00/18] first version of mcdstub"
> (https://patchew.org/QEMU/20231220162555.19545-1-nicolas.e...@lauterbach.com/)
>
> * Architecture-independent MCD implementation
> * QMP instead of custom TCP protocol

 Rationale?  There must be pros and cons.
>>>
>>> Assuming you're referring to the protocol of the previous patch series:
>>> The previous TCP protocol only supported a subset of MCD. As the 
>>> implementation progresses, the protocol eventually needs to be extended, 
>>> possibly resulting in backwards compatibility problems.
>>> Following an RPC approach and keeping the communication layer as close 
>>> to the MCD API as possible results in a larger protocol at first, but 
>>> does not need to be changed afterwards.
>>> By directly mapping MCD functions onto QMP commands, the complexity in 
>>> the server and client stubs can be minimized.
>>>
>>> Assuming you're referring to the QMP choice:
>>> QMP is being described as the "protocol which allows applications to 
>>> control a QEMU instance".
>>> It provides a RPC framework which automatically (de)serializes methods 
>>> and their parameters, even inside QTests.
>>> The whole interface is automatically documented.
>> 
>> Let's see whether I understand.
>> 
>> MCD is an established C interface.
>> 
>> Your goal is to provide remote MCD for QEMU, i.e. the client uses the
>> MCD C interface, and the interface's implementation talks to an MCD
>> server integrated into QEMU via some remote transport.
>> 
>> The previous version connects the two with a bespoke protocol via TCP.
>> The client software translates between the C interface and this
>> protocol.  QEMU implements the protocol's server side.  Designing and
>> maintaining a protocol is expensive.
>> 
>> This version makes two changes:
>> 
>> 1. Instead of layering a protocol on top of MCD, you use MCD directly.
>> This eliminates protocol design and maintenance.  Moreover, translation
>> becomes straightforward marshaling / unmarshaling for the transport.
>> 
>> 2. You use QMP as a transport.  This gets you marshaling / unmarshaling
>> for free.  It also provides some useful infrastructure for tests,
>> documentation and such.
>> 
>> Fair?
>
> Couldn't have put it better myself.
>
 How much data would you expect to flow in practical usage?  QMP isn't
 designed for bulk transfer...
>>>
>>> According to ifstat, the expected data rate in practical usage is around
>>>
>>> KB/s in  KB/s out
>>> 100  100
>>>
>>> I fully understand your concern and agree that a JSON-based
>>> protocol does not result in the lowest data rate.
>>>
>>> If the data rate is the highest priority: *Before* the QMP support was 
>>> implemented, the MCD interface was built on a custom RPC framework, 
>>> generated with the code generator at:
>>>
>>> https://gitlab.com/lauterbach/mcdrefsrv/-/tree/main/codegen
>>>
>>> The resulting header file was basically a set of functions capable of 
>>> serializing MCD's function arguments into a byte stream and vice-versa:
>>>
>>> https://gitlab.com/lauterbach/mcdrefsrv/-/blob/df754cef7f19ece2d00b6ce4e307ba37e91e5dcb/include/mcd_rpc.h
>>>
>>> The QMP support was added because of the advantages listed above and in 
>>> order to evade yet another custom communication protocol.
>>> As a user of the MCD interface, I haven't noticed any negative impact of 
>>> the increased data rate in realistic debugging scenarios, even when 
>>> trying to drive the data rate up. If that would have been the case, I 
>>> would have sent this patch request with our custom RPC protocol.
>> 
>> I see.
>> 
> qemu-system- [options] -qmp tcp::1235,server=on,wait=off
>
> * Architecture-independent QTest test suite
>
> V=1 QTEST_QEMU_BINARY="./qemu-system- [options]" 
> tests/qtest/mcd-test
>
> * Architecture-specific tests can be found at the client stub

 [...]

>   qapi/mcd.json | 2366 ++

 This is *massive*.  By non-blank, non-comme

Re: [PATCH v8 0/6] Specifying cache topology on ARM

2025-04-08 Thread Alireza Sanaee via
On Mon, 7 Apr 2025 19:24:16 +0200
Eric Auger  wrote:

> Hi Ali,
> 
> On 3/10/25 5:23 PM, Alireza Sanaee via wrote:
> > Specifying the cache layout in virtual machines is useful for
> > applications and operating systems to fetch accurate information
> > about the cache structure and make appropriate adjustments.
> > Enforcing correct sharing information can lead to better
> > optimizations. This patch enables the specification of cache layout
> > through a command line parameter, building on a patch set by Intel
> > [1,2,3]. It uses this set as a  
> some dependencies were merged. The series does not apply anymore.
This has already been picked up, thanks for pointing it out. I will remove it
in the next version.
> > foundation.  The device tree and ACPI/PPTT table, and device tree
> > are populated based on user-provided information and CPU topology.  
> this last sentence need some rewording.
Sure, gonna fix.
> >
> > Example:
> >
> >
> > ++++
> > |Socket 0||Socket 1|
> > |(L3 Cache)  ||(L3 Cache)  |
> > ++---+++---+
> >  | |
> > ++++++
> > |   Cluster 0 ||   Cluster 0 |
> > |   (L2 Cache)||   (L2 Cache)|
> > ++++++
> >  | |
> > +++  ++++++
> > +++ |   Core 0 | |   Core 1||
> > Core 0|  |   Core 1| |   (L1i, L1d) | |   (L1i,
> > L1d)||   (L1i, L1d)|  |   (L1i, L1d)|
> > +++  ++++++
> > +++ |   |   |
> > | ++  ++
> > ++  ++ |Thread 0|  |Thread 1|
> >|Thread 1|  |Thread 0| ++
> > ++  ++  ++ |Thread 1|
> >|Thread 0|  |Thread 0|  |Thread 1|
> > ++  ++  ++
> > ++
> >
> >
> > The following command will represent the system relying on **ACPI
> > PPTT tables**.
> >
> > ./qemu-system-aarch64 \
> >  -machine
> > virt,smp-cache.0.cache=l1i,smp-cache.0.topology=core,smp-cache.1.cache=l1d,smp-cache.1.topology=core,smp-cache.2.cache=l2,smp-cache.2.topology=cluseter,smp-
> >  
> s/cluseter/cluster
> > cache.3.cache=l3,smp-cache.3.topology=socket \
> >  -cpu max \
> >  -m 2048 \
> >  -smp sockets=2,clusters=1,cores=2,threads=2 \
> >  -kernel ./Image.gz \
> >  -append "console=ttyAMA0 root=/dev/ram rdinit=/init acpi=force" \
> >  -initrd rootfs.cpio.gz \
> >  -bios ./edk2-aarch64-code.fd \
> >  -nographic
> >
> > The following command will represent the system relying on **the
> > device tree**.
> >
> > ./qemu-system-aarch64 \
> >  -machine
> > virt,smp-cache.0.cache=l1i,smp-cache.0.topology=core,smp-cache.1.cache=l1d,smp-cache.1.topology=core,smp-cache.2.cache=l2,smp-cache.2.topology=cluseter,smp-cache.3.cache=l3,smp-cache.3.topology=socket
> > \ -cpu max \ -m 2048 \
> >  -smp sockets=2,clusters=1,cores=2,threads=2 \
> >  -kernel ./Image.gz \
> >  -append "console=ttyAMA0 root=/dev/ram rdinit=/init acpi=off" \
> >  -initrd rootfs.cpio.gz \
> >  -nographic
> >
> > Failure cases:
> > 1) There are scenarios where caches exist in systems' registers
> > but left unspecified by users. In this case qemu returns failure.  
> Can you give more details on 1)? is it a TCG case or does it also
> exist with KVM acceleration?
Yes, imagine your CLIDR register describes L1, L2, and L3, but in the
interface we only describe L1 and L2 and leave out L3. The situation for L3
is then undefined, in the sense that its sharing level is unknown.
This is treated as a failure.

I think this is applicable to both TCG and KVM, because the mismatch
might come from real systems' registers or emulated ones. Users might
not know exactly which caches are supported and misconfigure them.

I spent some time thinking about a default behavior, but that
would again be unclear to users, so I decided it is better if users
decide what EXACTLY should happen in an informed fashion.
> >
> > 2) SMT threads cannot share caches which is not very common.
> > More discussions here [4].
> >
> > Currently only three levels of caches are supported to be specified
> > from the command line. However, increasing the value does not
> > require significant changes. Further, this patch assumes l2 and l3
> > unified caches and does not allow l(2/3)(i/d). The level
> > terminology is thread/core/cluster/socket right now. Hierarchy
> > assumed in this patch: Socket level = Cluster level + 1

Re: [PATCH v8 1/6] target/arm/tcg: increase cache level for cpu=max

2025-04-08 Thread Alireza Sanaee via
On Mon, 7 Apr 2025 19:07:29 +0200
Eric Auger  wrote:

Hi Eric,

> Hi Ali,
> 
> On 3/10/25 5:23 PM, Alireza Sanaee via wrote:
> > This patch addresses cache description in the
> > `aarch64_max_tcg_initfn` function for cpu=max. It introduces three
> > layers of caches and modifies the cache description registers
> > accordingly.
> >
> > Signed-off-by: Alireza Sanaee 
> > Reviewed-by: Jonathan Cameron 
> > ---
> >  target/arm/tcg/cpu64.c | 13 +
> >  1 file changed, 13 insertions(+)
> >
> > diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
> > index 29ab0ac79da9..1405506594c2 100644
> > --- a/target/arm/tcg/cpu64.c
> > +++ b/target/arm/tcg/cpu64.c
> > @@ -1086,6 +1086,19 @@ void aarch64_max_tcg_initfn(Object *obj)
> >  uint64_t t;
> >  uint32_t u;
> >  
> > +/*
> > + * Expanded cache set
> > + */
> > +cpu->clidr = 0x8200123; /* 4 4 3 in 3 bit fields */  
>  /* 4 4 3 in 3 bit fields */ was not obvious to me at the first
> reading. I guess it means unified for L2 and L3 (0x4) and separate
> I&D for L1
That is correct. It might be a good idea to update this comment.
> > +/* 64KB L1 dcache */
> > +cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 *
> > KiB, 7);
> > +/* 64KB L1 icache */
> > +cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 *
> > KiB, 2);
> > +/* 1MB L2 unified cache */
> > +cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 1 *
> > MiB, 7);
> > +/* 2MB L3 unified cache */
> > +cpu->ccsidr[4] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 2 *
> > MiB, 7);
> > +  
> Out of curiosity, how did you come up with those values? Is it an
> arbitrary choice?
The numbers are arbitrary. It was important to just have at least three
layers so I can experiment with L3 cache level, as currently we got
three layers in the interface.

Thanks,
Alireza
> 
> Thanks
> 
> Eric
> >  /*
> >   * Unset ARM_FEATURE_BACKCOMPAT_CNTFRQ, which we would
> > otherwise default
> >   * to because we started with aarch64_a57_initfn(). A 'max'
> > CPU might  
> 
> 




[PATCH v6 0/4] virtio_net: Add the check for vdpa's mac address

2025-04-08 Thread Cindy Lu
When using a VDPA device, it is important to ensure that the MAC address
is correctly set. In this patch series, we add a new parameter to
enable this check.
Only three MAC setup configurations are acceptable; any other will
fail to boot.

The usage is:

-netdev 
type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,check-mac=true\
-device virtio-net-pci,netdev=vhost-vdpa0\


tested by ConnectX-6 Dx/vdpa_sim device

change in v3
1. add a new parameter to enable the check and keep the old behavior
2. adjust the comment and make it more clear

change in v4
1. change the new parameter's name to check-mac
2. change the comment and make it more clear

change in v5
1.These patches haven't been merged for a while, so I rebased
  them with the latest code and resubmitted

change in v6
1. Address comments
2. Rebase with the latest QEMU

Cindy Lu (4):
  vhost_vdpa : Add a new parameter to enable check mac address
  virtio_net: Add the check for vdpa's mac address
  virtio_net: Add second acceptable configuration for MAC setup
  virtio_net: Add third acceptable configuration for MAC setup.

 hw/net/virtio-net.c | 66 -
 include/net/net.h   |  1 +
 net/vhost-vdpa.c|  4 +++
 qapi/net.json   |  5 
 4 files changed, 75 insertions(+), 1 deletion(-)

-- 
2.45.0




Re: [PATCH v8 5/6] tests/qtest/bios-table-test: testing new ARM ACPI PPTT topology

2025-04-08 Thread Alireza Sanaee via
On Mon, 7 Apr 2025 19:30:47 +0200
Eric Auger  wrote:

> Hi,
> 
> On 3/10/25 5:23 PM, Alireza Sanaee via wrote:
> > Test new PPTT topology with cache representation.
> >
> > Signed-off-by: Alireza Sanaee 
> > Reviewed-by: Jonathan Cameron 
> > ---
> >  tests/qtest/bios-tables-test.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/tests/qtest/bios-tables-test.c
> > b/tests/qtest/bios-tables-test.c index 0a333ec43536..6bdc25f4df1e
> > 100644 --- a/tests/qtest/bios-tables-test.c
> > +++ b/tests/qtest/bios-tables-test.c
> > @@ -2142,6 +2142,10 @@ static void
> > test_acpi_aarch64_virt_tcg_topology(void) };
> >  
> >  test_acpi_one("-cpu cortex-a57 "
> > +  "-M
> > virt,smp-cache.0.cache=l1i,smp-cache.0.topology=cluster,"
> > +
> > "smp-cache.1.cache=l1d,smp-cache.1.topology=cluster,"
> > +
> > "smp-cache.2.cache=l2,smp-cache.2.topology=cluster,"
> > +
> > "smp-cache.3.cache=l3,smp-cache.3.topology=cluster "  
> In the coverletter you used different topologies: core, cluster,
> socket. Don't you want to test the same config here?

The reason I used this one is because it is a weird setup (everything
shared at Cluster) and if this one works, I can sorta say the rest of
the scenarios work fine too. I get that this differs from the cover
letter setup.

I could also add a single test for all combinations of caches.
There are quite a few, but I would be happy to do that if I should.
> 
> Is it better to reuse the existing ".topology" variant or to create
> another one? (I have no strong opinion though)
I am not sure how to do variants and I just replaced the existing one, 
but happy to do either way.
> 
> Thanks
> 
> Eric
> >"-smp sockets=1,clusters=2,cores=2,threads=2",
> > &data); free_test_data(&data);
> >  }  
> 
> 




Re: [PATCH v2 00/13] Introduce AST27x0 multi-SoC machine

2025-04-08 Thread Cédric Le Goater

On 4/8/25 08:35, Steven Lee wrote:

-Original Message-
From: Cédric Le Goater 
Sent: Wednesday, March 26, 2025 1:34 AM
To: Steven Lee ; Peter Maydell
; Troy Lee ; Jamin Lin
; Andrew Jeffery
; Joel Stanley ; open
list:ASPEED BMCs ; open list:All patches CC here

Cc: Troy Lee ; Yunlin Tang

Subject: Re: [PATCH v2 00/13] Introduce AST27x0 multi-SoC machine

Hello Steven,

On 3/13/25 06:40, Steven Lee wrote:

This patch series introduces full core support for the AST27x0 SoC, along with

necessary updates to the ASPEED AST27x0 SOC.

The AST27x0 SoC is a new family of ASPEED SoCs featuring 4 Cortex-A35

cores and 2 Cortex-M4 cores.


v1:
- Map unimplemented devices in SoC memory
- Introduce AST2700 CM4 SoC
- Introduce AST27x0FC Machine

v2:
- Remove unused functions
- Correct hex notation for device addresses in AST27x0 SoC
- Add AST2700 SSP INTC and AST2700 TSP INTC
- Split AST27x0 CM4 SoC to AST27x0 SSP SoC and AST27x0 TSP SoC
- Add AST27x0 A0 SSP SoC and AST27x0 A1 SSP SoC
- Add AST27x0 A0 TSP SoC and AST27x0 A1 TSP SoC
- Add functional tests for AST2700FC A0 and AST2700FC A1
- Add Documentation for AST2700FC


Please try to label all patches as v2. See 'git format-patch -v ...'



Hi Cédric,

Thanks for the reminder. I will label the patches as v3 in the next submission.

Regards,
Steven


Steven Lee (13):
aspeed: ast27x0: Map unimplemented devices in SoC memory
aspeed: ast27x0: Correct hex notation for device addresses
hw/intc/aspeed: Add support for AST2700 SSP INTC
hw/intc/aspeed: Add support for AST2700 TSP INTC
hw/arm/aspeed_ast27x0-ssp: Introduce AST27x0 A0 SSP SoC
hw/arm/aspeed_ast27x0-ssp: Introduce AST27x0 A1 SSP SoC
hw/arm/aspeed_ast27x0-tsp: Introduce AST27x0 A0 TSP SoC
hw/arm/aspeed_ast27x0-tsp: Introduce AST27x0 A1 TSP SoC
hw/arm: Introduce ASPEED AST2700 a0 full core machine
hw/arm: Introduce ASPEED AST2700 a1 full core machine
tests/function/aspeed: Add functional test for AST2700FC
tests/function/aspeed: Add functional test for AST2700FC A1
docs: Add support for ast2700fc machine


Do we need to add support for the A0 and A1 ? Would A1 be enough ?



I will remove A0 in v3 of the patch series.


Thanks, it all looked good to me. I should be able to queue the series
quickly for QEMU 10.1 before Jamin's HACE changes.

C.










   docs/system/arm/aspeed.rst |  61 ++-
   include/hw/arm/aspeed_soc.h|  32 ++
   include/hw/intc/aspeed_intc.h  |   5 +
   hw/arm/aspeed_ast27x0-fc.c | 340

+

   hw/arm/aspeed_ast27x0-ssp.c| 400

+++

   hw/arm/aspeed_ast27x0-tsp.c| 400

+++

   hw/arm/aspeed_ast27x0.c|  79 +++-
   hw/intc/aspeed_intc.c  | 424

+

   hw/arm/meson.build |   6 +-
   tests/functional/test_aarch64_ast2700fc.py | 161 
   10 files changed, 1884 insertions(+), 24 deletions(-)
   create mode 100644 hw/arm/aspeed_ast27x0-fc.c
   create mode 100644 hw/arm/aspeed_ast27x0-ssp.c
   create mode 100644 hw/arm/aspeed_ast27x0-tsp.c
   create mode 100755 tests/functional/test_aarch64_ast2700fc.py








[PATCH v6 3/4] virtio_net: Add second acceptable configuration for MAC setup

2025-04-08 Thread Cindy Lu
For VDPA devices, allow configurations where the hardware MAC address
is non-zero while the MAC address in the QEMU command line is zero.

Signed-off-by: Cindy Lu 
---
 hw/net/virtio-net.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 94ee21d1fc..45b63eb9de 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3769,6 +3769,20 @@ static bool virtio_net_check_vdpa_mac(NetClientState 
*nc, VirtIONet *n,
 if ((memcmp(&hwcfg.mac, cmdline_mac, sizeof(MACAddr)) == 0)) {
 return true;
 }
+/*
+ * 2. The hardware MAC address is NOT 0 and the MAC address in
+ *  the QEMU command line is 0.
+ *  In this situation, we use the hardware MAC address overwrite
+ *  the QEMU command line address saved in VirtIONet->mac[0].
+ *  In the following process, QEMU will use this MAC address
+ *  in VirtIONet to complete the initialization.
+ */
+if (memcmp(cmdline_mac, &zero, sizeof(MACAddr)) == 0) {
+/* overwrite the mac address with hardware address */
+memcpy(&n->mac[0], &hwcfg.mac, sizeof(n->mac));
+memcpy(&n->nic_conf.macaddr, &hwcfg.mac, sizeof(n->mac));
+return true;
+}
 }
 error_setg(errp,
"vDPA device's MAC address %02x:%02x:%02x:%02x:%02x:%02x "
-- 
2.45.0




Re: [PATCH v2 1/2] vfio/spapr: Enhance error handling in vfio_spapr_create_window()

2025-04-08 Thread Amit Machhiwal
Hi Cédric,

Thanks for taking a look at this patch. Please find my responses below:

On 2025/04/08 08:29 AM, Cédric Le Goater wrote:
> Hello Amit,
> 
> Please use --cover-letter for the next spin.

Sure, will do.

> 
> 
> On 4/7/25 16:31, Amit Machhiwal wrote:
> > Introduce an Error ** parameter to vfio_spapr_create_window() to enable
> > structured error reporting. This allows the function to propagate
> > detailed errors back to callers.
> > 
> > Suggested-by: Cédric Le Goater 
> > Signed-off-by: Amit Machhiwal 
> > ---
> >   hw/vfio/spapr.c | 23 ---
> >   1 file changed, 12 insertions(+), 11 deletions(-)
> > 
> > diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
> > index 1a5d1611f2cd..4f2858b43f36 100644
> > --- a/hw/vfio/spapr.c
> > +++ b/hw/vfio/spapr.c
> > @@ -232,7 +232,7 @@ static int vfio_spapr_remove_window(VFIOContainer 
> > *container,
> >   static int vfio_spapr_create_window(VFIOContainer *container,
> 
> This routine can return a bool since vfio_spapr_container_add_section_window()
> does not check the returned errno.

Sure, I can make this change in next version.

> 
> >   MemoryRegionSection *section,
> > -hwaddr *pgsize)
> > +hwaddr *pgsize, Error **errp)
> >   {
> >   int ret = 0;
> >   VFIOContainerBase *bcontainer = &container->bcontainer;
> > @@ -252,10 +252,10 @@ static int vfio_spapr_create_window(VFIOContainer 
> > *container,
> >   pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1));
> >   pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0;
> >   if (!pagesize) {
> > -error_report("Host doesn't support page size 0x%"PRIx64
> > - ", the supported mask is 0x%lx",
> > - memory_region_iommu_get_min_page_size(iommu_mr),
> > - bcontainer->pgsizes);
> > +error_setg(errp, "Host doesn't support page size 0x%"PRIx64
> > +   ", the supported mask is 0x%lx",
> > +   memory_region_iommu_get_min_page_size(iommu_mr),
> > +   bcontainer->pgsizes);
> 
> This can use error_setg_errno(errp, EINVAL, ... ) instead of
> returning -EINVAL.

Sure.

> 
> >   return -EINVAL;
> >   }
> > @@ -302,16 +302,16 @@ static int vfio_spapr_create_window(VFIOContainer 
> > *container,
> >   }
> >   }
> >   if (ret) {
> > -error_report("Failed to create a window, ret = %d (%m)", ret);
> > +error_setg_errno(errp, -ret, "Failed to create a window, ret = %d 
> > (%m)", ret);
> >   return -errno;
> >   }
> >   if (create.start_addr != section->offset_within_address_space) {
> >   vfio_spapr_remove_window(container, create.start_addr);
> > -error_report("Host doesn't support DMA window at %"HWADDR_PRIx", 
> > must be %"PRIx64,
> > - section->offset_within_address_space,
> > - (uint64_t)create.start_addr);
> > +error_setg(errp, "Host doesn't support DMA window at %"HWADDR_PRIx
> > +   ", must be %"PRIx64, 
> > section->offset_within_address_space,
> > +   (uint64_t)create.start_addr);
> 
> This can use error_setg_errno(errp, EINVAL, ... ) instead of
> returning -EINVAL.

Sure.

> 
> >   return -EINVAL;
> >   }
> >   trace_vfio_spapr_create_window(create.page_shift,
> > @@ -334,6 +334,7 @@ 
> > vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
> > container);
> >   VFIOHostDMAWindow *hostwin;
> >   hwaddr pgsize = 0;
> > +Error *local_err = NULL;
> >   int ret;> /*
> > @@ -377,9 +378,9 @@ 
> > vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
> >   }
> >   }
> > -ret = vfio_spapr_create_window(container, section, &pgsize);
> > +ret = vfio_spapr_create_window(container, section, &pgsize, 
> > &local_err);
> 
> please pass errp instead.
> 
> >   if (ret) {
> > -error_setg_errno(errp, -ret, "Failed to create SPAPR window");
> > +error_propagate(errp, local_err);
> 
> no need to propagate if errp is passed to vfio_spapr_create_window()

As per my understanding, for calling error_setg() and friends, the Error **
object has to be NULL. If I were to call vfio_spapr_create_window() with errp
instead of the local Error object, that'd result into the below assertion
failure with only the first patch applied and a guest booted with a memory >
128G and PCI device passthrough:

 qemu-system-ppc64: ../util/error.c:68: error_setv: Assertion `*errp == NULL' 
failed.

This happens because the errp would already be set in vfio_spapr_create_window()
and calling error_setg_errno(errp, ...) in 
vfio_spapr_container_add_section_window()
would fail as errp is no more NULL. This is the reason I chose to use a local
Error object and later propagate it w

[PULL 07/11] docs/sphinx/qmp_lexer: Highlight elisions like comments, not prompts

2025-04-08 Thread Markus Armbruster
Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-8-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 docs/sphinx/qmp_lexer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sphinx/qmp_lexer.py b/docs/sphinx/qmp_lexer.py
index 1bd1b81b70..7b3b808d12 100644
--- a/docs/sphinx/qmp_lexer.py
+++ b/docs/sphinx/qmp_lexer.py
@@ -24,7 +24,7 @@ class QMPExampleMarkersLexer(RegexLexer):
 'root': [
 (r'-> ', token.Generic.Prompt),
 (r'<- ', token.Generic.Prompt),
-(r'\.{3}( .* \.{3})?', token.Generic.Prompt),
+(r'\.{3}( .* \.{3})?', token.Comment.Multiline),
 ]
 }
 
-- 
2.48.1




[PULL 03/11] docs/interop: Delete "QEMU Guest Agent Protocol Reference" TOC

2025-04-08 Thread Markus Armbruster
The "QEMU Guest Agent Protocol Reference" starts with the following
table of contents:

Contents

* QEMU Guest Agent Protocol Reference

  * QEMU guest agent protocol commands and structs

This is useless.  Delete the entire TOC.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-4-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 docs/interop/qemu-ga-ref.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/docs/interop/qemu-ga-ref.rst b/docs/interop/qemu-ga-ref.rst
index 19b5c7a549..25f6e24b03 100644
--- a/docs/interop/qemu-ga-ref.rst
+++ b/docs/interop/qemu-ga-ref.rst
@@ -1,9 +1,6 @@
 QEMU Guest Agent Protocol Reference
 ===
 
-.. contents::
-   :depth: 3
-
 .. qapi-doc:: qga/qapi-schema.json
:transmogrify:
:namespace: QGA
-- 
2.48.1




Re: [PATCH v2 1/2] vfio/spapr: Enhance error handling in vfio_spapr_create_window()

2025-04-08 Thread Cédric Le Goater

On 4/8/25 11:14, Amit Machhiwal wrote:

Hi Cédric,

Thanks for taking a look at this patch. Please find my responses below:

On 2025/04/08 08:29 AM, Cédric Le Goater wrote:

Hello Amit,

Please use --cover-letter for the next spin.


Sure, will do.




On 4/7/25 16:31, Amit Machhiwal wrote:

Introduce an Error ** parameter to vfio_spapr_create_window() to enable
structured error reporting. This allows the function to propagate
detailed errors back to callers.

Suggested-by: Cédric Le Goater 
Signed-off-by: Amit Machhiwal 
---
   hw/vfio/spapr.c | 23 ---
   1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 1a5d1611f2cd..4f2858b43f36 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -232,7 +232,7 @@ static int vfio_spapr_remove_window(VFIOContainer 
*container,
   static int vfio_spapr_create_window(VFIOContainer *container,


This routine can return a bool since vfio_spapr_container_add_section_window()
does not check the returned errno.


Sure, I can make this change in next version.




   MemoryRegionSection *section,
-hwaddr *pgsize)
+hwaddr *pgsize, Error **errp)
   {
   int ret = 0;
   VFIOContainerBase *bcontainer = &container->bcontainer;
@@ -252,10 +252,10 @@ static int vfio_spapr_create_window(VFIOContainer 
*container,
   pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1));
   pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0;
   if (!pagesize) {
-error_report("Host doesn't support page size 0x%"PRIx64
- ", the supported mask is 0x%lx",
- memory_region_iommu_get_min_page_size(iommu_mr),
- bcontainer->pgsizes);
+error_setg(errp, "Host doesn't support page size 0x%"PRIx64
+   ", the supported mask is 0x%lx",
+   memory_region_iommu_get_min_page_size(iommu_mr),
+   bcontainer->pgsizes);


This can use error_setg_errno(errp, EINVAL, ... ) instead of
returning -EINVAL.


Sure.




   return -EINVAL;
   }
@@ -302,16 +302,16 @@ static int vfio_spapr_create_window(VFIOContainer 
*container,
   }
   }
   if (ret) {
-error_report("Failed to create a window, ret = %d (%m)", ret);
+error_setg_errno(errp, -ret, "Failed to create a window, ret = %d 
(%m)", ret);
   return -errno;
   }
   if (create.start_addr != section->offset_within_address_space) {
   vfio_spapr_remove_window(container, create.start_addr);
-error_report("Host doesn't support DMA window at %"HWADDR_PRIx", must be 
%"PRIx64,
- section->offset_within_address_space,
- (uint64_t)create.start_addr);
+error_setg(errp, "Host doesn't support DMA window at %"HWADDR_PRIx
+   ", must be %"PRIx64, section->offset_within_address_space,
+   (uint64_t)create.start_addr);


This can use error_setg_errno(errp, EINVAL, ... ) instead of
returning -EINVAL.


Sure.




   return -EINVAL;
   }
   trace_vfio_spapr_create_window(create.page_shift,
@@ -334,6 +334,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase 
*bcontainer,
 container);
   VFIOHostDMAWindow *hostwin;
   hwaddr pgsize = 0;
+Error *local_err = NULL;
   int ret;> /*
@@ -377,9 +378,9 @@ vfio_spapr_container_add_section_window(VFIOContainerBase 
*bcontainer,
   }
   }
-ret = vfio_spapr_create_window(container, section, &pgsize);
+ret = vfio_spapr_create_window(container, section, &pgsize, &local_err);


please pass errp instead.


   if (ret) {
-error_setg_errno(errp, -ret, "Failed to create SPAPR window");
+error_propagate(errp, local_err);


no need to propagate if errp is passed to vfio_spapr_create_window()


As per my understanding, for calling error_setg() and friends, the Error **
object has to be NULL. If I were to call vfio_spapr_create_window() with errp
instead of the local Error object, that'd result into the below assertion
failure with only the first patch applied and a guest booted with a memory >
128G and PCI device passthrough:

  qemu-system-ppc64: ../util/error.c:68: error_setv: Assertion `*errp == NULL' 
failed.

This happens because the errp would already be set in vfio_spapr_create_window()
and calling error_setg_errno(errp, ...) in 
vfio_spapr_container_add_section_window()
would fail as errp is no more NULL. 


Yes but I don't understand how this can happen.

vfio_spapr_container_add_section_window() calls vfio_spapr_create_window()
and if, in each case of error, error_setg() is called and false returned,
it shouldn't reach the assert. In case of error, the caller *should not*
re-set the 'Error **' parameter, that would trigger the assert.


This is 

Re: [PATCH 2/3] add vnc h264 encoder

2025-04-08 Thread Dietmar Maurer
> Please resend the series with a cover letter
> (https://www.qemu.org/docs/master/devel/submitting-a-patch.html#use-git-format-patch)

Ok, I just resent this series with a cover letter and commit message.
(patches unchanged)

> > +#include 
> > +
> > +static void libavcodec_destroy_encoder_context(VncState *vs)
> 
> it's not libavcodec.

I will fix that in v2.


> > +#ifdef CONFIG_GSTREAMER
> > +case VNC_ENCODING_H264:
> > +if (vnc_h264_encoder_init(vs) == 0) {
> > +vnc_set_feature(vs, VNC_FEATURE_H264);
> 
> Before advertising support for the codec, it should actually check if
> the encoder is present.

ok.

> It would also be useful to have an extra VNC
> option like H264=on/off/auto.

I thought it would be better to do that at the client?




[PULL 10/11] storage-daemon/qapi/qapi-schema: Add a proper introduction

2025-04-08 Thread Markus Armbruster
Contents adapted from qapi/qapi-schema.json.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-11-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 storage-daemon/qapi/qapi-schema.json | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/storage-daemon/qapi/qapi-schema.json 
b/storage-daemon/qapi/qapi-schema.json
index 2a562ee32e..0427594c98 100644
--- a/storage-daemon/qapi/qapi-schema.json
+++ b/storage-daemon/qapi/qapi-schema.json
@@ -14,10 +14,26 @@
 # storage daemon.
 
 ##
-# = QEMU storage daemon protocol commands and structs
+# = Introduction
 #
-# For a concise listing of all commands, events, and types in the QEMU
-# storage daemon, please consult the `qapi-qsd-index`.
+# This manual describes the commands and events supported by the QEMU
+# storage daemon QMP.
+#
+# For locating a particular item, please see the `qapi-qsd-index`.
+#
+# The following notation is used in examples:
+#
+# .. qmp-example::
+#
+#   -> ... text sent by client (commands) ...
+#   <- ... text sent by server (command responses and events) ...
+#
+# Example text is formatted for readability.  However, in real
+# protocol usage, its commonly emitted as a single line.
+#
+# Please refer to the
+# :doc:`QEMU Machine Protocol Specification `
+# for the general format of commands, responses, and events.
 ##
 
 
-- 
2.48.1




[PULL 06/11] docs/sphinx/qmp_lexer: Generalize elision syntax

2025-04-08 Thread Markus Armbruster
Accept "... lorem ipsum ..."  in addition to "...".

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-7-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 docs/devel/qapi-code-gen.rst| 6 --
 docs/sphinx/qmp_lexer.py| 2 +-
 tests/qapi-schema/doc-good.json | 2 +-
 tests/qapi-schema/doc-good.out  | 2 +-
 tests/qapi-schema/doc-good.txt  | 2 +-
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index 25a46fafb6..231cc0fecf 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -1029,7 +1029,9 @@ used.
 QMP Examples can be added by using the ``.. qmp-example::`` directive.
 In its simplest form, this can be used to contain a single QMP code
 block which accepts standard JSON syntax with additional server
-directionality indicators (``->`` and ``<-``), and elisions (``...``).
+directionality indicators (``->`` and ``<-``), and elisions.  An
+elision is commonly ``...``, but it can also be or a pair of ``...``
+with text in between.
 
 Optionally, a plaintext title may be provided by using the ``:title:``
 directive option.  If the title is omitted, the example title will
@@ -1062,7 +1064,7 @@ For example::
   #   "device": "ide0-hd0",
   #   ...
   # }
-  # ...
+  # ... more ...
   #  ] }
   #
   #Above, lengthy output has been omitted for brevity.
diff --git a/docs/sphinx/qmp_lexer.py b/docs/sphinx/qmp_lexer.py
index a59de8a079..1bd1b81b70 100644
--- a/docs/sphinx/qmp_lexer.py
+++ b/docs/sphinx/qmp_lexer.py
@@ -24,7 +24,7 @@ class QMPExampleMarkersLexer(RegexLexer):
 'root': [
 (r'-> ', token.Generic.Prompt),
 (r'<- ', token.Generic.Prompt),
-(r' ?\.{3} ?', token.Generic.Prompt),
+(r'\.{3}( .* \.{3})?', token.Generic.Prompt),
 ]
 }
 
diff --git a/tests/qapi-schema/doc-good.json b/tests/qapi-schema/doc-good.json
index 0a4f139f83..14b808f909 100644
--- a/tests/qapi-schema/doc-good.json
+++ b/tests/qapi-schema/doc-good.json
@@ -212,7 +212,7 @@
 #
 #-> "this example"
 #
-#<- "has no title"
+#<- ... has no title ...
 ##
 { 'command': 'cmd-boxed', 'boxed': true,
   'data': 'Object',
diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out
index 5773f1dd6d..dc8352eed4 100644
--- a/tests/qapi-schema/doc-good.out
+++ b/tests/qapi-schema/doc-good.out
@@ -217,7 +217,7 @@ another feature
 
-> "this example"
 
-   <- "has no title"
+   <- ... has no title ...
 doc symbol=EVT_BOXED
 body=
 
diff --git a/tests/qapi-schema/doc-good.txt b/tests/qapi-schema/doc-good.txt
index cb37db606a..17a1d56ef1 100644
--- a/tests/qapi-schema/doc-good.txt
+++ b/tests/qapi-schema/doc-good.txt
@@ -264,7 +264,7 @@ Example::
 
-> "this example"
 
-   <- "has no title"
+   <- ... has no title ...
 
 
 "EVT_BOXED" (Event)
-- 
2.48.1




[PULL 11/11] qga/qapi-schema: Add a proper introduction

2025-04-08 Thread Markus Armbruster
Contents adapted from qapi/qapi-schema.json.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-12-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 qga/qapi-schema.json | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 35ec0e7db3..5316bfacbf 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -2,10 +2,24 @@
 # vim: filetype=python
 
 ##
-# = QEMU guest agent protocol commands and structs
+# This manual describes the commands supported by the QEMU Guest
+# Agent Protocol.
 #
-# For a concise listing of all commands, events, and types in the QEMU
-# guest agent, please consult the `qapi-qga-index`.
+# For locating  a particular item, please see the `qapi-qga-index`.
+#
+# The following notation is used in examples:
+#
+# .. qmp-example::
+#
+#   -> ... text sent by client (commands) ...
+#   <- ... text sent by server (command responses and events) ...
+#
+# Example text is formatted for readability.  However, in real
+# protocol usage, its commonly emitted as a single line.
+#
+# Please refer to the
+# :doc:`QEMU Machine Protocol Specification `
+# for the general format of commands, responses, and events.
 ##
 
 { 'pragma': { 'doc-required': true } }
-- 
2.48.1




[PULL 01/11] docs/devel/qapi-code-gen: Tidy up whitespace

2025-04-08 Thread Markus Armbruster
Consistently use two spaces to separate sentences.

Put "::" on a line of its own when it's preceded by whitespace.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-2-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 docs/devel/qapi-code-gen.rst | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index f9cfe8721f..ad517349fc 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -763,8 +763,8 @@ Names beginning with ``x-`` used to signify "experimental". 
 This
 convention has been replaced by special feature "unstable".
 
 Pragmas ``command-name-exceptions`` and ``member-name-exceptions`` let
-you violate naming rules.  Use for new code is strongly discouraged. See
-`Pragma directives`_ for details.
+you violate naming rules.  Use for new code is strongly discouraged.
+See `Pragma directives`_ for details.
 
 
 Downstream extensions
@@ -1013,7 +1013,7 @@ like this::
 document the success and the error response, respectively.
 
 "Errors" sections should be formatted as an rST list, each entry
-detailing a relevant error condition. For example::
+detailing a relevant error condition.  For example::
 
  # Errors:
  # - If @device does not exist, DeviceNotFound
@@ -1026,13 +1026,13 @@ definition.
 QMP).  In other sections, the text is formatted, and rST markup can be
 used.
 
-QMP Examples can be added by using the ``.. qmp-example::``
-directive. In its simplest form, this can be used to contain a single
-QMP code block which accepts standard JSON syntax with additional server
+QMP Examples can be added by using the ``.. qmp-example::`` directive.
+In its simplest form, this can be used to contain a single QMP code
+block which accepts standard JSON syntax with additional server
 directionality indicators (``->`` and ``<-``), and elisions (``...``).
 
 Optionally, a plaintext title may be provided by using the ``:title:``
-directive option. If the title is omitted, the example title will
+directive option.  If the title is omitted, the example title will
 default to "Example:".
 
 A simple QMP example::
@@ -1043,10 +1043,10 @@ A simple QMP example::
   #-> { "execute": "query-block" }
   #<- { ... }
 
-More complex or multi-step examples where exposition is needed before or
-between QMP code blocks can be created by using the ``:annotated:``
-directive option. When using this option, nested QMP code blocks must be
-entered explicitly with rST's ``::`` syntax.
+More complex or multi-step examples where exposition is needed before
+or between QMP code blocks can be created by using the ``:annotated:``
+directive option.  When using this option, nested QMP code blocks must
+be entered explicitly with rST's ``::`` syntax.
 
 Highlighting in non-QMP languages can be accomplished by using the
 ``.. code-block:: lang`` directive, and non-highlighted text can be
@@ -1466,7 +1466,9 @@ As an example, we'll use the following schema, which 
describes a
 single complex user-defined type, along with command which takes a
 list of that type as a parameter, and returns a single element of that
 type.  The user is responsible for writing the implementation of
-qmp_my_command(); everything else is produced by the generator. ::
+qmp_my_command(); everything else is produced by the generator.
+
+::
 
 $ cat example-schema.json
 { 'struct': 'UserDefOne',
-- 
2.48.1




[PULL 04/11] docs/interop: Sanitize QMP reference manuals TOC

2025-04-08 Thread Markus Armbruster
The "QEMU QMP Reference Manual" and the "QEMU Storage Daemon QMP
Reference Manual" start with a table of contents that looks like this:

Contents

* Title of the manual
  * Title of first first-level section
* Title of its first second-level section
* Title of its second second-level section
...
  * Title of second first-level section
  ...

The first level is useless.  Drop it.

While there, delete the option that limits the TOC to depth 3.  Its
actual depth was 3 before the patch, and is now 2.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-5-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 docs/interop/qemu-qmp-ref.rst| 2 +-
 docs/interop/qemu-storage-daemon-qmp-ref.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/interop/qemu-qmp-ref.rst b/docs/interop/qemu-qmp-ref.rst
index ef8792b53f..3bc1ca12b1 100644
--- a/docs/interop/qemu-qmp-ref.rst
+++ b/docs/interop/qemu-qmp-ref.rst
@@ -4,7 +4,7 @@ QEMU QMP Reference Manual
 =
 
 .. contents::
-   :depth: 3
+   :local:
 
 .. qapi-doc:: qapi/qapi-schema.json
:transmogrify:
diff --git a/docs/interop/qemu-storage-daemon-qmp-ref.rst 
b/docs/interop/qemu-storage-daemon-qmp-ref.rst
index d0228d63b8..dc7bde262a 100644
--- a/docs/interop/qemu-storage-daemon-qmp-ref.rst
+++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst
@@ -2,7 +2,7 @@ QEMU Storage Daemon QMP Reference Manual
 
 
 .. contents::
-   :depth: 3
+   :local:
 
 .. qapi-doc:: storage-daemon/qapi/qapi-schema.json
:transmogrify:
-- 
2.48.1




[PULL 00/11] QAPI patches for 2025-04-08

2025-04-08 Thread Markus Armbruster
The following changes since commit dfaecc04c46d298e9ee81bd0ca96d8754f1c27ed:

  Merge tag 'pull-riscv-to-apply-20250407-1' of 
https://github.com/alistair23/qemu into staging (2025-04-07 09:18:33 -0400)

are available in the Git repository at:

  https://repo.or.cz/qemu/armbru.git tags/pull-qapi-2025-04-08

for you to fetch changes up to 8d41a7dfc2a8f21228b7f29314dd68ad0aa96d10:

  qga/qapi-schema: Add a proper introduction (2025-04-08 09:04:34 +0200)


QAPI patches for 2025-04-08


Markus Armbruster (11):
  docs/devel/qapi-code-gen: Tidy up whitespace
  qapi/rocker: Tidy up query-rocker-of-dpa-flows example
  docs/interop: Delete "QEMU Guest Agent Protocol Reference" TOC
  docs/interop: Sanitize QMP reference manuals TOC
  docs/devel/qapi-code-gen: Improve the part on qmp-example directive
  docs/sphinx/qmp_lexer: Generalize elision syntax
  docs/sphinx/qmp_lexer: Highlight elisions like comments, not prompts
  qapi/qapi-schema: Update introduction for example notation
  qapi/qapi-schema: Address the introduction's bit rot
  storage-daemon/qapi/qapi-schema: Add a proper introduction
  qga/qapi-schema: Add a proper introduction

 docs/devel/qapi-code-gen.rst | 53 
 docs/interop/qemu-ga-ref.rst |  3 --
 docs/interop/qemu-qmp-ref.rst|  2 +-
 docs/interop/qemu-storage-daemon-qmp-ref.rst |  2 +-
 docs/sphinx/qmp_lexer.py |  2 +-
 qapi/qapi-schema.json| 31 +---
 qapi/rocker.json |  2 +-
 qga/qapi-schema.json | 20 +--
 storage-daemon/qapi/qapi-schema.json | 22 ++--
 tests/qapi-schema/doc-good.json  |  2 +-
 tests/qapi-schema/doc-good.out   |  2 +-
 tests/qapi-schema/doc-good.txt   |  2 +-
 12 files changed, 83 insertions(+), 60 deletions(-)

-- 
2.48.1




[PULL 05/11] docs/devel/qapi-code-gen: Improve the part on qmp-example directive

2025-04-08 Thread Markus Armbruster
Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-6-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 docs/devel/qapi-code-gen.rst | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index ad517349fc..25a46fafb6 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -1038,20 +1038,15 @@ default to "Example:".
 A simple QMP example::
 
   # .. qmp-example::
-  #:title: Using query-block
   #
-  #-> { "execute": "query-block" }
-  #<- { ... }
+  # -> { "execute": "query-name" }
+  # <- { "return": { "name": "Fred" } }
 
 More complex or multi-step examples where exposition is needed before
 or between QMP code blocks can be created by using the ``:annotated:``
 directive option.  When using this option, nested QMP code blocks must
 be entered explicitly with rST's ``::`` syntax.
 
-Highlighting in non-QMP languages can be accomplished by using the
-``.. code-block:: lang`` directive, and non-highlighted text can be
-achieved by omitting the language argument.
-
 For example::
 
   # .. qmp-example::
@@ -1061,11 +1056,21 @@ For example::
   #This is a more complex example that can use
   #``arbitrary rST syntax`` in its exposition::
   #
-  #  -> { "execute": "query-block" }
-  #  <- { ... }
+  # -> { "execute": "query-block" }
+  # <- { "return": [
+  # {
+  #   "device": "ide0-hd0",
+  #   ...
+  # }
+  # ...
+  #  ] }
   #
   #Above, lengthy output has been omitted for brevity.
 
+Highlighting in non-QMP languages can be accomplished by using the
+``.. code-block:: lang`` directive, and non-highlighted text can be
+achieved by omitting the language argument.
+
 
 Examples of complete definition documentation::
 
-- 
2.48.1




[PULL 08/11] qapi/qapi-schema: Update introduction for example notation

2025-04-08 Thread Markus Armbruster
The introduction explains example notation.  The series merged in
merge commit e6485190f77e (in 9.1) improved how they look in generated
docs, but neglected to update the introduction accordingly.  Do that
now.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-9-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 qapi/qapi-schema.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index c41c01eb2a..0d027d5017 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -26,10 +26,10 @@
 #
 # Example:
 #
-# ::
+# .. qmp-example::
 #
-#   -> data issued by the Client
-#   <- Server data response
+#   -> ... text sent by client (commands) ...
+#   <- ... text sent by server (command responses and events) ...
 #
 # Please refer to the
 # :doc:`QEMU Machine Protocol Specification `
-- 
2.48.1




[PATCH 2/3] add vnc h264 encoder

2025-04-08 Thread Dietmar Maurer
This patch implements H264 support for VNC. The RFB protocol
extension is defined in:

https://github.com/rfbproto/rfbproto/blob/master/rfbproto.rst#open-h-264-encoding

Currently the Gstreamer x264enc plugin (software encoder) is used
to encode the video stream.

The gstreamer pipe is:

appsrc -> videoconvert -> x264enc -> appsink

Note: videoconvert is required for RGBx to YUV420 conversion.

The code still uses the VNC server framebuffer change detection,
and only encodes and sends video frames if there are changes.

Signed-off-by: Dietmar Maurer 
---
 ui/meson.build|   1 +
 ui/vnc-enc-h264.c | 269 ++
 ui/vnc-jobs.c |  49 ++---
 ui/vnc.c  |  21 
 ui/vnc.h  |  21 
 5 files changed, 346 insertions(+), 15 deletions(-)
 create mode 100644 ui/vnc-enc-h264.c

diff --git a/ui/meson.build b/ui/meson.build
index 35fb04cadf..34f1f33699 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -46,6 +46,7 @@ vnc_ss.add(files(
 ))
 vnc_ss.add(zlib, jpeg)
 vnc_ss.add(when: sasl, if_true: files('vnc-auth-sasl.c'))
+vnc_ss.add(when: gstreamer, if_true: files('vnc-enc-h264.c'))
 system_ss.add_all(when: [vnc, pixman], if_true: vnc_ss)
 system_ss.add(when: vnc, if_false: files('vnc-stubs.c'))
 
diff --git a/ui/vnc-enc-h264.c b/ui/vnc-enc-h264.c
new file mode 100644
index 00..ca8e206335
--- /dev/null
+++ b/ui/vnc-enc-h264.c
@@ -0,0 +1,269 @@
+#include "qemu/osdep.h"
+#include "vnc.h"
+
+#include 
+
+static void libavcodec_destroy_encoder_context(VncState *vs)
+{
+if (!vs->h264) {
+return;
+}
+
+if (vs->h264->source) {
+gst_object_unref(vs->h264->source);
+vs->h264->source = NULL;
+}
+
+if (vs->h264->convert) {
+gst_object_unref(vs->h264->convert);
+vs->h264->convert = NULL;
+}
+
+if (vs->h264->gst_encoder) {
+gst_object_unref(vs->h264->gst_encoder);
+vs->h264->sink = NULL;
+}
+
+if (vs->h264->sink) {
+gst_object_unref(vs->h264->sink);
+vs->h264->sink = NULL;
+}
+
+if (vs->h264->pipeline) {
+gst_object_unref(vs->h264->pipeline);
+vs->h264->pipeline = NULL;
+}
+}
+
+static bool libavcodec_create_encoder_context(VncState *vs, int w, int h)
+{
+g_assert(vs->h264 != NULL);
+
+if (vs->h264->sink) {
+if (w != vs->h264->width || h != vs->h264->height) {
+libavcodec_destroy_encoder_context(vs);
+}
+}
+
+if (vs->h264->sink) {
+return TRUE;
+}
+
+vs->h264->width = w;
+vs->h264->height = h;
+
+vs->h264->source = gst_element_factory_make("appsrc", "source");
+if (!vs->h264->source) {
+VNC_DEBUG("Could not create gst source\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+vs->h264->convert = gst_element_factory_make("videoconvert", "convert");
+if (!vs->h264->convert) {
+VNC_DEBUG("Could not create gst convert element\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+vs->h264->gst_encoder = gst_element_factory_make("x264enc", "gst-encoder");
+if (!vs->h264->gst_encoder) {
+VNC_DEBUG("Could not create gst x264 encoder\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+g_object_set(vs->h264->gst_encoder, "tune", 4, NULL); /* zerolatency */
+/* fix for zerolatency with novnc (without, noVNC displays green stripes) 
*/
+g_object_set(vs->h264->gst_encoder, "threads", 1, NULL);
+
+g_object_set(vs->h264->gst_encoder, "pass", 5, NULL); /* Constant Quality 
*/
+g_object_set(vs->h264->gst_encoder, "quantizer", 26, NULL);
+
+/* avoid access unit delimiters (Nal Unit Type 9) - not required */
+g_object_set(vs->h264->gst_encoder, "aud", false, NULL);
+
+vs->h264->sink = gst_element_factory_make("appsink", "sink");
+if (!vs->h264->sink) {
+VNC_DEBUG("Could not create gst sink\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+vs->h264->pipeline = gst_pipeline_new("vnc-h264-pipeline");
+if (!vs->h264->pipeline) {
+VNC_DEBUG("Could not create gst pipeline\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+gst_object_ref(vs->h264->source);
+if (!gst_bin_add(GST_BIN(vs->h264->pipeline), vs->h264->source)) {
+gst_object_unref(vs->h264->source);
+VNC_DEBUG("Could not add source to gst pipeline\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+gst_object_ref(vs->h264->convert);
+if (!gst_bin_add(GST_BIN(vs->h264->pipeline), vs->h264->convert)) {
+gst_object_unref(vs->h264->convert);
+VNC_DEBUG("Could not add convert to gst pipeline\n");
+libavcodec_destroy_encoder_context(vs);
+return FALSE;
+}
+
+gst_object_ref(vs->h264->gst_encoder);
+if (!gst_bin_add(GST_BIN(vs->h264->pipeline), vs

Re: [PATCH v3 03/12] qcow2: put discard requests in the common queue when discard-no-unref enabled

2025-04-08 Thread Jean-Louis Dupond

Hi,

I hope this patchset can get merged soon, as it contains some good 
improvements.


Next to that, the change below only improves the performance on 
discards? It's not that something is broken/can cause issues in the 
current code?
Otherwise it might be a good idea to have this one merged as soon as 
possible.


Thanks for the work on this!

Jean-Louis

On 9/13/24 18:39, Andrey Drobyshev wrote:

Normally discard requests are stored in the queue attached to BDRVQcow2State
to be processed later at once.  Currently discard-no-unref option handling
causes these requests to be processed straight away.  Let's fix that.

Note that when doing regular discards qcow2_free_any_cluster() would check
for the presence of external data files for us and redirect request to
underlying data_file.  Here we want to do the same but avoid refcount updates,
thus we perform the same checks.

Suggested-by: Hanna Czenczek 
Signed-off-by: Andrey Drobyshev 
Reviewed-by: Alexander Ivanov 
---
  block/qcow2-cluster.c | 39 +--
  1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 5f057ba2fd..7dff0bd5a1 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1893,6 +1893,28 @@ again:
  return 0;
  }
  
+/*

+ * Helper for adding a discard request to the queue without any refcount
+ * modifications.  If external data file is used redirects the request to
+ * the corresponding BdrvChild.
+ */
+static inline void
+discard_no_unref_any_file(BlockDriverState *bs, uint64_t offset,
+  uint64_t length, QCow2ClusterType ctype,
+  enum qcow2_discard_type dtype)
+{
+BDRVQcow2State *s = bs->opaque;
+
+if (s->discard_passthrough[dtype] &&
+(ctype == QCOW2_CLUSTER_NORMAL || ctype == QCOW2_CLUSTER_ZERO_ALLOC)) {
+if (has_data_file(bs)) {
+bdrv_pdiscard(s->data_file, offset, length);
+} else {
+qcow2_queue_discard(bs, offset, length);
+}
+}
+}
+
  /*
   * This discards as many clusters of nb_clusters as possible at once (i.e.
   * all clusters in the same L2 slice) and returns the number of discarded
@@ -1974,12 +1996,10 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t 
offset, uint64_t nb_clusters,
  if (!keep_reference) {
  /* Then decrease the refcount */
  qcow2_free_any_cluster(bs, old_l2_entry, type);
-} else if (s->discard_passthrough[type] &&
-   (cluster_type == QCOW2_CLUSTER_NORMAL ||
-cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+} else {
  /* If we keep the reference, pass on the discard still */
-bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
-  s->cluster_size);
+discard_no_unref_any_file(bs, old_l2_entry & L2E_OFFSET_MASK,
+  s->cluster_size, cluster_type, type);
  }
  }
  
@@ -2088,12 +2108,11 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,

  if (!keep_reference) {
  /* Then decrease the refcount */
  qcow2_free_any_cluster(bs, old_l2_entry, 
QCOW2_DISCARD_REQUEST);
-} else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
-   (type == QCOW2_CLUSTER_NORMAL ||
-type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+} else {
  /* If we keep the reference, pass on the discard still */
-bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
-s->cluster_size);
+discard_no_unref_any_file(bs, old_l2_entry & L2E_OFFSET_MASK,
+  s->cluster_size, type,
+  QCOW2_DISCARD_REQUEST);
  }
  }
  }




Re: [PULL 00/11] QAPI patches patches for 2025-04-08

2025-04-08 Thread Markus Armbruster
Here's my argument for taking this into 10.0.

The changes can be divided into four parts:

1. Address deficiencies right at the beginning of the three QMP manuals:

   * "QEMU QMP Reference Manual" section "Introduction" suffers from bit
 rot: much of the contents is useless or misleading.

   * The "QEMU Storage Daemon QMP Reference Manual" and the "QEMU Guest
 Agent Protocol Reference" lack a proper introduction.

   * "QEMU Guest Agent Protocol Reference" has a useless table of
 contents.

   * "QEMU QMP Reference Manual" and "QEMU Storage Daemon QMP Reference
 Manual" both have a needlessly ugly table of contents.

2. Render the example notation in these introductions properly.  This
   involves changing *one* line of code in docs/sphinx/qmp_lexer.py.
   This is the only code change, everything else is documentation.

3. Minor cleanup of rocker QMP documentation

4. Minor improvement of QAPI developer documentation




Re: [PATCH rfcv2 03/20] HostIOMMUDevice: Introduce realize_late callback

2025-04-08 Thread Cédric Le Goater

On 4/7/25 13:19, Cédric Le Goater wrote:

On 2/19/25 09:22, Zhenzhong Duan wrote:

Currently we have realize() callback which is called before attachment.
But there are still some elements e.g., hwpt_id is not ready before
attachment. So we need a realize_late() callback to further initialize
them.


The relation between objects HostIOMMUDevice and VFIOIOMMU is starting
to look too complex for me.

I think it makes sense to realize HostIOMMUDevice after the device
is attached. Can't we move :

     hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
     vbasedev->hiod = hiod;

under ->attach_device() and also the call :

     if (!vfio_device_hiod_realize(vbasedev, errp)) {

later in the ->attach_device() patch ?

hiod_legacy_vfio_realize() doesn't do much. We might need to rework
hiod_iommufd_vfio_realize() which queries the iommufd hw caps, later
used by intel-iommu.


The only dependency I see on the IOMMUFD HostIOMMUDevice when attaching
the device to the container is in iommufd_cdev_autodomains_get(). The
flags for IOMMU_HWPT_ALLOC depend on the HW capability of the IOMMUFD
backend and we rely on hiod_iommufd_vfio_realize() to have done the
query on the iommufd kernel device before.

Since this is not a hot path, I don't think it is a problem to add
a redundant call to iommufd_backend_get_device_info() in
iommufd_cdev_autodomains_get() and avoid the IOMMUFD HostIOMMUDevice
dependency. With that we can move the HostIOMMUDevice creation and
realize sequence at the end of the device attach sequence.

I think this makes the code cleaner when it comes to using the
vbasedev->hiod pointer too.


Anyway, it is a good time to clean up our interfaces before adding more.


On that topic, I think

   iommufd_cdev_attach_ioas_hwpt
   iommufd_cdev_detach_ioas_hwpt

belong to IOMMUFD backend.


Thanks,

C.

 





Re: [PATCH 00/16] Add Multi-Core Debug (MCD) API support

2025-04-08 Thread Mario Fleischmann
Apologies for the line wrapping in yesterday's answer. Should be fixed now.

On 08.04.2025 09:00, Markus Armbruster wrote:
> Mario Fleischmann  writes:
> 
>> Thanks a lot for the response, I really appreciate your time.
>>
>> On 07.04.2025 14:33, Markus Armbruster wrote:
>>
>>> Mario Fleischmann  writes:
>>>
 This patch series introduces support for the Multi-Core Debug (MCD) API, a
 commonly used debug interface by emulators. The MCD API, defined through a
 header file, consists of 54 functions for implementing debug and trace.
 However, since it is a header-file-only interface, MCD does not specify a
 communication protocol. We get around this limitation by following a remote
 procedure call approach using QMP. The client stub corresponding to this
 implementation can be found at https://gitlab.com/lauterbach/mcdrefsrv

 This series is the successor to:
 "[PATCH v5 00/18] first version of mcdstub"
 (https://patchew.org/QEMU/20231220162555.19545-1-nicolas.e...@lauterbach.com/)

 * Architecture-independent MCD implementation
 * QMP instead of custom TCP protocol
>>>
>>> Rationale?  There must be pros and cons.
>>
>> Assuming you're referring to the protocol of the previous patch series:
>> The previous TCP protocol only supported a subset of MCD. As the 
>> implementation progresses, the protocol eventually needs to be extended, 
>> possibly resulting in backwards compatibility problems.
>> Following an RPC approach and keeping the communication layer as close 
>> to the MCD API as possible results in a larger protocol at first, but 
>> does not need to be changed afterwards.
>> By directly mapping MCD functions onto QMP commands, the complexity in 
>> the server and client stubs can be minimized.
>>
>> Assuming you're referring to the QMP choice:
>> QMP is being described as the "protocol which allows applications to 
>> control a QEMU instance".
>> It provides a RPC framework which automatically (de)serializes methods 
>> and their parameters, even inside QTests.
>> The whole interface is automatically documented.
> 
> Let's see whether I understand.
> 
> MCD is an established C interface.
> 
> Your goal is to provide remote MCD for QEMU, i.e. the client uses the
> MCD C interface, and the interface's implementation talks to an MCD
> server integrated into QEMU via some remote transport.
> 
> The previous version connects the two with a bespoke protocol via TCP.
> The client software translates between the C interface and this
> protocol.  QEMU implements the protocol's server side.  Designing and
> maintaining a protocol is expensive.
> 
> This version makes two changes:
> 
> 1. Instead of layering a protocol on top of MCD, you use MCD directly.
> This eliminates protocol design and maintenance.  Moreover, translation
> becomes straightforward marshaling / unmarshaling for the transport.
> 
> 2. You use QMP as a transport.  This gets you marshaling / unmarshaling
> for free.  It also provides some useful infrastructure for tests,
> documentation and such.
> 
> Fair?

Couldn't have put it better myself.

>>> How much data would you expect to flow in practical usage?  QMP isn't
>>> designed for bulk transfer...
>>
>> According to ifstat, the expected data rate in practical usage is around
>>
>> KB/s in  KB/s out
>> 100  100
>>
>> I fully understand your concern and agree that a JSON-based
>> protocol does not result in the lowest data rate.
>>
>> If the data rate is the highest priority: *Before* the QMP support was 
>> implemented, the MCD interface was built on a custom RPC framework, 
>> generated with the code generator at:
>>
>> https://gitlab.com/lauterbach/mcdrefsrv/-/tree/main/codegen
>>
>> The resulting header file was basically a set of functions capable of 
>> serializing MCD's function arguments into a byte stream and vice-versa:
>>
>> https://gitlab.com/lauterbach/mcdrefsrv/-/blob/df754cef7f19ece2d00b6ce4e307ba37e91e5dcb/include/mcd_rpc.h
>>
>> The QMP support was added because of the advantages listed above and in 
>> order to evade yet another custom communication protocol.
>> As a user of the MCD interface, I haven't noticed any negative impact of 
>> the increased data rate in realistic debugging scenarios, even when 
>> trying to drive the data rate up. If that would have been the case, I 
>> would have sent this patch request with our custom RPC protocol.
> 
> I see.
> 
 qemu-system- [options] -qmp tcp::1235,server=on,wait=off

 * Architecture-independent QTest test suite

 V=1 QTEST_QEMU_BINARY="./qemu-system- [options]" tests/qtest/mcd-test

 * Architecture-specific tests can be found at the client stub
>>>
>>> [...]
>>>
   qapi/mcd.json | 2366 ++
>>>
>>> This is *massive*.  By non-blank, non-comment lines, it's the second
>>> largest module in qapi/, almost 9% of the entire schema.  It's larger
>>> than the entire QEMU guest agent QAPI schema.  The QAPI g

[PATCH 1/3] new configure option to enable gstreamer

2025-04-08 Thread Dietmar Maurer
GStreamer is required to implement H264 encoding for VNC. Please note
that QEMU already depends on this library when you enable Spice.

Signed-off-by: Dietmar Maurer 
---
 meson.build   | 10 ++
 meson_options.txt |  2 ++
 scripts/meson-buildoptions.sh |  5 -
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 41f68d3806..28ca37855a 100644
--- a/meson.build
+++ b/meson.build
@@ -1348,6 +1348,14 @@ if not get_option('zstd').auto() or have_block
 required: get_option('zstd'),
 method: 'pkg-config')
 endif
+
+gstreamer = not_found
+if not get_option('gstreamer').auto() or have_block
+  gstreamer = dependency('gstreamer-1.0 gstreamer-base-1.0', version: 
'>=1.22.0',
+  required: get_option('gstreamer'),
+  method: 'pkg-config')
+endif
+
 qpl = not_found
 if not get_option('qpl').auto() or have_system
   qpl = dependency('qpl', version: '>=1.5.0',
@@ -2563,6 +2571,7 @@ config_host_data.set('CONFIG_MALLOC_TRIM', 
has_malloc_trim)
 config_host_data.set('CONFIG_STATX', has_statx)
 config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id)
 config_host_data.set('CONFIG_ZSTD', zstd.found())
+config_host_data.set('CONFIG_GSTREAMER', gstreamer.found())
 config_host_data.set('CONFIG_QPL', qpl.found())
 config_host_data.set('CONFIG_UADK', uadk.found())
 config_host_data.set('CONFIG_QATZIP', qatzip.found())
@@ -4836,6 +4845,7 @@ summary_info += {'snappy support':snappy}
 summary_info += {'bzip2 support': libbzip2}
 summary_info += {'lzfse support': liblzfse}
 summary_info += {'zstd support':  zstd}
+summary_info += {'gstreamer support': gstreamer}
 summary_info += {'Query Processing Library support': qpl}
 summary_info += {'UADK Library support': uadk}
 summary_info += {'qatzip support':qatzip}
diff --git a/meson_options.txt b/meson_options.txt
index 59d973bca0..11cd132be5 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -254,6 +254,8 @@ option('vnc_sasl', type : 'feature', value : 'auto',
description: 'SASL authentication for VNC server')
 option('vte', type : 'feature', value : 'auto',
description: 'vte support for the gtk UI')
+option('gstreamer', type : 'feature', value : 'auto',
+   description: 'for VNC H.264 encoding with gstreamer')
 
 # GTK Clipboard implementation is disabled by default, since it may cause hangs
 # of the guest VCPUs. See gitlab issue 1150:
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 3e8e00852b..b0c273d61e 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -229,6 +229,7 @@ meson_options_help() {
   printf "%s\n" '  Xen PCI passthrough support'
   printf "%s\n" '  xkbcommon   xkbcommon support'
   printf "%s\n" '  zstdzstd compression support'
+  printf "%s\n" '  gstreamer   gstreamer support (H264 for VNC)'
 }
 _meson_option_parse() {
   case $1 in
@@ -581,6 +582,8 @@ _meson_option_parse() {
 --disable-xkbcommon) printf "%s" -Dxkbcommon=disabled ;;
 --enable-zstd) printf "%s" -Dzstd=enabled ;;
 --disable-zstd) printf "%s" -Dzstd=disabled ;;
-*) return 1 ;;
+--enable-gstreamer) printf "%s" -Dgstreamer=enabled ;;
+--disable-gstreamer) printf "%s" -Dgstreamer=disabled ;;
+   *) return 1 ;;
   esac
 }
-- 
2.39.5




Re: [PATCH 1/2] accel/tcg: add get_virtual_clock for TCG

2025-04-08 Thread Mark Burton
In principle I like this, but 
1/ Throughout the API, can we please make everything consistent and make sure 
that all registrations take a handle (void *) and all callback functions pass 
that handle (and the ID)
 - right now, some things do, some things don't, and this specific case seems to 
take a handle on registration, but does not provide it on callback (!)

(This is the current implementation :
typedef int64_t (*qemu_plugin_time_cb_t) (void);
...
QEMU_PLUGIN_API void qemu_plugin_register_time_cb(qemu_plugin_id_t id, const 
void *handle, qemu_plugin_time_cb_t cb);
)

2/ The current implementation makes use of the callback _ONLY_ in the case of 
single TCG — it’s most interesting when we have MTTCG enabled (and I see no 
reason not to provide the same mechanism for any other accelerator if/when 
anything in QEMU requests ‘the time’).


Cheers
Mark.


> On 3 Apr 2025, at 13:38, Alex Bennée  wrote:
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary of 
> any links or attachments, and do not enable macros.
> 
> Rather than allowing cpus_get_virtual_clock() to fall through to
> cpu_get_clock() introduce a TCG handler so it can make a decision
> about what time it is.
> 
> Initially this just calls cpu_get_clock() as before but this will
> change in later commits.
> 
> Signed-off-by: Alex Bennée 
> ---
> accel/tcg/tcg-accel-ops.c | 6 ++
> 1 file changed, 6 insertions(+)
> 
> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
> index d9b662efe3..1432d1c5b1 100644
> --- a/accel/tcg/tcg-accel-ops.c
> +++ b/accel/tcg/tcg-accel-ops.c
> @@ -197,6 +197,11 @@ static inline void tcg_remove_all_breakpoints(CPUState 
> *cpu)
> cpu_watchpoint_remove_all(cpu, BP_GDB);
> }
> 
> +static int64_t tcg_get_virtual_clock(void)
> +{
> +return cpu_get_clock();
> +}
> +
> static void tcg_accel_ops_init(AccelOpsClass *ops)
> {
> if (qemu_tcg_mttcg_enabled()) {
> @@ -212,6 +217,7 @@ static void tcg_accel_ops_init(AccelOpsClass *ops)
> ops->get_virtual_clock = icount_get;
> ops->get_elapsed_ticks = icount_get;
> } else {
> +ops->get_virtual_clock = tcg_get_virtual_clock;
> ops->handle_interrupt = tcg_handle_interrupt;
> }
> }
> --
> 2.39.5
> 



[PATCH v2] hw/i2c/aspeed: Fix wrong I2CC_DMA_LEN when I2CM_DMA_TX/RX_ADDR set first

2025-04-08 Thread Jamin Lin via
In the previous design, the I2C model would update I2CC_DMA_LEN (0x54) based on
the value of I2CM_DMA_LEN (0x1C) when the firmware set either I2CM_DMA_TX_ADDR
(0x30) or I2CM_DMA_RX_ADDR (0x34). However, this only worked correctly if the
firmware set I2CM_DMA_LEN before setting I2CM_DMA_TX_ADDR or I2CM_DMA_RX_ADDR.

If the firmware instead set I2CM_DMA_TX_ADDR or I2CM_DMA_RX_ADDR before setting
I2CM_DMA_LEN, the value written to I2CC_DMA_LEN would be incorrect.

Ideally, this issue should be resolved by updating the model to set
I2CC_DMA_LEN (0x54) when the firmware writes to the I2CM_DMA_LEN (0x1C)
register, instead of when it writes to I2CM_DMA_TX_ADDR (0x30) or
I2CM_DMA_RX_ADDR (0x34).

Originally, the design of I2CM_DMA_LEN (0x1C) included buffer length
write-enable bits for the current command:
Bit 31 enabled the RX buffer length update
Bit 15 enabled the TX buffer length update

In other words, when the firmware set either bit 31 or bit 15, the I2C model
could safely update I2CC_DMA_LEN (0x54) with the value in I2CM_DMA_LEN (0x1C).

However, starting with the AST2700, the design of the I2CM_DMA_LEN (0x1C)
register was changed. The write-enable bits (bit 31 and bit 15) were removed,
meaning there is no longer an explicit indication of whether the firmware
intends to update the TX or RX length.

As a result, on AST2700 and newer SoCs, the model cannot reliably determine
whether a write to I2CM_DMA_LEN was meant for TX or RX. This ambiguity is
especially problematic when the value written is 0, which actually corresponds
to a DMA length of 1.

To ensure consistent behavior across all SoCs, the model now updates
I2CC_DMA_LEN when I2CM_CMD (0x18) is written, as this is the final command that
initiates a TX or RX transfer and reflects the firmware’s intent more clearly.

Signed-off-by: Jamin Lin 
Fixes: ba2cccd (aspeed: i2c: Add new mode support)
---
 hw/i2c/aspeed_i2c.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index a8fbb9f44a..c659099e9a 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -634,6 +634,20 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, 
hwaddr offset,
 break;
 }
 
+/* Handle DMA length */
+if (SHARED_FIELD_EX32(value, TX_DMA_EN) &&
+SHARED_FIELD_EX32(value, M_TX_CMD)) {
+bus->regs[R_I2CC_DMA_LEN] = ARRAY_FIELD_EX32(bus->regs,
+ I2CM_DMA_LEN,
+ TX_BUF_LEN) + 1;
+}
+if (SHARED_FIELD_EX32(value, RX_DMA_EN) &&
+SHARED_FIELD_EX32(value, M_RX_CMD)) {
+bus->regs[R_I2CC_DMA_LEN] = ARRAY_FIELD_EX32(bus->regs,
+ I2CM_DMA_LEN,
+ RX_BUF_LEN) + 1;
+}
+
 if (bus->regs[R_I2CM_INTR_STS] & 0x) {
 qemu_log_mask(LOG_UNIMP, "%s: Packet mode is not implemented\n",
   __func__);
@@ -656,8 +670,6 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, 
hwaddr offset,
 bus->dma_dram_offset =
 deposit64(bus->dma_dram_offset, 0, 32,
   FIELD_EX32(value, I2CM_DMA_TX_ADDR, ADDR));
-bus->regs[R_I2CC_DMA_LEN] = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN,
- TX_BUF_LEN) + 1;
 break;
 case A_I2CM_DMA_RX_ADDR:
 bus->regs[R_I2CM_DMA_RX_ADDR] = FIELD_EX32(value, I2CM_DMA_RX_ADDR,
@@ -665,8 +677,6 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, 
hwaddr offset,
 bus->dma_dram_offset =
 deposit64(bus->dma_dram_offset, 0, 32,
   FIELD_EX32(value, I2CM_DMA_RX_ADDR, ADDR));
-bus->regs[R_I2CC_DMA_LEN] = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN,
- RX_BUF_LEN) + 1;
 break;
 case A_I2CM_DMA_LEN:
 w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
-- 
2.43.0




[PULL 1/2] hw/loongarch/virt: Fix cpuslot::cpu set at last in virt_cpu_plug()

2025-04-08 Thread Song Gao
From: Bibo Mao 

In function virt_cpu_plug(), object cpuslot::cpu should be set last, and
only when there is no error. Otherwise it is problematic: cpuslot::cpu is
set in advance even though the function then returns because of an error.

Fixes: ab9935d2991e (hw/loongarch/virt: Implement cpu plug interface)
Signed-off-by: Bibo Mao 
Reviewed-by: Markus Armbruster 
Message-Id: <20250324030145.3037408-2-maob...@loongson.cn>
Signed-off-by: Song Gao 
---
 hw/loongarch/virt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index e25864214f..504f8755a0 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -973,8 +973,6 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
 Error *err = NULL;
 
-cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
-cpu_slot->cpu = CPU(dev);
 if (lvms->ipi) {
 hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
 if (err) {
@@ -998,6 +996,8 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 }
 }
 
+cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
+cpu_slot->cpu = CPU(dev);
 return;
 }
 
-- 
2.34.1




Re: [PATCH 02/11] qapi/rocker: Tidy up query-rocker-of-dpa-flows example

2025-04-08 Thread Markus Armbruster
Eric Blake  writes:

> On Fri, Apr 04, 2025 at 02:14:04PM +0200, Markus Armbruster wrote:
>> The command can return any number of RockerOfDpaFlow objects.  The
>> example shows it returning exactly two, with the second objecy's
>
> object's

Fixing...  thanks!

>> members elided.  Tweak it so it elides elements after the first
>> instead.




Re: [PATCH v7 0/2] hw/loongarch/virt: Replace destination error with error_abort

2025-04-08 Thread gaosong
Hi Markus, I have picked up this series for my 'loongarch bug fix for 
10.0' branch.


thanks.
Song Gao
在 2025/3/24 上午11:01, Bibo Mao 写道:

In function virt_cpu_plug() and virt_cpu_unplug(), the error is
impossible. Destination error is not propagated and is replaced with
error_abort. With this, the logic is simple.

Also there is bugfix in function virt_cpu_plug(), Object cpuslot::cpu is
set at last only when there is no any error.

---
   v6 ... v7:
 1. Replace dest error from parameter errp with error_abort since the
error is impossible.
 2. Some patches are merged, refresh the pending two patches.

   v5 ... v6:
 1. If there is nested error report when restore from error in function
virt_cpu_plug(), set output Error object with &error_abort rather
than NULL, since it is almost impossible now.
 2. If there is nested error report when restore from error in function
virt_cpu_unplug(), set output Error object with &error_abort rather
than NULL, since it is almost impossible now.

   v4 ... v5:
 1. Split patch2 in v4 into three small patches, two are fixup for error
handling when cpu plug/unplug fails so that system can continue to
run, one is to remove error_propagate() and refresh title.
 2. Refresh changelog in last patch and remove fixes information
since it is impossible to happen.

   v3 ... v4:
 1. Add missed this cleanup with error and remove some local error
object.
 2. Replace local error object with error_abort object in
virt_cpu_irq_init(), since its return value is not checked.

   v2 ... v3:
 1. Add missing modification replacing error_propagate() + error_setg()
   with error_setg().
 2. Some enhancement about error handling, handling error
symmetrically in many places

   v1 ... v2:
 1. Add fixes tag and change title with fix prefix in patch 1.
 2. Replace error_propagate() with error_setg(), and return directly
for any error.
---
Bibo Mao (2):
   hw/loongarch/virt: Fix cpuslot::cpu set at last in virt_cpu_plug()
   hw/loongarch/virt: Replace destination error with error_abort

  hw/loongarch/virt.c | 43 +--
  1 file changed, 9 insertions(+), 34 deletions(-)


base-commit: 71119ed3651622e1c531d1294839e9f3341adaf5





[PATCH 3/3] vnc: h264: send additional frames after the display is clean

2025-04-08 Thread Dietmar Maurer
The H264 implementation only sends frames when it detects changes in
the server's framebuffer. This leads to artifacts when there are no
further changes, as the internal H264 encoder may still contain data.

This patch modifies the code to send a few additional frames in such
situations to flush the H264 encoder data.

Signed-off-by: Dietmar Maurer 
---
 ui/vnc.c | 25 -
 ui/vnc.h |  3 +++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 2e60b55e47..4ba0b715fd 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3239,7 +3239,30 @@ static void vnc_refresh(DisplayChangeListener *dcl)
 vnc_unlock_display(vd);
 
 QTAILQ_FOREACH_SAFE(vs, &vd->clients, next, vn) {
-rects += vnc_update_client(vs, has_dirty);
+int client_dirty = has_dirty;
+if (vs->h264) {
+if (client_dirty) {
+vs->h264->keep_dirty = VNC_H264_KEEP_DIRTY;
+} else {
+if (vs->h264->keep_dirty > 0) {
+client_dirty = 1;
+vs->h264->keep_dirty--;
+}
+}
+}
+
+int count = vnc_update_client(vs, client_dirty);
+rects += count;
+
+if (vs->h264 && !count && vs->h264->keep_dirty) {
+VncJob *job = vnc_job_new(vs);
+int height = pixman_image_get_height(vd->server);
+int width = pixman_image_get_width(vd->server);
+vs->job_update = vs->update;
+vs->update = VNC_STATE_UPDATE_NONE;
+vnc_job_add_rect(job, 0, 0, width, height);
+vnc_job_push(job);
+}
 /* vs might be free()ed here */
 }
 
diff --git a/ui/vnc.h b/ui/vnc.h
index 7e232f7dac..e1b81d6bcc 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -236,10 +236,13 @@ typedef struct VncZywrle {
 } VncZywrle;
 
 #ifdef CONFIG_GSTREAMER
+/* Number of frames we send after the display is clean. */
+#define VNC_H264_KEEP_DIRTY 10
 typedef struct VncH264 {
 GstElement *pipeline, *source, *gst_encoder, *sink, *convert;
 size_t width;
 size_t height;
+guint keep_dirty;
 } VncH264;
 #endif
 
-- 
2.39.5




[RFC 1/3] cxl_type3: Preparing information sharing between VMs

2025-04-08 Thread nifan . cxl
From: Fan Ni 

Add two data structures for sharing information between multiple VMs.
The global cci_map_buf is used to provide mappings between cci name and cci
pointer.
Each VM has its own cci_map_buf. However, since we expect the two VMs to share
the same configuration, the same CCI name should point to its own CCI.
On the FM, we need to use the cci pointer to find the cci name.
While on the test VM, we use the name to find cci pointer for MCTP
message process.
The mctp_shared_buffer is used to pass MCTP command information between
FM and test VM for QMP command process.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c| 125 +-
 include/hw/cxl/cxl_device.h   |   6 ++
 include/hw/cxl/cxl_mctp_message.h |  40 ++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 include/hw/cxl/cxl_mctp_message.h

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 11c38a9292..7f85616ca1 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -29,6 +29,9 @@
 #include "system/numa.h"
 #include "hw/cxl/cxl.h"
 #include "hw/pci/msix.h"
+#include "hw/cxl/cxl_mctp_message.h"
+
+struct CXLCCINamePtrMaps *cci_map_buf;
 
 /* type3 device private */
 enum CXL_T3_MSIX_VECTOR {
@@ -998,6 +1001,97 @@ static void init_alert_config(CXLType3Dev *ct3d)
 };
 }
 
+static int ct3_mctp_buf_open(const char *filename, int flags)
+{
+char name[128];
+snprintf(name, sizeof(name), "/%s", filename);
+return shm_open(name, flags, 0666);
+}
+
+static int ct3_mctp_buf_unlink(const char *filename)
+{
+char name[128];
+snprintf(name, sizeof(name), "/%s", filename);
+return shm_unlink(name);
+}
+
+static struct CXLMCTPSharedBuf *ct3_mctp_buf_map(int fd, int size)
+{
+void *map;
+
+if (fd < 0) {
+return NULL;
+}
+
+map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+if (map == MAP_FAILED) {
+return NULL;
+}
+
+return (CXLMCTPSharedBuf *)map;
+}
+
+
+static int ct3_mctp_buf_create(const char *filename, size_t size)
+{
+int fd, rc;
+
+fd = ct3_mctp_buf_open(filename, O_RDWR | O_CREAT);
+if (fd == -1) {
+return -1;
+}
+
+rc = ftruncate(fd, size);
+
+if (rc) {
+close(fd);
+return -1;
+}
+
+return fd;
+}
+
+static int ct3_setup_mctp_command_share_buffer(CXLType3Dev *ct3d, bool create)
+{
+int fd;
+int size = sizeof(*ct3d->mctp_shared_buffer);
+sprintf(ct3d->mctp_buf_name, MCTP_MESSAGE_BUF_NAME);
+
+if (create) {
+fd = ct3_mctp_buf_create(ct3d->mctp_buf_name, size);
+} else {
+fd = ct3_mctp_buf_open(ct3d->mctp_buf_name, O_RDWR | O_CREAT);
+}
+
+if (fd == -1) {
+return fd;
+}
+ct3d->mctp_buf_fd = fd;
+ct3d->mctp_shared_buffer = ct3_mctp_buf_map(ct3d->mctp_buf_fd, size);
+if (ct3d->mctp_shared_buffer) {
+return 0;
+}
+return -1;
+}
+
+static int init_cci_name_ptr_mapping(void)
+{
+if (!cci_map_buf) {
+cci_map_buf = g_malloc(sizeof(*cci_map_buf));
+}
+return 0;
+}
+
+static void add_cci_name_ptr_mapping(const char *name, void *p)
+{
+int n = cci_map_buf->num_mappings;
+struct CXLCCINamePtrMap *map = &cci_map_buf->maps[n];
+
+strcpy(map->cci_name, name);
+map->cci_pointer = p;
+cci_map_buf->num_mappings++;
+}
+
 void ct3_realize(PCIDevice *pci_dev, Error **errp)
 {
 ERRP_GUARD();
@@ -1108,6 +1202,14 @@ void ct3_realize(PCIDevice *pci_dev, Error **errp)
 ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
 }
 
+if (ct3d->allow_fm_attach) {
+init_cci_name_ptr_mapping();
+if (ct3d->mctp_buf_init) {
+ct3_setup_mctp_command_share_buffer(ct3d, true);
+} else {
+ct3_setup_mctp_command_share_buffer(ct3d, false);
+}
+}
 return;
 
 err_release_cdat:
@@ -1150,6 +1252,15 @@ void ct3_exit(PCIDevice *pci_dev)
 if (ct3d->hostvmem) {
 address_space_destroy(&ct3d->hostvmem_as);
 }
+
+if (ct3d->mctp_shared_buffer) {
+munmap(ct3d->mctp_shared_buffer, sizeof(*ct3d->mctp_shared_buffer));
+close(ct3d->mctp_buf_fd);
+ct3_mctp_buf_unlink(ct3d->mctp_buf_name);
+ct3d->mctp_shared_buffer = NULL;
+}
+g_free(cci_map_buf);
+cci_map_buf = NULL;
 }
 
 /*
@@ -1352,6 +1463,16 @@ void ct3d_reset(DeviceState *dev)
 }
 cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
  512); /* Max payload made up */
+if (ct3d->allow_fm_attach) {
+char name[64];
+
+memset(name, 0, 64);
+sprintf(name, "%lu:%s", ct3d->sn, "oob_mctp_cci");
+add_cci_name_ptr_mapping(name, &ct3d->oob_mctp_cci);
+cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->oob_mctp_cci,
+  DEVICE(ct3d), DEVICE(ct3d),
+  MCTP_CXL_MAILBOX_BYTES);
+}
 }
 
 static const Property ct3_prop

[PATCH 0/3] Add VNC Open H.264 Encoding

2025-04-08 Thread Dietmar Maurer
As defined by:

https://github.com/rfbproto/rfbproto/blob/master/rfbproto.rst#open-h-264-encoding

The noVNC HTML application recently added support for this encoding. There is
also an open pull request to add audio support to noVNC:

https://github.com/novnc/noVNC/pull/1952

With that in place, the web based VNC console is good enough to display
a VM showing a video with reasonable bandwidth.

Possible improvements:

- Dynamic switching to/from H264 mode at high change rates
- Support for hardware encoders

We may also extend the RFB Audio protocol with "opus" encoding, because 
uncompressed
audio needs too much bandwidth.


Dietmar Maurer (3):
  new configure option to enable gstreamer
  add vnc h264 encoder
  vnc: h264: send additional frames after the display is clean

 meson.build   |  10 ++
 meson_options.txt |   2 +
 scripts/meson-buildoptions.sh |   5 +-
 ui/meson.build|   1 +
 ui/vnc-enc-h264.c | 269 ++
 ui/vnc-jobs.c |  49 +--
 ui/vnc.c  |  46 +-
 ui/vnc.h  |  24 +++
 8 files changed, 389 insertions(+), 17 deletions(-)
 create mode 100644 ui/vnc-enc-h264.c

-- 
2.39.5




Re: [PATCH 2/3] add vnc h264 encoder

2025-04-08 Thread Marc-André Lureau
Hi

On Tue, Apr 8, 2025 at 1:54 PM Dietmar Maurer  wrote:
>
> > Please resend the series with a cover letter
> > (https://www.qemu.org/docs/master/devel/submitting-a-patch.html#use-git-format-patch)
>
> Ok, just resend this series with a cover letter and commit message.
> (patches unchanged)

ok, thanks

>
> > > +#include 
> > > +
> > > +static void libavcodec_destroy_encoder_context(VncState *vs)
> >
> > it's not libavcodec.
>
> I will fix that in v2.

What about encodebin suggestion?

>
>
> > > +#ifdef CONFIG_GSTREAMER
> > > +case VNC_ENCODING_H264:
> > > +if (vnc_h264_encoder_init(vs) == 0) {
> > > +vnc_set_feature(vs, VNC_FEATURE_H264);
> >
> > Before advertising support for the codec, it should actually check if
> > the encoder is present.
>
> ok.
>
> > It would also be useful to have an extra VNC
> > option like H264=on/off/auto.
>
> I thought it would be better to do that at the client?

Well, it can be worth it to prevent h264 usage from the server too. Or
to ensure the server is h264-capable. (this wasn't seen as much
necessary for other codecs that are low-resource and/or patent-free,
but may make sense too)


-- 
Marc-André Lureau



[PATCH v3 09/10] target/riscv: rvv: Apply vext_check_input_eew to vector indexed load/store instructions

2025-04-08 Thread Max Chou
Handle the overlap of source registers with different EEWs.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 2d067a59e27..445a0b72a56 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1043,7 +1043,8 @@ static bool ld_index_check(DisasContext *s, arg_rnfvm* a, 
uint8_t eew)
 {
 return require_rvv(s) &&
vext_check_isa_ill(s) &&
-   vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew);
+   vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) &&
+   vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm);
 }
 
 GEN_VEXT_TRANS(vlxei8_v,  MO_8,  rnfvm, ld_index_op, ld_index_check)
@@ -1095,7 +1096,8 @@ static bool st_index_check(DisasContext *s, arg_rnfvm* a, 
uint8_t eew)
 {
 return require_rvv(s) &&
vext_check_isa_ill(s) &&
-   vext_check_st_index(s, a->rd, a->rs2, a->nf, eew);
+   vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) &&
+   vext_check_input_eew(s, a->rd, s->sew, a->rs2, eew, a->vm);
 }
 
 GEN_VEXT_TRANS(vsxei8_v,  MO_8,  rnfvm, st_index_op, st_index_check)
-- 
2.43.0




[PATCH v3 03/10] target/riscv: rvv: Apply vext_check_input_eew to vrgather instructions to check mismatched input EEWs encoding constraint

2025-04-08 Thread Max Chou
According to the v spec, a vector register cannot be used to provide source
operands with more than one EEW for a single instruction.
The vs1 EEW of vrgatherei16.vv is 16.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 32 +
 1 file changed, 32 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index e630f8661e1..4a0c9fbeff3 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -379,6 +379,35 @@ static bool vext_check_ld_index(DisasContext *s, int vd, 
int vs2,
 return ret;
 }
 
+/*
+ * Check whether a vector register is used to provide source operands with
+ * more than one EEW for the vector instruction.
+ * Returns true if the instruction has valid encoding
+ * Returns false if encoding violates the mismatched input EEWs constraint
+ */
+static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1,
+ int vs2, uint8_t eew_vs2, int vm)
+{
+bool is_valid = true;
+int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul;
+int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul;
+
+/* When vm is 0, vs1 & vs2(EEW!=1) group can't overlap v0 (EEW=1) */
+if ((vs1 != -1 && !require_vm(vm, vs1)) ||
+(vs2 != -1 && !require_vm(vm, vs2))) {
+is_valid = false;
+}
+
+/* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */
+if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) &&
+is_overlapped(vs1, 1 << MAX(emul_vs1, 0),
+  vs2, 1 << MAX(emul_vs2, 0))) {
+is_valid = false;
+}
+
+return is_valid;
+}
+
 static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
 {
 return require_vm(vm, vd) &&
@@ -3449,6 +3478,7 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr 
*a)
 {
 return require_rvv(s) &&
vext_check_isa_ill(s) &&
+   vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) &&
require_align(a->rd, s->lmul) &&
require_align(a->rs1, s->lmul) &&
require_align(a->rs2, s->lmul) &&
@@ -3461,6 +3491,7 @@ static bool vrgatherei16_vv_check(DisasContext *s, 
arg_rmrr *a)
 int8_t emul = MO_16 - s->sew + s->lmul;
 return require_rvv(s) &&
vext_check_isa_ill(s) &&
+   vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) &&
(emul >= -3 && emul <= 3) &&
require_align(a->rd, s->lmul) &&
require_align(a->rs1, emul) &&
@@ -3480,6 +3511,7 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr 
*a)
 {
 return require_rvv(s) &&
vext_check_isa_ill(s) &&
+   vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) &&
require_align(a->rd, s->lmul) &&
require_align(a->rs2, s->lmul) &&
(a->rd != a->rs2) &&
-- 
2.43.0




[PATCH v3 08/10] target/riscv: rvv: Apply vext_check_input_eew to vector narrow/widen instructions

2025-04-08 Thread Max Chou
Handle the overlap of source registers with different EEWs.
The vd of vector widening mul-add instructions is one of the input
operands.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvbf16.c.inc |  9 ++-
 target/riscv/insn_trans/trans_rvv.c.inc| 77 +-
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc 
b/target/riscv/insn_trans/trans_rvbf16.c.inc
index 0a9cd1ec315..066dc364c5b 100644
--- a/target/riscv/insn_trans/trans_rvbf16.c.inc
+++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
@@ -119,8 +119,11 @@ static bool trans_vfwmaccbf16_vv(DisasContext *ctx, 
arg_vfwmaccbf16_vv *a)
 REQUIRE_FPU;
 REQUIRE_ZVFBFWMA(ctx);
 
+uint8_t sew = ctx->sew;
 if (require_rvv(ctx) && vext_check_isa_ill(ctx) && (ctx->sew == MO_16) &&
-vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm)) {
+vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm) &&
+vext_check_input_eew(ctx, a->rd, sew + 1, a->rs1, sew, a->vm) &&
+vext_check_input_eew(ctx, a->rd, sew + 1, a->rs2, sew, a->vm)) {
 uint32_t data = 0;
 
 gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
@@ -146,8 +149,10 @@ static bool trans_vfwmaccbf16_vf(DisasContext *ctx, 
arg_vfwmaccbf16_vf *a)
 REQUIRE_FPU;
 REQUIRE_ZVFBFWMA(ctx);
 
+uint8_t sew = ctx->sew;
 if (require_rvv(ctx) && (ctx->sew == MO_16) && vext_check_isa_ill(ctx) &&
-vext_check_ds(ctx, a->rd, a->rs2, a->vm)) {
+vext_check_ds(ctx, a->rd, a->rs2, a->vm) &&
+vext_check_input_eew(ctx, a->rd, sew + 1, a->rs2, sew, a->vm)) {
 uint32_t data = 0;
 
 gen_set_rm(ctx, RISCV_FRM_DYN);
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 585ee98b27d..2d067a59e27 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -528,6 +528,7 @@ static bool vext_narrow_check_common(DisasContext *s, int 
vd, int vs2,
 static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
 {
 return vext_wide_check_common(s, vd, vm) &&
+   vext_check_input_eew(s, vs, s->sew, -1, 0, vm) &&
require_align(vs, s->lmul) &&
require_noover(vd, s->lmul + 1, vs, s->lmul);
 }
@@ -535,6 +536,7 @@ static bool vext_check_ds(DisasContext *s, int vd, int vs, 
int vm)
 static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
 {
 return vext_wide_check_common(s, vd, vm) &&
+   vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) &&
require_align(vs, s->lmul + 1);
 }
 
@@ -553,6 +555,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, 
int vm)
 static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_ds(s, vd, vs2, vm) &&
+   vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
require_align(vs1, s->lmul) &&
require_noover(vd, s->lmul + 1, vs1, s->lmul);
 }
@@ -575,12 +578,14 @@ static bool vext_check_dss(DisasContext *s, int vd, int 
vs1, int vs2, int vm)
 static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_ds(s, vd, vs1, vm) &&
+   vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
require_align(vs2, s->lmul + 1);
 }
 
 static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
 {
-bool ret = vext_narrow_check_common(s, vd, vs, vm);
+bool ret = vext_narrow_check_common(s, vd, vs, vm) &&
+   vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm);
 if (vd != vs) {
 ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
 }
@@ -603,6 +608,7 @@ static bool vext_check_sd(DisasContext *s, int vd, int vs, 
int vm)
 static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_sd(s, vd, vs2, vm) &&
+   vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
require_align(vs1, s->lmul);
 }
 
@@ -1531,6 +1537,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr 
*a)
vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
 }
 
+/* OPIVV with overwrite and WIDEN */
+static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+return require_rvv(s) &&
+   vext_check_isa_ill(s) &&
+   vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+   vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) &&
+   vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
 static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
gen_helper_gvec_4_ptr *fn,
bool (*checkfn)(DisasContext *, arg_rmrr *))
@@ -1578,6 +1594,14 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr 
*a)
vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
 
+static b

[PATCH v3 07/10] target/riscv: rvv: Apply vext_check_input_eew to vector integer extension instructions(OPMVV)

2025-04-08 Thread Max Chou
Handle the overlap of source registers with different EEWs.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index d72792e46a7..585ee98b27d 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3659,7 +3659,9 @@ static bool int_ext_check(DisasContext *s, arg_rmr *a, 
uint8_t div)
 require_align(a->rd, s->lmul) &&
 require_align(a->rs2, s->lmul - div) &&
 require_vm(a->vm, a->rd) &&
-require_noover(a->rd, s->lmul, a->rs2, s->lmul - div);
+require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) &&
+vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm);
+
 return ret;
 }
 
-- 
2.43.0




[PATCH v3 02/10] target/riscv: rvv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS

2025-04-08 Thread Max Chou
From: Anton Blanchard 

Signed-off-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Max Chou 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 20b1cb127b4..e630f8661e1 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2403,10 +2403,10 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr 
*a)
 }
 
 /* OPFVF with WIDEN */
-#define GEN_OPFVF_WIDEN_TRANS(NAME)  \
+#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK)   \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)   \
 {\
-if (opfvf_widen_check(s, a)) {   \
+if (CHECK(s, a)) {   \
 uint32_t data = 0;   \
 static gen_helper_opfvf *const fns[2] = {\
 gen_helper_##NAME##_h, gen_helper_##NAME##_w,\
@@ -2422,8 +2422,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 return false;\
 }
 
-GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check)
 
 static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
 {
@@ -2505,7 +2505,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf,  opfvf_check)
 
 /* Vector Widening Floating-Point Multiply */
 GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check)
 
 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
 GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
@@ -2530,10 +2530,10 @@ GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
 GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
 GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
 GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_widen_check)
 
 /* Vector Floating-Point Square-Root Instruction */
 
-- 
2.43.0




[PATCH v3 04/10] target/riscv: rvv: Apply vext_check_input_eew to OPIVI/OPIVX/OPFVF(vext_check_ss) instructions

2025-04-08 Thread Max Chou
Handle the overlap of source registers with different EEWs.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 4a0c9fbeff3..e8197f779e0 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -412,7 +412,8 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, 
int vm)
 {
 return require_vm(vm, vd) &&
require_align(vd, s->lmul) &&
-   require_align(vs, s->lmul);
+   require_align(vs, s->lmul) &&
+   vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm);
 }
 
 /*
-- 
2.43.0




[PATCH v3 10/10] target/riscv: Fix the rvv reserved encoding of unmasked instructions

2025-04-08 Thread Max Chou
According to the v spec, the encodings of vcompress.vm and vector
mask-register logical instructions with vm=0 are reserved.

Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn32.decode | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 6d1a13c8260..cd23b1f3a9b 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -703,14 +703,14 @@ vfredmax_vs 000111 . . . 001 . 1010111 
@r_vm
 # Vector widening ordered and unordered float reduction sum
 vfwredusum_vs   110001 . . . 001 . 1010111 @r_vm
 vfwredosum_vs   110011 . . . 001 . 1010111 @r_vm
-vmand_mm011001 - . . 010 . 1010111 @r
-vmnand_mm   011101 - . . 010 . 1010111 @r
-vmandn_mm   011000 - . . 010 . 1010111 @r
-vmxor_mm011011 - . . 010 . 1010111 @r
-vmor_mm 011010 - . . 010 . 1010111 @r
-vmnor_mm00 - . . 010 . 1010111 @r
-vmorn_mm011100 - . . 010 . 1010111 @r
-vmxnor_mm   01 - . . 010 . 1010111 @r
+vmand_mm011001 1 . . 010 . 1010111 @r
+vmnand_mm   011101 1 . . 010 . 1010111 @r
+vmandn_mm   011000 1 . . 010 . 1010111 @r
+vmxor_mm011011 1 . . 010 . 1010111 @r
+vmor_mm 011010 1 . . 010 . 1010111 @r
+vmnor_mm00 1 . . 010 . 1010111 @r
+vmorn_mm011100 1 . . 010 . 1010111 @r
+vmxnor_mm   01 1 . . 010 . 1010111 @r
 vcpop_m 01 . . 1 010 . 1010111 @r2_vm
 vfirst_m01 . . 10001 010 . 1010111 @r2_vm
 vmsbf_m 010100 . . 1 010 . 1010111 @r2_vm
@@ -732,7 +732,7 @@ vrgather_vv 001100 . . . 000 . 1010111 @r_vm
 vrgatherei16_vv 001110 . . . 000 . 1010111 @r_vm
 vrgather_vx 001100 . . . 100 . 1010111 @r_vm
 vrgather_vi 001100 . . . 011 . 1010111 @r_vm
-vcompress_vm010111 - . . 010 . 1010111 @r
+vcompress_vm010111 1 . . 010 . 1010111 @r
 vmv1r_v 100111 1 . 0 011 . 1010111 @r2rd
 vmv2r_v 100111 1 . 1 011 . 1010111 @r2rd
 vmv4r_v 100111 1 . 00011 011 . 1010111 @r2rd
-- 
2.43.0




[PULL 2/2] hw/loongarch/virt: Replace destination error with error_abort

2025-04-08 Thread Song Gao
From: Bibo Mao 

In virt_cpu_plug() and virt_cpu_unplug(), an error from the hotplug
handlers is impossible. Instead of propagating the destination error,
pass error_abort. This simplifies the logic.

Signed-off-by: Bibo Mao 
Acked-by: Markus Armbruster 
Message-Id: <20250324030145.3037408-3-maob...@loongson.cn>
Signed-off-by: Song Gao 
---
 hw/loongarch/virt.c | 39 +++
 1 file changed, 7 insertions(+), 32 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 504f8755a0..65c9027feb 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -936,29 +936,15 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev,
 DeviceState *dev, Error **errp)
 {
 CPUArchId *cpu_slot;
-Error *err = NULL;
 LoongArchCPU *cpu = LOONGARCH_CPU(dev);
 LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
 
 /* Notify ipi and extioi irqchip to remove interrupt routing to CPU */
-hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
-if (err) {
-error_propagate(errp, err);
-return;
-}
-
-hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
-if (err) {
-error_propagate(errp, err);
-return;
-}
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev, &error_abort);
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi), dev, &error_abort);
 
 /* Notify acpi ged CPU removed */
-hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
-if (err) {
-error_propagate(errp, err);
-return;
-}
+hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &error_abort);
 
 cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
 cpu_slot->cpu = NULL;
@@ -971,29 +957,18 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
 CPUArchId *cpu_slot;
 LoongArchCPU *cpu = LOONGARCH_CPU(dev);
 LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
-Error *err = NULL;
 
 if (lvms->ipi) {
-hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
-if (err) {
-error_propagate(errp, err);
-return;
-}
+hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev, &error_abort);
 }
 
 if (lvms->extioi) {
-hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
-if (err) {
-error_propagate(errp, err);
-return;
-}
+hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev, &error_abort);
 }
 
 if (lvms->acpi_ged) {
-hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
-if (err) {
-error_propagate(errp, err);
-}
+hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev,
+ &error_abort);
 }
 
 cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
-- 
2.34.1




[PATCH v3 05/10] target/riscv: rvv: Apply vext_check_input_eew to OPIVV/OPFVV(vext_check_sss) instructions

2025-04-08 Thread Max Chou
Handle the overlap of source registers with different EEWs.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index e8197f779e0..2a4bededd1e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -432,6 +432,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, 
int vm)
 static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_ss(s, vd, vs2, vm) &&
+   vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
require_align(vs1, s->lmul);
 }
 
-- 
2.43.0




[PATCH v6 1/4] vhost_vdpa : Add a new parameter to enable check mac address

2025-04-08 Thread Cindy Lu
When using a VDPA device, it's important to ensure that the MAC
address is correctly set.
Add a new parameter to the QEMU command line to enable this check; the
default value is false.

The usage is:

-netdev 
type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,check-mac=true\
-device virtio-net-pci,netdev=vhost-vdpa0\


Signed-off-by: Cindy Lu 
---
 include/net/net.h | 1 +
 net/vhost-vdpa.c  | 4 
 qapi/net.json | 5 +
 3 files changed, 10 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index cdd5b109b0..fac1951b6e 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -112,6 +112,7 @@ struct NetClientState {
 bool is_netdev;
 bool do_not_pad; /* do not pad to the minimum ethernet frame length */
 bool is_datapath;
+bool check_mac;
 QTAILQ_HEAD(, NetFilterState) filters;
 };
 
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 7ca8b46eee..ba1da31741 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -1870,6 +1870,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
  iova_range, features, shared, errp);
 if (!ncs[i])
 goto err;
+
+ncs[i]->check_mac = opts->check_mac;
 }
 
 if (has_cvq) {
@@ -1882,6 +1884,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
  errp);
 if (!nc)
 goto err;
+
+nc->check_mac = opts->check_mac;
 }
 
 return 0;
diff --git a/qapi/net.json b/qapi/net.json
index 310cc4fd19..a5c70d1df8 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -510,6 +510,10 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 # (default: 1)
 #
+# @check-mac: Enable the check for whether the device's MAC address
+# and the MAC in QEMU command line are acceptable for booting.
+# (default: false)
+#
 # @x-svq: Start device with (experimental) shadow virtqueue.  (Since
 # 7.1) (default: false)
 #
@@ -524,6 +528,7 @@
 '*vhostdev': 'str',
 '*vhostfd':  'str',
 '*queues':   'int',
+'*check-mac':'bool',
 '*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
-- 
2.45.0




[PATCH v3 2/5] util/qemu-sockets: Refactor setting client sockopts into a separate function

2025-04-08 Thread Juraj Marcin
From: Juraj Marcin 

This is done in preparation for enabling the SO_KEEPALIVE support for
server sockets and adding settings for more TCP keep-alive socket
options.

Signed-off-by: Juraj Marcin 
---
 util/qemu-sockets.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 77477c1cd5..d15f6aa4b0 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -205,6 +205,22 @@ static int try_bind(int socket, InetSocketAddress *saddr, 
struct addrinfo *e)
 #endif
 }
 
+static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp)
+{
+if (saddr->keep_alive) {
+int keep_alive = 1;
+int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+ &keep_alive, sizeof(keep_alive));
+
+if (ret < 0) {
+error_setg_errno(errp, errno,
+ "Unable to set keep-alive option on socket");
+return -1;
+}
+}
+return 0;
+}
+
 static int inet_listen_saddr(InetSocketAddress *saddr,
  int port_offset,
  int num,
@@ -475,16 +491,9 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error 
**errp)
 return sock;
 }
 
-if (saddr->keep_alive) {
-int val = 1;
-int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- &val, sizeof(val));
-
-if (ret < 0) {
-error_setg_errno(errp, errno, "Unable to set KEEPALIVE");
-close(sock);
-return -1;
-}
+if (inet_set_sockopts(sock, saddr, errp)) {
+close(sock);
+return -1;
 }
 
 return sock;
-- 
2.48.1




Re: [PATCH 00/16] Add Multi-Core Debug (MCD) API support

2025-04-08 Thread Alex Bennée
Markus Armbruster  writes:

> Mario Fleischmann  writes:
>
>> Apologies for the line wrapping in yesterday's answer. Should be fixed now.
>>
>> On 08.04.2025 09:00, Markus Armbruster wrote:
>>> Mario Fleischmann  writes:
>>> 
>>>> Thanks a lot for the response, I really appreciate your time.
>>>>
>>>> On 07.04.2025 14:33, Markus Armbruster wrote:
>>>>
>>>>> Mario Fleischmann  writes:
>>>>>
>>>>>> This patch series introduces support for the Multi-Core Debug (MCD) API, 
>>>>>> a
>>>>>> commonly used debug interface by emulators. The MCD API, defined through 
>>>>>> a
>>>>>> header file, consists of 54 functions for implementing debug and trace.
>>>>>> However, since it is a header-file-only interface, MCD does not specify a
>>>>>> communication protocol. We get around this limitation by following a 
>>>>>> remote
>>>>>> procedure call approach using QMP. The client stub corresponding to this
>>>>>> implementation can be found at https://gitlab.com/lauterbach/mcdrefsrv
>>>>>>
>>>>>> This series is the successor to:
>>>>>> "[PATCH v5 00/18] first version of mcdstub"
>>>>>> (https://patchew.org/QEMU/20231220162555.19545-1-nicolas.e...@lauterbach.com/)
>>>>>>
>>>>>> * Architecture-independent MCD implementation
>>>>>> * QMP instead of custom TCP protocol
>>>>>
>>>>> Rationale?  There must be pros and cons.
>>>>
>>>> Assuming you're referring to the protocol of the previous patch series:
>>>> The previous TCP protocol only supported a subset of MCD. As the 
>>>> implementation progresses, the protocol eventually needs to be extended, 
>>>> possibly resulting in backwards compatibility problems.
>>>> Following an RPC approach and keeping the communication layer as close 
>>>> to the MCD API as possible results in a larger protocol at first, but 
>>>> does not need to be changed afterwards.
>>>> By directly mapping MCD functions onto QMP commands, the complexity in 
>>>> the server and client stubs can be minimized.
>>>>
>>>> Assuming you're referring to the QMP choice:
>>>> QMP is being described as the "protocol which allows applications to 
>>>> control a QEMU instance".
>>>> It provides a RPC framework which automatically (de)serializes methods 
>>>> and their parameters, even inside QTests.
>>>> The whole interface is automatically documented.
>>> 
>>> Let's see whether I understand.
>>> 
>>> MCD is an established C interface.
>>> 
>>> Your goal is to provide remote MCD for QEMU, i.e. the client uses the
>>> MCD C interface, and the interface's implementation talks to an MCD
>>> server integrated into QEMU via some remote transport.
>>> 
>>> The previous version connects the two with a bespoke protocol via TCP.
>>> The client software translates between the C interface and this
>>> protocol.  QEMU implements the protocol's server side.  Designing and
>>> maintaining a protocol is expensive.
>>> 
>>> This version makes two changes:
>>> 
>>> 1. Instead of layering a protocol on top of MCD, you use MCD directly.
>>> This eliminates protocol design and maintenance.  Moreover, translation
>>> becomes straightforward marshaling / unmarshaling for the transport.
>>> 
>>> 2. You use QMP as a transport.  This gets you marshaling / unmarshaling
>>> for free.  It also provides some useful infrastructure for tests,
>>> documentation and such.
>>> 
>>> Fair?
>>
>> Couldn't have put it better myself.
>>

>>> What about providing the MCD interface as a separate QMP-like protocol?
>>> It gets its own QAPI schema, just like for qemu-ga.  Simplifies
>>> compiling it out when not needed.
>>>
>>> It gets its own socket, just like the GDB stub.  Might reduce
>>> interference between debugging and QMP.
>>> 
>>> Thoughts?  Alex, Philippe, care to chime in?
>>
>> Sounds reasonable to me. Keeping in mind the size of generated QAPI code,
>> an option to `./configure [...] --enable-mcd` is definitely advisable.
>
> Alex, Philippe?

When I spoke to Mario at DVCon last year I liked the idea of re-using
QMP instead of inventing yet another RPC interface for QEMU. QMP
certainly has nicer properties than the gdbstub which has a very
"organic" and "serial" feel to it.

Are you suggesting we re-use the machinery but use an entirely separate
socket with just the MCD namespace in it? I don't see that being a
problem as long as we can test it properly in the CI.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PULL 2/4] qcow2: Don't crash qemu-img info with missing crypto header

2025-04-08 Thread Kevin Wolf
qcow2_refresh_limits() assumes that s->crypto is non-NULL whenever
bs->encrypted is true. This is actually not the case: qcow2_do_open()
allows opening an image with a missing crypto header for BDRV_O_NO_IO,
and then bs->encrypted is true, but s->crypto is still NULL.

It doesn't make sense to open an invalid image, so remove the exception
for BDRV_O_NO_IO. This catches the problem early and any code that makes
the same assumption is safe now.

At the same time, in the name of defensive programming, we shouldn't
make the assumption in the first place. Let qcow2_refresh_limits() check
s->crypto rather than bs->encrypted. If s->crypto is NULL, it also can't
make any requirement on request alignment.

Finally, start a qcow2-encryption test case that only serves as a
regression test for this crash for now.

Reported-by: Leonid Reviakin 
Reported-by: Denis Rastyogin 
Signed-off-by: Kevin Wolf 
Message-ID: <20250318201143.70657-1-kw...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Kevin Wolf 
---
 block/qcow2.c |  4 +-
 tests/qemu-iotests/tests/qcow2-encryption | 75 +++
 tests/qemu-iotests/tests/qcow2-encryption.out | 32 
 3 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/qcow2-encryption
 create mode 100644 tests/qemu-iotests/tests/qcow2-encryption.out

diff --git a/block/qcow2.c b/block/qcow2.c
index dd6bcafbd8..7774e7f090 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1721,7 +1721,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int 
flags,
 ret = -EINVAL;
 goto fail;
 }
-} else if (!(flags & BDRV_O_NO_IO)) {
+} else {
 error_setg(errp, "Missing CRYPTO header for crypt method %d",
s->crypt_method_header);
 ret = -EINVAL;
@@ -1976,7 +1976,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, 
Error **errp)
 {
 BDRVQcow2State *s = bs->opaque;
 
-if (bs->encrypted) {
+if (s->crypto) {
 /* Encryption works on a sector granularity */
 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
 }
diff --git a/tests/qemu-iotests/tests/qcow2-encryption 
b/tests/qemu-iotests/tests/qcow2-encryption
new file mode 100755
index 00..95f6195ab8
--- /dev/null
+++ b/tests/qemu-iotests/tests/qcow2-encryption
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# group: rw quick
+#
+# Test case for encryption support in qcow2
+#
+# Copyright (C) 2025 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=kw...@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+status=1   # failure is the default!
+
+_cleanup()
+{
+   _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ../common.rc
+. ../common.filter
+
+# This tests qcow2-specific low-level functionality
+_supported_fmt qcow2
+_supported_proto file
+_require_working_luks
+
+IMG_SIZE=64M
+
+echo
+echo "=== Create an encrypted image ==="
+echo
+
+_make_test_img --object secret,id=sec0,data=123456 -o 
encrypt.format=luks,encrypt.key-secret=sec0 $IMG_SIZE
+$PYTHON ../qcow2.py "$TEST_IMG" dump-header-exts
+_img_info
+$QEMU_IMG check \
+--object secret,id=sec0,data=123456 \
+--image-opts file.filename="$TEST_IMG",encrypt.key-secret=sec0 \
+| _filter_qemu_img_check
+
+echo
+echo "=== Remove the header extension ==="
+echo
+
+$PYTHON ../qcow2.py "$TEST_IMG" del-header-ext 0x0537be77
+$PYTHON ../qcow2.py "$TEST_IMG" dump-header-exts
+_img_info
+$QEMU_IMG check \
+--object secret,id=sec0,data=123456 \
+--image-opts file.filename="$TEST_IMG",encrypt.key-secret=sec0 2>&1 \
+| _filter_qemu_img_check \
+| _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/qcow2-encryption.out 
b/tests/qemu-iotests/tests/qcow2-encryption.out
new file mode 100644
index 00..9b549dc2ab
--- /dev/null
+++ b/tests/qemu-iotests/tests/qcow2-encryption.out
@@ -0,0 +1,32 @@
+QA output created by qcow2-encryption
+
+=== Create an encrypted image ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+Header extension:
+magic 0x537be77 (Crypto header)
+length16
+data   

Re: [PATCH] ui/curses: Fix infinite loop on windows

2025-04-08 Thread Stefan Hajnoczi
On Thu, Apr 03, 2025 at 01:07:56AM +, William Hu via wrote:
> >From a42046272f0544dd18ed58661e53ea17d1584c2c Mon Sep 17 00:00:00 2001
> From: William Hu 
> Date: Wed, 2 Apr 2025 12:00:00 -0400
> Subject: [PATCH] ui/curses: Fix infinite loop on windows
> 
> Replace -1 comparisons for wint_t with WEOF to fix infinite loop caused by a
> 65535 == -1 comparison.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2905
> Signed-off-by: William Hu 
> ---
>  ui/curses.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)

I have CCed Gerd Hoffmann (git-shortlog(1) shows he is the most frequent
committer to this source file) and Marc-André Lureau (ui/ maintainer
according to the ./MAINTAINERS file) so they can also review your patch.

> 
> diff --git a/ui/curses.c b/ui/curses.c
> index a39aee8762..3f5c5adf78 100644
> --- a/ui/curses.c
> +++ b/ui/curses.c
> @@ -265,7 +265,12 @@ static int curses2foo(const int _curses2foo[], const int 
> _curseskey2foo[],
>  
>  static void curses_refresh(DisplayChangeListener *dcl)
>  {
> -int chr, keysym, keycode, keycode_alt;
> +/*
> + * DO NOT MAKE chr AN INT:
> + * Causes silent conversion errors on Windows where wint_t is unsigned 
> short.
> + */
> +wint_t chr = 0;
> +int keysym, keycode, keycode_alt;
>  enum maybe_keycode maybe_keycode = CURSES_KEYCODE;
>  
>  curses_winch_check();
> @@ -284,8 +289,9 @@ static void curses_refresh(DisplayChangeListener *dcl)
>  /* while there are any pending key strokes to process */
>  chr = console_getch(&maybe_keycode);
>  
> -if (chr == -1)
> +if (chr == WEOF) {
>  break;
> +}

Further below there appears to be another instance of the same bug:

  /* alt or esc key */
  if (keycode == 1) {
  enum maybe_keycode next_maybe_keycode = CURSES_KEYCODE;
  int nextchr = console_getch(&next_maybe_keycode);

  if (nextchr != -1) {
  ^

>  
>  #ifdef KEY_RESIZE
>  /* this shouldn't occur when we use a custom SIGWINCH handler */
> -- 
> 2.47.0
> 
> 


signature.asc
Description: PGP signature


Re: [PATCH] Revert "virtio-net: Copy received header to buffer"

2025-04-08 Thread Stefan Hajnoczi
On Fri, Apr 04, 2025 at 05:18:21PM +0200, Antoine Damhet wrote:
> This reverts commit 7987d2be5a8bc3a502f89ba8cf3ac3e09f64d1ce.
> 
> The goal was to remove the need to patch the (const) input buffer
> with a recomputed UDP checksum by copying headers to a RW region and
> inject the checksum there. The patch computed the checksum only from the
> header fields (missing the rest of the payload) producing an invalid one
> and making guests fail to acquire a DHCP lease.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2727
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Antoine Damhet 
> ---
>  hw/net/virtio-net.c | 85 +
>  1 file changed, 39 insertions(+), 46 deletions(-)

This patch fails to apply due to a conflict with:

  commit c17ad4b11bd268a35506cd976884562df6ca69d7
  Author: Akihiko Odaki 
  Date:   Wed Jan 8 21:13:29 2025 +0900

virtio-net: Fix num_buffers for version 1

Please rebase.

Michael or Jason: Are you still sending a pull request for 10.0.0-rc3?
It's being tagged today.

Stefan

> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index de87cfadffe1..028e7e873c42 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -1702,44 +1702,41 @@ static void virtio_net_hdr_swap(VirtIODevice *vdev, 
> struct virtio_net_hdr *hdr)
>   * cache.
>   */
>  static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
> -size_t *hdr_len, const uint8_t *buf,
> -size_t buf_size, size_t *buf_offset)
> +uint8_t *buf, size_t size)
>  {
>  size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
> sizeof(struct udp_header);
>  
> -buf += *buf_offset;
> -buf_size -= *buf_offset;
> -
>  if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
> -(buf_size >= csum_size && buf_size < 1500) && /* normal sized MTU */
> +(size >= csum_size && size < 1500) && /* normal sized MTU */
>  (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
>  (buf[23] == 17) && /* ip.protocol == UDP */
>  (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
> -memcpy((uint8_t *)hdr + *hdr_len, buf, csum_size);
> -net_checksum_calculate((uint8_t *)hdr + *hdr_len, csum_size, 
> CSUM_UDP);
> +net_checksum_calculate(buf, size, CSUM_UDP);
>  hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
> -*hdr_len += csum_size;
> -*buf_offset += csum_size;
>  }
>  }
>  
> -static size_t receive_header(VirtIONet *n, struct virtio_net_hdr *hdr,
> - const void *buf, size_t buf_size,
> - size_t *buf_offset)
> +static void receive_header(VirtIONet *n, const struct iovec *iov, int 
> iov_cnt,
> +   const void *buf, size_t size)
>  {
> -size_t hdr_len = n->guest_hdr_len;
> -
> -memcpy(hdr, buf, sizeof(struct virtio_net_hdr));
> -
> -*buf_offset = n->host_hdr_len;
> -work_around_broken_dhclient(hdr, &hdr_len, buf, buf_size, buf_offset);
> +if (n->has_vnet_hdr) {
> +/* FIXME this cast is evil */
> +void *wbuf = (void *)buf;
> +work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
> +size - n->host_hdr_len);
>  
> -if (n->needs_vnet_hdr_swap) {
> -virtio_net_hdr_swap(VIRTIO_DEVICE(n), hdr);
> +if (n->needs_vnet_hdr_swap) {
> +virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
> +}
> +iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
> +} else {
> +struct virtio_net_hdr hdr = {
> +.flags = 0,
> +.gso_type = VIRTIO_NET_HDR_GSO_NONE
> +};
> +iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
>  }
> -
> -return hdr_len;
>  }
>  
>  static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
> @@ -1907,13 +1904,6 @@ static int virtio_net_process_rss(NetClientState *nc, 
> const uint8_t *buf,
>  return (index == new_index) ? -1 : new_index;
>  }
>  
> -typedef struct Header {
> -struct virtio_net_hdr_v1_hash virtio_net;
> -struct eth_header eth;
> -struct ip_header ip;
> -struct udp_header udp;
> -} Header;
> -
>  static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
>size_t size)
>  {
> @@ -1923,15 +1913,15 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
> *nc, const uint8_t *buf,
>  VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
>  size_t lens[VIRTQUEUE_MAX_SIZE];
>  struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
> -Header hdr;
> +struct virtio_net_hdr_v1_hash extra_hdr;
>  unsigned mhdr_cnt = 0;
>  size_t offset, i, guest_offset, j;
>  ssize_t err;
>  
> -memset(&hdr.virtio_net, 0, sizeof(hdr.virtio_net));

[PULL 0/2] loongarch bug fix for 10.0

2025-04-08 Thread Song Gao
The following changes since commit dfaecc04c46d298e9ee81bd0ca96d8754f1c27ed:

  Merge tag 'pull-riscv-to-apply-20250407-1' of 
https://github.com/alistair23/qemu into staging (2025-04-07 09:18:33 -0400)

are available in the Git repository at:

  https://github.com/gaosong715/qemu.git tags/pull-loongarch-20250408

for you to fetch changes up to fa0dde12db045ddc84f69b1aa0a4c98d176c903d:

  hw/loongarch/virt: Replace destination error with error_abort (2025-04-08 
14:44:53 +0800)


loongarch bug fix for 10.0


Bibo Mao (2):
  hw/loongarch/virt: Fix cpuslot::cpu set at last in virt_cpu_plug()
  hw/loongarch/virt: Replace destination error with error_abort

 hw/loongarch/virt.c | 43 +--
 1 file changed, 9 insertions(+), 34 deletions(-)




Re: [PATCH 00/16] Add Multi-Core Debug (MCD) API support

2025-04-08 Thread Alex Bennée
Markus Armbruster  writes:

> Alex Bennée  writes:
>
>> Markus Armbruster  writes:
>>
>>> Mario Fleischmann  writes:
>>>
 Apologies for the line wrapping in yesterday's answer. Should be fixed now.

 On 08.04.2025 09:00, Markus Armbruster wrote:
>
> [...]
>
> What about providing the MCD interface as a separate QMP-like protocol?
> It gets its own QAPI schema, just like for qemu-ga.  Simplifies
> compiling it out when not needed.
>
> It gets its own socket, just like the GDB stub.  Might reduce
> interference between debugging and QMP.
> 
> Thoughts?  Alex, Philippe, care to chime in?

 Sound reasonable to me. Keeping in mind the size of generated QAPI code,
 an option to `./configure [...] --enable-mcd` is definitely advisable.
>>>
>>> Alex, Philippe?
>>
>> When I spoke to Mario at DVCon last year I liked the idea of re-using
>> QMP instead of inventing yet another RPC interface for QEMU. QMP
>> certainly has nicer properties than the gdbstub which has a very
>> "organic" and "serial" feel to it.
>>
>> Are you suggesting we re-use the machinery but use an entirely separate
>> socket with just the MCD namespace in it? I don't see that being a
>> problem as long as we can test it properly in the CI.
>
> Yes.
>
> "Keep them separate" is only a gut feeling, though.  While I pay
> attention to my gut feelings, I know they can be wrong.  I am soliciting
> opinions.

I forgot to add: isn't the flexibility of the QMP API something we need
to handle for the single binary anyway?

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PULL 02/11] qapi/rocker: Tidy up query-rocker-of-dpa-flows example

2025-04-08 Thread Markus Armbruster
The command can return any number of RockerOfDpaFlow objects.  The
example shows it returning exactly two, with the second object's
members elided.  Tweak it so it elides elements after the first
instead.

Signed-off-by: Markus Armbruster 
Message-ID: <20250404121413.1743790-3-arm...@redhat.com>
Reviewed-by: Eric Blake 
[Commit message typo fixed]
---
 qapi/rocker.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/rocker.json b/qapi/rocker.json
index 51aa5b4930..0c7ef1f77c 100644
--- a/qapi/rocker.json
+++ b/qapi/rocker.json
@@ -254,7 +254,7 @@
 #   "action": {"goto-tbl": 10},
 #   "mask": {"in-pport": 4294901760}
 #  },
-#  {...},
+#  ...
 #]}
 ##
 { 'command': 'query-rocker-of-dpa-flows',
-- 
2.48.1




Re: [PATCH] hw/nvme: fix attachment of private namespaces

2025-04-08 Thread alan . adamson



On 4/8/25 3:20 AM, Klaus Jensen wrote:

From: Klaus Jensen 

Fix regression when attaching private namespaces that gets attached to
the wrong controller.

Keep track of the original controller "owner" of private namespaces, and
only attach if this matches on controller enablement.


Tested-by: Alan Adamson 

Reviewed-by: Alan Adamson 




Re: [PATCH] scripts/checkpatch: Fix typo in SPDX-License-Identifier keyword

2025-04-08 Thread Alex Bennée
Zhao Liu  writes:

> Fix the typo in the error message to help `grep` the example:
>
> ERROR: New file '***' requires 'SPDX-License-Identifer'
>
> Fixes: fa4d79c64dae ("scripts: mandate that new files have 
> SPDX-License-Identifier")
> Signed-off-by: Zhao Liu 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PATCH v3 06/10] target/riscv: rvv: Apply vext_check_input_eew to vector slide instructions(OPIVI/OPIVX)

2025-04-08 Thread Max Chou
Handle the overlap of source registers with different EEWs.

Co-authored-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 2a4bededd1e..d72792e46a7 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -638,7 +638,9 @@ static bool vext_check_slide(DisasContext *s, int vd, int 
vs2,
 {
 bool ret = require_align(vs2, s->lmul) &&
require_align(vd, s->lmul) &&
-   require_vm(vm, vd);
+   require_vm(vm, vd) &&
+   vext_check_input_eew(s, -1, 0, vs2, s->sew, vm);
+
 if (is_over) {
 ret &= (vd != vs2);
 }
-- 
2.43.0




[PATCH v3 01/10] target/riscv: rvv: Source vector registers cannot overlap mask register

2025-04-08 Thread Max Chou
From: Anton Blanchard 

Add the relevant ISA paragraphs explaining why source (and destination)
registers cannot overlap the mask register.

Signed-off-by: Anton Blanchard 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Max Chou 
Signed-off-by: Max Chou 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 29 ++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index b9883a5d323..20b1cb127b4 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s)
 }
 }
 
-/* Destination vector register group cannot overlap source mask register. */
-static bool require_vm(int vm, int vd)
+/*
+ * Source and destination vector register groups cannot overlap source mask
+ * register:
+ *
+ * A vector register cannot be used to provide source operands with more than
+ * one EEW for a single instruction. A mask register source is considered to
+ * have EEW=1 for this constraint. An encoding that would result in the same
+ * vector register being read with two or more different EEWs, including when
+ * the vector register appears at different positions within two or more vector
+ * register groups, is reserved.
+ * (Section 5.2)
+ *
+ * A destination vector register group can overlap a source vector
+ * register group only if one of the following holds:
+ *  1. The destination EEW equals the source EEW.
+ *  2. The destination EEW is smaller than the source EEW and the overlap
+ * is in the lowest-numbered part of the source register group.
+ *  3. The destination EEW is greater than the source EEW, the source EMUL
+ * is at least 1, and the overlap is in the highest-numbered part of
+ * the destination register group.
+ * For the purpose of determining register group overlap constraints, mask
+ * elements have EEW=1.
+ * (Section 5.2)
+ */
+static bool require_vm(int vm, int v)
 {
-return (vm != 0 || vd != 0);
+return (vm != 0 || v != 0);
 }
 
 static bool require_nf(int vd, int nf, int lmul)
-- 
2.43.0




Re: [PATCH 2/3] add vnc h264 encoder

2025-04-08 Thread Dietmar Maurer
> > > > +#include 
> > > > +
> > > > +static void libavcodec_destroy_encoder_context(VncState *vs)
> > >
> > > it's not libavcodec.
> >
> > I will fix that in v2.
> 
> What about encodebin suggestion?

I found no way to configure codec-specific options (e.g. x264 zerolatency). Is 
there a way? It is crucial to set those options
to get reasonable quality.

Anyway, I can test a few options, for example x264, openh264, and
maybe vah264enc (hardware support), and then simply use what is available.

I think we should also have the option to allow h264 if we detect
HW support, but disable software encoders. Or let the user select
the list of allowed codecs?

> >
> > > > +#ifdef CONFIG_GSTREAMER
> > > > +case VNC_ENCODING_H264:
> > > > +if (vnc_h264_encoder_init(vs) == 0) {
> > > > +vnc_set_feature(vs, VNC_FEATURE_H264);
> > >
> > > Before advertising support for the codec, it should actually check if
> > > the encoder is present.
> >
> > ok.
> >
> > > It would also be useful to have an extra VNC
> > > option like H264=on/off/auto.
> >
> > I thought it would be better to do that at the client?
> 
> Well, it can be worth it to prevent h264 usage from the server too. Or
> to ensure the server is h264-capable. (this wasn't seen as much
> necessary for other codecs that are low-resource and/or patent-free,
> but may make sense too)

Maybe H264=on,off,codec-list

on: automatically select the codec
off: disable h264
codec-list: a list of allowed codecs


- Dietmar




Configuring onboard devices, in particular memory contents (was: [PATCH v1 0/1] hw/misc/aspeed_sbc: Implement OTP memory and controller)

2025-04-08 Thread Markus Armbruster
Cédric Le Goater  writes:

> Hello Kane,
>
> + Markus (for ebc29e1beab0 implementation)
>
> On 4/7/25 09:33, Kane Chen wrote:
>> Hi Cédric/Philippe,
>> OTP (One-Time Programmable) memory is a type of non-volatile memory
>> in which each bit can be programmed only once. It is typically used
>> to store critical and permanent information, such as the chip ID and
>> secure boot keys. The structure and behavior of OTP memory are
>> consistent across both the AST1030 and AST2600 platforms.
>> As Philippe pointed out, this proposal models the OTP memory as a
>> flash device and utilizes a block backend for persistent storage. In
>> contrast, existing implementations such as NPCM7xxOTPState,
>> BCM2835OTPState, and SiFiveUOTPState expose OTP memory via MMIO and
>> always initialize it in a blank state. 
>
> AFAIU, Aspeed SBC is also MMIO based or is there another device,
> an eeprom, accessible through an external bus ? How is it
> implemented in HW ?
>
>> The goal of this design is to
>> allow the guest system to boot with a pre-configured OTP memory
>> state. 
>
> Yes. This is a valid request. It's not the first time we've had
> this kind of requests. The initial content of EEPROM devices are
> an example and some machines, like the rainier, have a lot.
>
> If the device can be defined on the command line, like would be
> an EEPROM device attached to an I2C bus or a flash device attached
> to a SPI bus, we can use a 'drive' property. Something like :
>
>   qemu-system-arm -M ast2600-evb \
>   -blockdev node-name=fmc0,driver=file,filename=/path/to/fmc0.img \
>   -device mx66u51235f,bus=ssi.0,cs=0x0,drive=fmc0 \
>   -blockdev node-name=fmc1,driver=file,filename=/path/to/fmc1.img \
>   -device mx66u51235f,bus=ssi.0,cs=0x1,drive=fmc1 \
>   -blockdev node-name=spi1,driver=file,filename=/path/to/spi1.img \
>   -device mx66u51235f,cs=0x0,bus=ssi.1,drive=spi1 \
>   ...
>
> However, the Aspeed SBC device is a platform device and it makes
> things more complex : it can not be created on the command line,
> it is directly created by the machine and the SoC, and passing
> device properties to specify a blockdev is not possible :
>
>   qemu-system-arm -M ast2600-evb \
>   -blockdev node-name=otpmem,driver=file,filename=/path/to/otpmem.img \
>   -device aspeed-sbc,drive=otpmem \
>   ...

Configuring onboard devices is an old problem, and so far we have failed
at solving it adequately.

-device / device_add let you configure the new device in a general way,
but these work only for devices the user creates, not for devices the
board creates automatically.

We have a bunch of ad hoc and mostly ancient ways to configure them, but
they're all limited.  For example:

* A number of old command line options, such as -drive, -serial, -net
  nic, create device backends and additionally deposit configuration in
  some global table the board may elect to use however it sees fit.  The
  intended use is to create frontends connected to these backends.

  Some boards error out when they can't honor something in the table.
  Others silently ignore parts of the table, or all of it.  Bad UI.

  Device configuration the table doesn't support is not accessible this
  way.  If you extend the table (and the associated option) to provide
  access to some device-specific configuration, all the other devices
  will silently ignore the new configuration bits.  Again, bad UI.

  There's another serious issue with block devices: -drive is obsolete
  for configuring complex block backends.  But its replacement
  -blockdev is for backend configuration only.  If you use -blockdev,
  you can't add to the table.

* Command line option -global lets you change property defaults.  This
  can be used to configure an onboard device as long as it is the only
  such device in the system.  Limited use, and also bad UI.

A modern attempt at a solution is to have machine properties alias
properties of onboard devices, so you can specify them with -machine.
For instance, a few machines expose the "drive" property of two onboard
pflash devices as machine properties "pflash0" and "pflash1".

Commits

e0561e60f170 (hw/arm/virt: Support firmware configuration with -blockdev)
ebc29e1beab0 (pc: Support firmware configuration with -blockdev) 

explain this in a lot more detail in their commit messages.

Sadly, this solution does not scale.  Adding alias properties to the
machine object is work, sometimes a lot of work (evidence: the two
commits above).  There are simply too many onboard devices with too many
properties to all manually alias.

Of course, even an insufficiently general / scalable solution like this
one can work well enough for specific cases.

>> To support this, the OTP memory is backed by a file,
>> simulating persistent flash behavior.
>
> The idea is good but the implementation is problematic.
>
> +static BlockBackend *init_otpmem(int64_t size_bytes)
> +{
> +Error *local_err = NULL;
> +

Re: [PATCH v3 2/2] vfio/spapr: Fix L2 crash with PCI device passthrough and memory > 128G

2025-04-08 Thread Cédric Le Goater

On 4/8/25 14:40, Amit Machhiwal wrote:

An L2 KVM guest fails to boot inside a pSeries LPAR when booted with a
memory more than 128 GB and PCI device passthrough. The L2 guest also
crashes when it is booted with a memory greater than 128 GB and a PCI
device is hotplugged later.

The issue arises from a conditional check for `levels > 1` in
`spapr_tce_create_table()` within L1 KVM. This check is meant to prevent
multi-level TCEs, which are not supported by the PowerVM hypervisor. As
a result, when QEMU makes a `VFIO_IOMMU_SPAPR_TCE_CREATE` ioctl call
with `levels > 1`, it triggers the conditional check and returns
`EINVAL`, causing the guest to crash with the following errors:

  2025-03-04T06:36:36.133117Z qemu-system-ppc64: Failed to create a window, ret 
= -1 (Invalid argument)
  2025-03-04T06:36:36.133176Z qemu-system-ppc64: Failed to create SPAPR window: 
Invalid argument
  qemu: hardware error: vfio: DMA mapping failed, unable to continue

Fix this by checking the supported DDW "levels" returned by the
VFIO_IOMMU_SPAPR_TCE_GET_INFO ioctl before attempting the TCE create
ioctl in KVM.

The patch has been tested on KVM guests with memory configurations of up
to 390GB, and 450GB on PowerVM and bare-metal environments respectively.

Signed-off-by: Amit Machhiwal 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/vfio/spapr.c | 36 +++-
  1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index dd9207679dbe..32611096fa29 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -26,6 +26,7 @@ typedef struct VFIOSpaprContainer {
  VFIOContainer container;
  MemoryListener prereg_listener;
  QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
+unsigned int levels;
  } VFIOSpaprContainer;
  
  OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR);

@@ -236,9 +237,11 @@ static bool vfio_spapr_create_window(VFIOContainer 
*container,
  {
  int ret = 0;
  VFIOContainerBase *bcontainer = &container->bcontainer;
+VFIOSpaprContainer *scontainer = container_of(container, 
VFIOSpaprContainer,
+  container);
  IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
  uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), 
pgmask;
-unsigned entries, bits_total, bits_per_level, max_levels;
+unsigned entries, bits_total, bits_per_level, max_levels, ddw_levels;
  struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };
  long rampagesize = qemu_minrampagesize();
  
@@ -291,16 +294,29 @@ static bool vfio_spapr_create_window(VFIOContainer *container,

   */
  bits_per_level = ctz64(qemu_real_host_page_size()) + 8;
  create.levels = bits_total / bits_per_level;
-if (bits_total % bits_per_level) {
-++create.levels;
-}
-max_levels = (64 - create.page_shift) / ctz64(qemu_real_host_page_size());
-for ( ; create.levels <= max_levels; ++create.levels) {
-ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
-if (!ret) {
-break;
+
+ddw_levels = scontainer->levels;
+if (ddw_levels > 1) {
+if (bits_total % bits_per_level) {
+++create.levels;
  }
+max_levels = (64 - create.page_shift) / 
ctz64(qemu_real_host_page_size());
+for ( ; create.levels <= max_levels; ++create.levels) {
+ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
+if (!ret) {
+break;
+}
+}
+} else { /* ddw_levels == 1 */
+if (create.levels > ddw_levels) {
+error_setg_errno(errp, EINVAL, "Host doesn't support multi-level TCE 
tables"
+ ". Use larger IO page size. Supported mask is 
0x%lx",
+ bcontainer->pgsizes);
+return false;
+}
+ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
  }
+
  if (ret) {
  error_setg_errno(errp, errno, "Failed to create a window, ret = %d", 
ret);
  return false;
@@ -501,6 +517,8 @@ static bool vfio_spapr_container_setup(VFIOContainerBase 
*bcontainer,
  goto listener_unregister_exit;
  }
  
+scontainer->levels = info.ddw.levels;

+
  if (v2) {
  bcontainer->pgsizes = info.ddw.pgsizes;
  /*





Re: Query on the dirty bitmap

2025-04-08 Thread prashant patil
 Thank you, Eric, for the thorough information—truly appreciate it.

Just to confirm what I understood, when we are reading a bitmap with
'x-dirty-bitmap' (for powered on vm of course), the 'start' is always a
logical offset no matter whether the record has 'offset' value or not. Is
this correct?

Also, I came across a case wherein we get the entire disk as allocated for
a raw format disk which is present on lvm or lvm-thin storage (the disk has
just a few MB data added, and the vm is in running state). Here is an
example of 1Gb data. Is this expected behaviour?
[{ "start": 0, "length": 1073741824, "depth": 0, "present": true, "zero":
false, "data": true, "compressed": false, "offset": 0}]

Regards,
Prashant

On Mon, Apr 7, 2025 at 8:24 PM Eric Blake  wrote:

> On Mon, Apr 07, 2025 at 02:46:17PM +0530, prashant patil wrote:
> > Thanks Eric.
>
> [top-posting makes conversations harder to follow, so on this list we
> typically reply inline]
>
> > I have a few questions about the bitmap content shown by 'qemu-img map'.
> > From below sample bitmap data:
> > 1. Why only some of the extents have start and offset values? And why are
> > they the same values?
> > 2. What does the start value indicate? Is it logical offset or physical
> > offset of data into qcow2?
>
> Normally (when there is no x-dirty-bitmap in play), 'start' denotes
> the logical offset being reported on (you'd expect a map to list every
> logical offset; so the start of entry N+1 should be the sum of start +
> length of entry N), while 'offset' is where that extent begins in the
> underlying file.  For a raw source, offset and start will be
> identical; for other sources, like qcow2, start is obviously logical,
> while offset is physical.  Entries without 'offset' are places where
> the logical contents are compressed, synthesized, or otherwise have no
> 1:1 correspondence to an offset in the physical file.  'present'
> indicates whether the data is synthesized or not; 'offset' is going to
> be absent if 'present' is false; although it can also be absent even
> when 'present' is true such as in the case of compression.
>
> When it comes to exposing a qcow2 file over NBD, you generally want to
> have:
>
> qemu-nbd using '-f qcow2' => raw view => qemu-img using '-f raw'
>
> to expose only the logical contents over the wire.  It is also
> possible to flip the responsibility:
>
> qemu-nbd using '-f raw' => qcow2 view => qemu-img using '-f qcow2'
>
> to expose the bare-metal qcow2 contents over the wire, but that gets
> less testing, in part because if you make the image writable, it tends
> to cause problems if the client writing to the qcow2 layer needs to
> allocate (since NBD does not have a resize command).  Having the
> server open the file as qcow2 and only serving raw contents means the
> server can resize transparently.  What's more, block status commands
> over NBD only work when the server is aware of the qcow2 nature of the
> file it is serving (if you expose qcow2 bits over the wire, the server
> treats the entire file as allocated, and there is no dirty bitmap
> context for the client to read over NBD).
>
> Therefore, 'start' and 'offset' are going to be identical if you are
> using qemu-img map to read bitmaps from a server, since bitmaps can
> only be read when the wire has the raw view (not the qcow2 view).
>
> But when you add x-dirty-bitmap into the mix, you are asking qemu to
> DISREGARD the normal rules of backing file information and instead
> report on dirty bitmap information as if it were backing file info.
> This means that anywhere the dirty bitmap response differs from a
> normal backing file response, the output of qemu-img is reporting
> garbage data (for example, the 'present' bit is now bogus, which
> explains why the rows where qemu-img claims 'present' is false omit an
> 'offset').  Only 'start', 'length', and 'data' matter when using
> x-dirty-bitmap, with 'data=false' meaning the section was reported
> dirty, and 'data=true' meaning the section was reported unchanged.
>
> And if that is confusing, well yeah. Which is why I recommend using
> libnbd's nbdinfo --map, where the output is more obvious.
>
> >
> > root@be-proxmox1:/# qemu-img map --output=json --image-opts
> > "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:bitmap1"
> >   [{ "start": 0, "length": 196608, "depth": 0, "present": true, "zero":
> > false, "data": true, "compressed": false, "offset": 0},
>
> The extent starting at logical offset 0 and lasting 196608 bytes is
> unchanged.
>
> > { "start": 196608, "length": 65536, "depth": 0, "present": false, "zero":
> > false, "data": false, "compressed": false},
>
> The extent starting at 196608 and lasting 65536 bytes is dirty
> (altered since the point in time when the bitmap was created).
>
> > > > qemu-img
> > > > map with x-dirty-bitmap image-opts.
> > >
> > > Here, this works, but feels like a hack, because it is relying on the
> > > x-dirty-bitmap feature of qemu.  The libnbd project ships with an
> > > ap

[PATCH v1 12/24] hw/s390x/ipl: Add IPIB flags to IPL Parameter Block

2025-04-08 Thread Zhuoying Cai
Add IPIB flags to IPL Parameter Block to determine if IPL needs to
perform securely and if IPL Information Report Block (IIRB) exists.

Secure boot in audit mode will perform if certificate(s) exist in the
key store. IIRB will exist and results of verification will be stored in
IIRB.

Signed-off-by: Zhuoying Cai 
---
 hw/s390x/ipl.c  | 20 
 include/hw/s390x/ipl/qipl.h |  6 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index b0810c9191..59ec81181d 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -431,6 +431,13 @@ S390IPLCertificateStore 
*s390_ipl_get_certificate_store(void)
 return &ipl->cert_store;
 }
 
+static bool s390_has_certificate(void)
+{
+S390IPLState *ipl = get_ipl_device();
+
+return ipl->cert_store.count > 0;
+}
+
 static bool s390_build_iplb(DeviceState *dev_st, IplParameterBlock *iplb)
 {
 CcwDevice *ccw_dev = NULL;
@@ -488,6 +495,19 @@ static bool s390_build_iplb(DeviceState *dev_st, 
IplParameterBlock *iplb)
 s390_ipl_convert_loadparm((char *)lp, iplb->loadparm);
 iplb->flags |= DIAG308_FLAGS_LP_VALID;
 
+/*
+ * Secure boot in audit mode will perform
+ * if certificate(s) exist in the key store.
+ *
+ * IPL Information Report Block (IIRB) will exist
+ * for secure boot in audit mode.
+ *
+ * Results of secure boot will be stored in IIRB.
+ */
+if (s390_has_certificate()) {
+iplb->hdr_flags |= DIAG308_IPIB_FLAGS_IPLIR;
+}
+
 return true;
 }
 
diff --git a/include/hw/s390x/ipl/qipl.h b/include/hw/s390x/ipl/qipl.h
index b8e7d1da71..2355fcecbb 100644
--- a/include/hw/s390x/ipl/qipl.h
+++ b/include/hw/s390x/ipl/qipl.h
@@ -23,6 +23,9 @@
 #define MAX_CERTIFICATES 64
 #define CERT_MAX_SIZE (1024 * 8)
 
+#define DIAG308_IPIB_FLAGS_SIPL 0x40
+#define DIAG308_IPIB_FLAGS_IPLIR 0x20
+
 /*
  * The QEMU IPL Parameters will be stored at absolute address
  * 204 (0xcc) which means it is 32-bit word aligned but not
@@ -104,7 +107,8 @@ typedef struct IplBlockQemuScsi IplBlockQemuScsi;
 union IplParameterBlock {
 struct {
 uint32_t len;
-uint8_t  reserved0[3];
+uint8_t  hdr_flags;
+uint8_t  reserved0[2];
 uint8_t  version;
 uint32_t blk0_len;
 uint8_t  pbt;
-- 
2.49.0




[PATCH v3 00/10] Fix RVV encoding corner cases

2025-04-08 Thread Max Chou
This patch series fixes several corner cases of RISC-V vector
instruction's encoding constraints.

This v3 series addresses:
- Merge v2 patches (3 & 4, 9 & 10)
- Remove extra blank line in v2 patch 5
- Remove redundant co-authored-by tags

Thanks to Daniel Henrique Barboza for the suggestions and review.

Anton Blanchard (2):
  target/riscv: rvv: Source vector registers cannot overlap mask
register
  target/riscv: rvv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS

Max Chou (8):
  target/riscv: rvv: Apply vext_check_input_eew to vrgather instructions
to check mismatched input EEWs encoding constraint
  target/riscv: rvv: Apply vext_check_input_eew to
OPIVI/OPIVX/OPFVF(vext_check_ss) instructions
  target/riscv: rvv: Apply vext_check_input_eew to
OPIVV/OPFVV(vext_check_sss) instructions
  target/riscv: rvv: Apply vext_check_input_eew to vector slide
instructions(OPIVI/OPIVX)
  target/riscv: rvv: Apply vext_check_input_eew to vector integer
extension instructions(OPMVV)
  target/riscv: rvv: Apply vext_check_input_eew to vector narrow/widen
instructions
  target/riscv: rvv: Apply vext_check_input_eew to vector indexed
load/store instructions
  target/riscv: Fix the rvv reserved encoding of unmasked instructions

 target/riscv/insn32.decode |  18 +--
 target/riscv/insn_trans/trans_rvbf16.c.inc |   9 +-
 target/riscv/insn_trans/trans_rvv.c.inc| 166 +
 3 files changed, 153 insertions(+), 40 deletions(-)

-- 
2.43.0




Re: [PATCH] hw/nvme: fix attachment of private namespaces

2025-04-08 Thread Keith Busch
On Tue, Apr 08, 2025 at 12:20:46PM +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Fix regression when attaching private namespaces that gets attached to
> the wrong controller.
> 
> Keep track of the original controller "owner" of private namespaces, and
> only attach if this matches on controller enablement.

Looks good.

Reviewed-by: Keith Busch 



[PATCH v3 5/5] utils/qemu-sockets: Introduce inet socket options controlling TCP keep-alive

2025-04-08 Thread Juraj Marcin
From: Juraj Marcin 

With the default TCP stack configuration, it could take as long as 2 hours
before the connection times out due to the other side not being
reachable. However, in some cases, the application needs to be aware of
a connection issue much sooner.

This is the case, for example, for postcopy live migration. If there is
no traffic from the migration destination guest (server-side) to the
migration source guest (client-side), the destination keeps waiting for
pages indefinitely and does not switch to the postcopy-paused state.
This can happen, for example, if the destination QEMU instance is
started with the '-S' command line option and the machine is not started
yet, or if the machine is idle and produces no new page faults for
not-yet-migrated pages.

This patch introduces new inet socket parameters that control count,
idle period, and interval of TCP keep-alive packets before the
connection is considered broken. These parameters are available on
systems where the respective TCP socket options are defined
(TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL).

The default value for all is 0, which means the system configuration is
used.

Signed-off-by: Juraj Marcin 
---
 meson.build |  6 
 qapi/sockets.json   | 15 
 util/qemu-sockets.c | 88 +
 3 files changed, 109 insertions(+)

diff --git a/meson.build b/meson.build
index 41f68d3806..680f47cf42 100644
--- a/meson.build
+++ b/meson.build
@@ -2734,6 +2734,12 @@ if linux_io_uring.found()
   config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
cc.has_header_symbol('liburing.h', 
'io_uring_prep_writev2'))
 endif
+config_host_data.set('HAVE_TCP_KEEPCNT',
+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT'))
+config_host_data.set('HAVE_TCP_KEEPIDLE',
+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE'))
+config_host_data.set('HAVE_TCP_KEEPINTVL',
+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL'))
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
diff --git a/qapi/sockets.json b/qapi/sockets.json
index 62797cd027..bb9d298635 100644
--- a/qapi/sockets.json
+++ b/qapi/sockets.json
@@ -59,6 +59,18 @@
 # @keep-alive: enable keep-alive when connecting to/listening on this socket.
 # (Since 4.2, not supported for listening sockets until 10.1)
 #
+# @keep-alive-count: number of keep-alive packets sent before the connection is
+# closed.  Only supported for TCP sockets on systems where TCP_KEEPCNT
+# socket option is defined.  (Since 10.1)
+#
+# @keep-alive-idle: time in seconds the connection needs to be idle before
+# sending a keepalive packet.  Only supported for TCP sockets on systems
+# where TCP_KEEPIDLE socket option is defined.  (Since 10.1)
+#
+# @keep-alive-interval: time in seconds between keep-alive packets.  Only
+# supported for TCP sockets on systems where TCP_KEEPINTVL is defined.
+# (Since 10.1)
+#
 # @mptcp: enable multi-path TCP.  (Since 6.1)
 #
 # Since: 1.3
@@ -71,6 +83,9 @@
 '*ipv4': 'bool',
 '*ipv6': 'bool',
 '*keep-alive': 'bool',
+'*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' },
+'*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' },
+'*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' },
 '*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } }
 
 ##
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index fed17a1ffb..8e355b097c 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -217,6 +217,45 @@ static int inet_set_sockopts(int sock, InetSocketAddress 
*saddr, Error **errp)
  "Unable to set keep-alive option on socket");
 return -1;
 }
+#ifdef HAVE_TCP_KEEPCNT
+if (saddr->has_keep_alive_count &&
+saddr->keep_alive_count) {
+int keep_count = saddr->has_keep_alive_count;
+ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count,
+ sizeof(keep_count));
+if (ret < 0) {
+error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive count option on 
socket");
+return -1;
+}
+}
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+if (saddr->has_keep_alive_idle &&
+saddr->keep_alive_idle) {
+int keep_idle = saddr->has_keep_alive_idle;
+ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle,
+ sizeof(keep_idle));
+if (ret < 0) {
+error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive idle option on 
socket");
+return -1;
+}
+}
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+if (saddr->has_keep_alive_interval &&
+saddr->keep_alive_interval) {
+int keep_interval =

[PATCH v3 0/5] util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive

2025-04-08 Thread Juraj Marcin
This series extends the work introduced by commit aec21d3175 ("qapi: Add
InetSocketAddress member keep-alive"). [1]

First, the series fixes an issue in qio_dns_resolver_lookup_sync_inet(),
where the InetSocketAddress structure is only partially copied. Next, it
refactors setting client socket options into a separate function and the
success and failure paths in inet_listen_saddr() in preparation for
keep-alive support on server sockets and the addition of new TCP
keep-alive options.

Then, the series adds support for keep-alive on server sockets and adds
three new InetSocketAddress options for control of TCP keep-alive
settings. By default, the value of all new settings is 0, which means no
custom socket option value is set.

This is useful, for example, for live migration. In case there is no
traffic from the destination to the source machine during postcopy, the
destination cannot detect a failed connection due to a lack of
non-acknowledged packets and stays in the postcopy-active state until
paused by the management of the QEMU instance.

[1]: 
https://lore.kernel.org/all/20190725094937.32454-1-vsement...@virtuozzo.com/

---
V3:
- moved the InetSocketAddress struct copy fix and the common function
  setting socket options into a separate commit
- refactored inet_listen_saddr()

V2:
- moved socket options setting into a common function for both server
  and client sockets (suggested by Vladimir)

Juraj Marcin (5):
  io: Fix partial struct copy in qio_dns_resolver_lookup_sync_inet()
  util/qemu-sockets: Refactor setting client sockopts into a separate
function
  util/qemu-sockets: Refactor success and failure paths in
inet_listen_saddr()
  util/qemu-sockets: Add support for keep-alive flag to passive sockets
  utils/qemu-sockets: Introduce inet socket options controlling TCP
keep-alive

 io/dns-resolver.c   |  21 ++
 meson.build |   6 ++
 qapi/sockets.json   |  19 -
 util/qemu-sockets.c | 177 ++--
 4 files changed, 165 insertions(+), 58 deletions(-)

-- 
2.48.1




[PATCH v3 4/5] util/qemu-sockets: Add support for keep-alive flag to passive sockets

2025-04-08 Thread Juraj Marcin
From: Juraj Marcin 

Commit aec21d3175 (qapi: Add InetSocketAddress member keep-alive)
introduces the keep-alive flag, which enables the SO_KEEPALIVE socket
option, but only on client-side sockets. However, this option is also
useful for server-side sockets, so they can check if a client is still
reachable or drop the connection otherwise.

This patch enables the SO_KEEPALIVE socket option on passive server-side
sockets if the keep-alive flag is enabled. This socket option is then
inherited by active server-side sockets communicating with connected
clients.

Signed-off-by: Juraj Marcin 
---
 qapi/sockets.json   | 4 ++--
 util/qemu-sockets.c | 9 +++--
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/qapi/sockets.json b/qapi/sockets.json
index 6a95023315..62797cd027 100644
--- a/qapi/sockets.json
+++ b/qapi/sockets.json
@@ -56,8 +56,8 @@
 # @ipv6: whether to accept IPv6 addresses, default try both IPv4 and
 # IPv6
 #
-# @keep-alive: enable keep-alive when connecting to this socket.  Not
-# supported for passive sockets.  (Since 4.2)
+# @keep-alive: enable keep-alive when connecting to/listening on this socket.
+# (Since 4.2, not supported for listening sockets until 10.1)
 #
 # @mptcp: enable multi-path TCP.  (Since 6.1)
 #
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index a86964786a..fed17a1ffb 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -236,12 +236,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 int saved_errno = 0;
 bool socket_created = false;
 
-if (saddr->keep_alive) {
-error_setg(errp, "keep-alive option is not supported for passive "
-   "sockets");
-return -1;
-}
-
 memset(&ai,0, sizeof(ai));
 ai.ai_flags = AI_PASSIVE;
 if (saddr->has_numeric && saddr->numeric) {
@@ -349,6 +343,9 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 goto fail;
 }
 /* We have a listening socket */
+if (inet_set_sockopts(slisten, saddr, errp)) {
+goto fail;
+}
 freeaddrinfo(res);
 return slisten;
 }
-- 
2.48.1




[PULL 4/4] test-bdrv-drain: Fix data races

2025-04-08 Thread Kevin Wolf
From: Vitalii Mordan 

This patch addresses potential data races involving access to Job fields
in the test-bdrv-drain test.

Fixes: 7253220de4 ("test-bdrv-drain: Test drain vs. block jobs")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2900
Signed-off-by: Vitalii Mordan 
Message-ID: <20250402102119.3345626-1-mor...@ispras.ru>
[kwolf: Fixed up coding style and one missing atomic access]
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 include/qemu/job.h   |  3 +++
 job.c|  6 ++
 tests/unit/test-bdrv-drain.c | 32 +++-
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index 2b873f2576..a5a04155ea 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -545,6 +545,9 @@ bool job_is_ready(Job *job);
 /* Same as job_is_ready(), but called with job lock held. */
 bool job_is_ready_locked(Job *job);
 
+/** Returns whether the job is paused. Called with job_mutex *not* held. */
+bool job_is_paused(Job *job);
+
 /**
  * Request @job to pause at the next pause point. Must be paired with
  * job_resume(). If the job is supposed to be resumed by user action, call
diff --git a/job.c b/job.c
index 660ce22c56..0653bc2ba6 100644
--- a/job.c
+++ b/job.c
@@ -251,6 +251,12 @@ bool job_is_cancelled_locked(Job *job)
 return job->force_cancel;
 }
 
+bool job_is_paused(Job *job)
+{
+JOB_LOCK_GUARD();
+return job->paused;
+}
+
 bool job_is_cancelled(Job *job)
 {
 JOB_LOCK_GUARD();
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 7410e6f352..290cd2a70e 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -632,6 +632,8 @@ typedef struct TestBlockJob {
 BlockDriverState *bs;
 int run_ret;
 int prepare_ret;
+
+/* Accessed with atomics */
 bool running;
 bool should_complete;
 } TestBlockJob;
@@ -667,10 +669,10 @@ static int coroutine_fn test_job_run(Job *job, Error 
**errp)
 
 /* We are running the actual job code past the pause point in
  * job_co_entry(). */
-s->running = true;
+qatomic_set(&s->running, true);
 
 job_transition_to_ready(&s->common.job);
-while (!s->should_complete) {
+while (!qatomic_read(&s->should_complete)) {
 /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
  * emulate some actual activity (probably some I/O) here so that drain
  * has to wait for this activity to stop. */
@@ -685,7 +687,7 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
 static void test_job_complete(Job *job, Error **errp)
 {
 TestBlockJob *s = container_of(job, TestBlockJob, common.job);
-s->should_complete = true;
+qatomic_set(&s->should_complete, true);
 }
 
 BlockJobDriver test_job_driver = {
@@ -791,7 +793,7 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type,
 /* job_co_entry() is run in the I/O thread, wait for the actual job
  * code to start (we don't want to catch the job in the pause point in
  * job_co_entry(). */
-while (!tjob->running) {
+while (!qatomic_read(&tjob->running)) {
 aio_poll(qemu_get_aio_context(), false);
 }
 }
@@ -799,7 +801,7 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type,
 WITH_JOB_LOCK_GUARD() {
 g_assert_cmpint(job->job.pause_count, ==, 0);
 g_assert_false(job->job.paused);
-g_assert_true(tjob->running);
+g_assert_true(qatomic_read(&tjob->running));
 g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 }
 
@@ -825,7 +827,7 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type,
  *
  * paused is reset in the I/O thread, wait for it
  */
-while (job->job.paused) {
+while (job_is_paused(&job->job)) {
 aio_poll(qemu_get_aio_context(), false);
 }
 }
@@ -858,7 +860,7 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type,
  *
  * paused is reset in the I/O thread, wait for it
  */
-while (job->job.paused) {
+while (job_is_paused(&job->job)) {
 aio_poll(qemu_get_aio_context(), false);
 }
 }
@@ -1411,10 +1413,12 @@ static void test_set_aio_context(void)
 
 typedef struct TestDropBackingBlockJob {
 BlockJob common;
-bool should_complete;
 bool *did_complete;
 BlockDriverState *detach_also;
 BlockDriverState *bs;
+
+/* Accessed with atomics */
+bool should_complete;
 } TestDropBackingBlockJob;
 
 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp)
@@ -1422,7 +1426,7 @@ static int coroutine_fn test_drop_backing_job_run(Job 
*job, Error **errp)
 TestDropBackingBlockJob *s =
 container_of(job, TestDropBackingBlockJob, common.job);
 
-while (!s->should_complete) {
+while (!qatomic_read(

[PULL 8/8] scripts/checkpatch: Fix typo in SPDX-License-Identifier keyword

2025-04-08 Thread Philippe Mathieu-Daudé
From: Zhao Liu 

Fix the typo in the error message to help `grep` the example:

ERROR: New file '***' requires 'SPDX-License-Identifer'

Fixes: fa4d79c64dae ("scripts: mandate that new files have 
SPDX-License-Identifier")
Signed-off-by: Zhao Liu 
Reviewed-by: Alex Bennée 
Message-ID: <20250408162702.2350565-1-zhao1@intel.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 scripts/checkpatch.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 6ae9d7febee..365892de042 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1688,11 +1688,11 @@ sub process {
/\.(c|h|py|pl|sh|json|inc|Makefile)$/) {
# source code files MUST have SPDX license declared
ERROR("New file '$expect_spdx_file' requires " .
- "'SPDX-License-Identifer'");
+ "'SPDX-License-Identifier'");
} else {
# Other files MAY have SPDX license if appropriate
WARN("Does new file '$expect_spdx_file' need " .
-"'SPDX-License-Identifer'?");
+"'SPDX-License-Identifier'?");
}
}
$expect_spdx = 1;
-- 
2.47.1




[PULL 7/8] hw/nvme: fix attachment of private namespaces

2025-04-08 Thread Philippe Mathieu-Daudé
From: Klaus Jensen 

Fix a regression where private namespaces get attached to
the wrong controller.

Keep track of the original controller "owner" of private namespaces, and
only attach if this matches on controller enablement.

Fixes: 6ccca4b6bb9f ("hw/nvme: rework csi handling")
Reported-by: Alan Adamson 
Suggested-by: Alan Adamson 
Signed-off-by: Klaus Jensen 
Tested-by: Alan Adamson 
Reviewed-by: Alan Adamson 
Reviewed-by: Keith Busch 
Message-ID: <20250408-fix-private-ns-v1-1-28e169b6b...@samsung.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/nvme/nvme.h   | 3 +++
 hw/nvme/ctrl.c   | 7 ++-
 hw/nvme/ns.c | 4 
 hw/nvme/subsys.c | 9 +
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 6f782ba1882..b5c9378ea4e 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -268,6 +268,9 @@ typedef struct NvmeNamespace {
 NvmeSubsystem *subsys;
 NvmeEnduranceGroup *endgrp;
 
+/* NULL for shared namespaces; set to specific controller if private */
+NvmeCtrl *ctrl;
+
 struct {
 uint32_t err_rec;
 } features;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 518d02dc667..d6b77d4fbc9 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7755,7 +7755,11 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
 NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
 
-if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+if (!ns || (!ns->params.shared && ns->ctrl != n)) {
+continue;
+}
+
+if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
 if (!ns->attached || ns->params.shared) {
 nvme_attach_ns(n, ns);
 }
@@ -8988,6 +8992,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 if (n->namespace.blkconf.blk) {
 ns = &n->namespace;
 ns->params.nsid = 1;
+ns->ctrl = n;
 
 if (nvme_ns_setup(ns, errp)) {
 return;
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 98c1e75a5d2..4ab8ba74f51 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -763,6 +763,10 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 
 ns->id_ns.endgid = cpu_to_le16(0x1);
 ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
+
+if (!ns->params.shared) {
+ns->ctrl = n;
+}
 }
 
 static const Property nvme_ns_props[] = {
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 2ae56f12a59..b617ac3892a 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -56,7 +56,7 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 {
 NvmeSubsystem *subsys = n->subsys;
 NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
-int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
+int cntlid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
 
 if (pci_is_vf(&n->parent_obj)) {
 cntlid = le16_to_cpu(sctrl->scid);
@@ -92,13 +92,6 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 
 subsys->ctrls[cntlid] = n;
 
-for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
-NvmeNamespace *ns = subsys->namespaces[nsid];
-if (ns && ns->params.shared && !ns->params.detached) {
-nvme_attach_ns(n, ns);
-}
-}
-
 return cntlid;
 }
 
-- 
2.47.1




[PULL 3/8] docs/arm: Add apple HVF host for supported guest CPU type

2025-04-08 Thread Philippe Mathieu-Daudé
From: Zhang Chen 

In my test, the latest QEMU already supports Apple HVF for -cpu host and max.

From guest VM lscpu:

Architecture: aarch64
  CPU op-mode(s): 64-bit
  Byte Order: Little Endian
CPU(s):   11
  On-line CPU(s) list:0-10
Vendor ID:Apple
  Model name: -
Model:0
Thread(s) per core:   1
Core(s) per socket:   11
Socket(s):1
Stepping: 0x0
BogoMIPS: 48.00
Flags:fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics 
fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 asimddp sha512 asim
  dfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp 
flagm2 frint

Signed-off-by: Zhang Chen 
Reviewed-by: Alex Bennée 
Message-ID: <20250401083102.72845-1-zhangc...@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 docs/system/arm/virt.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index adf446c0a29..6a719b95863 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -70,11 +70,11 @@ Supported guest CPU types:
 - ``cortex-a76`` (64-bit)
 - ``cortex-a710`` (64-bit)
 - ``a64fx`` (64-bit)
-- ``host`` (with KVM only)
+- ``host`` (with KVM and HVF only)
 - ``neoverse-n1`` (64-bit)
 - ``neoverse-v1`` (64-bit)
 - ``neoverse-n2`` (64-bit)
-- ``max`` (same as ``host`` for KVM; best possible emulation with TCG)
+- ``max`` (same as ``host`` for KVM and HVF; best possible emulation with TCG)
 
 Note that the default is ``cortex-a15``, so for an AArch64 guest you must
 specify a CPU type.
-- 
2.47.1




Re: [PATCH] ui/curses: Fix infinite loop on windows

2025-04-08 Thread Philippe Mathieu-Daudé

On 3/4/25 03:07, William Hu via wrote:

 From a42046272f0544dd18ed58661e53ea17d1584c2c Mon Sep 17 00:00:00 2001
From: William Hu 
Date: Wed, 2 Apr 2025 12:00:00 -0400
Subject: [PATCH] ui/curses: Fix infinite loop on windows

Replace -1 comparisons for wint_t with WEOF to fix infinite loop caused by a
65535 == -1 comparison.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2905
Signed-off-by: William Hu 
---
  ui/curses.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/ui/curses.c b/ui/curses.c
index a39aee8762..3f5c5adf78 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -265,7 +265,12 @@ static int curses2foo(const int _curses2foo[], const int 
_curseskey2foo[],
  
  static void curses_refresh(DisplayChangeListener *dcl)

  {
-int chr, keysym, keycode, keycode_alt;
+/*
+ * DO NOT MAKE chr AN INT:
+ * Causes silent conversion errors on Windows where wint_t is unsigned 
short.
+ */
+wint_t chr = 0;
+int keysym, keycode, keycode_alt;
  enum maybe_keycode maybe_keycode = CURSES_KEYCODE;
  
  curses_winch_check();

@@ -284,8 +289,9 @@ static void curses_refresh(DisplayChangeListener *dcl)
  /* while there are any pending key strokes to process */
  chr = console_getch(&maybe_keycode);
  
-if (chr == -1)

+if (chr == WEOF) {
  break;
+}


Correct but incomplete, also missing the same check few lines below:

-- >8 --
diff --git a/ui/curses.c b/ui/curses.c
index a39aee87623..9c33de331cd 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -304,9 +304,9 @@ static void curses_refresh(DisplayChangeListener *dcl)
 /* alt or esc key */
 if (keycode == 1) {
 enum maybe_keycode next_maybe_keycode = CURSES_KEYCODE;
-int nextchr = console_getch(&next_maybe_keycode);
+wint_t nextchr = console_getch(&next_maybe_keycode);

-if (nextchr != -1) {
+if (nextchr != WEOF) {
 chr = nextchr;
 maybe_keycode = next_maybe_keycode;
 keycode_alt = ALT;
---

With that:
Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 0/2] Add property to support writing ERSTBA in high-low order

2025-04-08 Thread Bernhard Beschow



Am 5. April 2025 14:00:00 UTC schrieb Guenter Roeck :
>This series is needed to support the USB interface on imx8mp-evk when
>booting the Linux kernel.
>
>According to the XHCI specification, ERSTBA should be written in Low-High
>order. The Linux kernel writes the high word first. This results in an
>initialization failure.
>
>The following information is found in the Linux kernel commit log.
>
>[Synopsys]- The host controller was design to support ERST setting
>during the RUN state. But since there is a limitation in controller
>in supporting separate ERSTBA_HI and ERSTBA_LO programming,
>It is supported when the ERSTBA is programmed in 64bit,
>or in 32 bit mode ERSTBA_HI before ERSTBA_LO
>
>[Synopsys]- The internal initialization of event ring fetches
>the "Event Ring Segment Table Entry" based on the indication of
>ERSTBA_LO written.
>
>Add property to support writing the high word first. Enable it
>for dwc3.
>
>
>Guenter Roeck (2):
>  hw: usb: xhci: Add property to support writing ERSTBA in high-low order
>  hw/usb/hcd-dwc3: Set erstba-hi-lo property
>
> hw/usb/hcd-dwc3.c | 1 +
> hw/usb/hcd-xhci.c | 8 +++-
> hw/usb/hcd-xhci.h | 1 +
> 3 files changed, 9 insertions(+), 1 deletion(-)

Series:
Tested-by: Bernhard Beschow 

... on imx8mp-evk board with 6.14 defconfig kernel. It indeed helps to get USB 
working with this kernel.

Best regards,
Bernhard



Re: [PATCH 0/2] Add property to support writing ERSTBA in high-low order

2025-04-08 Thread Guenter Roeck

On 4/8/25 12:57, Bernhard Beschow wrote:



Am 8. April 2025 16:09:58 UTC schrieb Guenter Roeck :

On 4/6/25 11:08, Bernhard Beschow wrote:
[ .. ]


Yeah, it works with Buildroot as described in the handbook. When I append 
`-netdev user,id=net0 -device virtio-net-pci,netdev=net0` on the cli I can 
`wget http://www.google.com` successfully. When I omit it there is no network 
connectivity. This is with a 6.6.23 vendor kernel.



I had no luck with virtio-net-pci. virtio-pci works for me, but I can not get 
real PCI devices
(such as nvme or scsi adapters) to work.


I now tested with the latest Buildroot recipe, changing to upstream kernel 
version 6.14 and using the defconfig. The `wget` command still works for me 
with virtio-net-pci. However, I can confirm that I need your xhci patches for 
the usb storage device to be detected.



Following up on this, my problem is that adding "-netdev user,id=net0 -device 
virtio-net-pci,netdev=net0"
to the command line adds a _second_ Ethernet interface, in addition to the 
default one.
This results in
qemu-system-arm: warning: nic imx.enet.0 has no peer
reported when qemu starts.


I get this too when using virtio-net-pci successfully.



I can not get that second interface to work, probably because of some userspace 
issue.

Anyway, I never see any interrupts on the virtual PCI interface. From 
/proc/interrupts:

277:  0  PCI-MSI 524288 Edge  virtio0-config
278:  0  PCI-MSI 524289 Edge  virtio0-input.0
279:  0  PCI-MSI 524290 Edge  virtio0-output.0


I get:

206:  0  0  0  0  PCI-MSI 524288 Edge  
virtio0-config
207:  3  0  0  0  PCI-MSI 524289 Edge  
virtio0-input.0
208:  8  0  0  0  PCI-MSI 524290 Edge  
virtio0-output.0

Note that I'm using four CPUs, i.e. `-smp 4`.



I must be missing something. Can you send me your complete qemu command line ?
I'll also try building a buildroot image to see where it gets me.



That may work for virtio-net-pci, but it doesn't work for other PCI(e) drivers.
If I try to attach any other PCIe devices, the device is reported with lspci but
then its initialization times out because it does not get any interrupts.


Indeed, trying with e1000e:

205:  0  0  0  0  PCI-MSI   0 Edge  PCIe PME
206: 74  0  0  0  PCI-MSI 524288 Edge  
eth1-rx-0
207: 20  0  0  0  PCI-MSI 524289 Edge  
eth1-tx-0
208: 32  0  0  0  PCI-MSI 524290 Edge  eth1

But I get this repeatedly with varying CPUs:

[   14.657163] e1000e :01:00.0 eth1: NIC Link is Up 1000 Mbps Full Duplex, 
Flow Control: Rx/Tx
[   19.980452] e1000e :01:00.0 eth1: NETDEV WATCHDOG: CPU: 0: transmit 
queue 0 timed out 5312 ms
[   19.982491] e1000e :01:00.0 eth1: Reset adapter unexpectedly



It turns out that sabrelite has the same problem.


Did it work with QEMU 9.2?



No, the pcie interfaces on sabrelite don't instantiate for me with qemu 9.2 
(9.2.3,
more specifically). I see the pcie root port, but nothing behind it.

Guenter




Re: [PATCH preview 0/3] rust: update build system for Meson 1.8.0

2025-04-08 Thread Bernhard Beschow



Am 5. April 2025 10:06:00 UTC schrieb Paolo Bonzini :
>Meson 1.7.0 and 1.8.0 include improved support for Rust, namely:
>* support for "objects" in Rust executables
>* support for doctest targets

Using Meson 1.7.2 (shipped with my distro) I didn't succeed with either of 
these. If just applying the first patch with --enable-modules I get linker 
errors again. With all patches applied, "doctest" isn't recognized. Is this 
perhaps 1.8-only material?

Best regards,
Bernhard

>
>Use it to remove BQL-related hacks, fix --enable-modules --enable-rust
>and also simplify the Meson logic for building the qemu-api crate
>(which may help splitting the crate, too).
>
>Meson also supports clippy and rustdoc but there are some bugs in the
>prerelease.  I'll try to get them fixed before 1.8.0.
>
>Paolo
>
>Paolo Bonzini (3):
>  rust: use "objects" for Rust executables as well
>  rust: add qemu-api doctests to "meson test"
>  rust: cell: remove support for running doctests with "cargo test --doc"
>
> docs/devel/rust.rst|  2 --
> .gitlab-ci.d/buildtest.yml |  5 -
> rust/qemu-api/meson.build  | 35 +++
> rust/qemu-api/src/cell.rs  | 22 +-
> 4 files changed, 24 insertions(+), 40 deletions(-)
>



Re: [PATCH-for-10.0 0/3] More imx8mp-evk improvements

2025-04-08 Thread Bernhard Beschow



Am 8. April 2025 18:57:45 UTC schrieb "Philippe Mathieu-Daudé" 
:
>On 5/4/25 23:48, Bernhard Beschow wrote:
>
>> Guenter Roeck (2):
>>hw/arm/imx8mp-evk: Remove unimplemented cpu-idle-states properties
>>  from devicetree
>>hw/arm/imx8mp-evk: Remove unimplemented nxp,imx8mp-fspi node from
>>  devicetree
>
>Patches 2 & 3 queued, thanks!

Thanks! This is a real quality of life improvement.

Best regards,
Bernhard



Re: [PATCH 1/2] accel/tcg: add get_virtual_clock for TCG

2025-04-08 Thread Alex Bennée
Mark Burton  writes:

> In principle I like this, but 
> 1/ throughout the API can we please make everything consistent, ensuring that all
> registrations take a handle (void *) and all callback functions pass that
> handle (and the ID)
>  - right now, some things do, some things don't, and this specific case
> seems to take a handle on registration, but does not provide it on
> callback (!)

The handle is something the plugin should have already. The plugin id is
needed so the framework knows who to deliver the callback back to.

>
> (This is the current implementation :
> typedef int64_t (*qemu_plugin_time_cb_t) (void);
> ...
> QEMU_PLUGIN_API void qemu_plugin_register_time_cb(qemu_plugin_id_t id, const 
> void *handle, qemu_plugin_time_cb_t cb);
> )
>
> 2/ The current implementation makes use of the callback _ONLY_ in the
> case of single TCG — it’s most interesting when we have MTTCG enabled

Ahh - as I said compile tested only ;-)

I can fix that for v2.


> (and I see no reason not to provide the same mechanism for any other
> accelerator if/when anything in QEMU requests ’the time’.

That would mean making a clear separation in plugins for things that are
"events" which we do do from other hypervisors and "instrumentation"
which can only be done under TCG.


> 
>
> Cheers
> Mark.
>
>
>> On 3 Apr 2025, at 13:38, Alex Bennée  wrote:
>> 
>> WARNING: This email originated from outside of Qualcomm. Please be wary of 
>> any links or attachments, and do not enable macros.
>> 
>> Rather than allowing cpus_get_virtual_clock() to fall through to
>> cpu_get_clock() introduce a TCG handler so it can make a decision
>> about what time it is.
>> 
>> Initially this just calls cpu_get_clock() as before but this will
>> change in later commits.
>> 
>> Signed-off-by: Alex Bennée 
>> ---
>> accel/tcg/tcg-accel-ops.c | 6 ++
>> 1 file changed, 6 insertions(+)
>> 
>> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
>> index d9b662efe3..1432d1c5b1 100644
>> --- a/accel/tcg/tcg-accel-ops.c
>> +++ b/accel/tcg/tcg-accel-ops.c
>> @@ -197,6 +197,11 @@ static inline void tcg_remove_all_breakpoints(CPUState 
>> *cpu)
>> cpu_watchpoint_remove_all(cpu, BP_GDB);
>> }
>> 
>> +static int64_t tcg_get_virtual_clock(void)
>> +{
>> +return cpu_get_clock();
>> +}
>> +
>> static void tcg_accel_ops_init(AccelOpsClass *ops)
>> {
>> if (qemu_tcg_mttcg_enabled()) {
>> @@ -212,6 +217,7 @@ static void tcg_accel_ops_init(AccelOpsClass *ops)
>> ops->get_virtual_clock = icount_get;
>> ops->get_elapsed_ticks = icount_get;
>> } else {
>> +ops->get_virtual_clock = tcg_get_virtual_clock;
>> ops->handle_interrupt = tcg_handle_interrupt;
>> }
>> }
>> --
>> 2.39.5
>> 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH v2 1/2] vfio/spapr: Enhance error handling in vfio_spapr_create_window()

2025-04-08 Thread Cédric Le Goater

I think following change should be enough along with your suggested changes:

diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 1a5d1611f2cd..27fed3cd463c 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -378,8 +378,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase 
*bcontainer,
  }
  
  ret = vfio_spapr_create_window(container, section, &pgsize);

-if (ret) {
-error_setg_errno(errp, -ret, "Failed to create SPAPR window");
+if (!ret) {
  return false;
  }


I think you mean :

if (!vfio_spapr_create_window(container, section, &pgsize, errp)) {
return false;
}

if so, yes. This is the current practice in QEMU.

Thanks,

C.




Re: [PATCH v8 08/28] vfio: add region cache

2025-04-08 Thread John Levon
On Fri, Apr 04, 2025 at 06:18:20PM +0100, John Levon wrote:

> On Fri, Apr 04, 2025 at 06:57:46PM +0200, Cédric Le Goater wrote:
> 
> > > > why not populate vbasedev->regions[index] in vfio_get_all_regions() ?
> > > 
> > > Good question. I presume it's not possible for us to ever look up a region
> > > that has somehow appeared *after* vfio_prepare_device() ?
> > > 
> > > We'd end up off the end of the array in that case anyway.
> > 
> > I was confused. I thought we were caching VFIORegions ...
> > 
> > Anyway, this is an optimisation and I fail to understand where
> > the VFIO_DEVICE_GET_REGION_INFO ioctl is called on a hot path.
> > 
> > Is it for interrupts ? Please explain.
> > 
> > Do you have figures ?
> 
> That's a great question that I don't know the answer to (like much of this 
> code
> I just inherited it). Let me try to investigate.

I found one reason. hw/vfio/pci.c stores VFIOPCIDevice::config_offset so it
doesn't need to do a get region info on every config space access.

But after the refactoring, vfio_io_region_read() gets passed a region index (the
idea of a "region offset" isn't meaningful to vfio-user).

Without the cache, the kernel vfio implementation:

```
867 static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t 
off,  
868 uint32_t size, void *data, bool post)   
 
869 {   
 
870 struct vfio_region_info *info = vbasedev->regions[index];   
 
871 int ret;
 
872 
 
873 ret = pwrite(vbasedev->fd, data, size, info->offset + off); 
 
```

would have to look up the region offset every time.

regards
john



[PATCH v1 02/24] hw/s390x/ipl: Create certificate store

2025-04-08 Thread Zhuoying Cai
Create a certificate store for boot certificates used for secure IPL.

Load certificates from the -boot-certificate option into the cert store.

Currently, only x509 certificates in DER format that use the SHA-256 hashing
algorithm are supported, as these are the types required for secure boot
on s390.

Signed-off-by: Zhuoying Cai 
---
 hw/s390x/cert-store.c   | 249 
 hw/s390x/cert-store.h   |  50 
 hw/s390x/ipl.c  |   9 ++
 hw/s390x/ipl.h  |   3 +
 hw/s390x/meson.build|   1 +
 include/hw/s390x/ipl/qipl.h |   3 +
 6 files changed, 315 insertions(+)
 create mode 100644 hw/s390x/cert-store.c
 create mode 100644 hw/s390x/cert-store.h

diff --git a/hw/s390x/cert-store.c b/hw/s390x/cert-store.c
new file mode 100644
index 00..1aa8aea040
--- /dev/null
+++ b/hw/s390x/cert-store.c
@@ -0,0 +1,249 @@
+/*
+ * S390 certificate store implementation
+ *
+ * Copyright 2025 IBM Corp.
+ * Author(s): Zhuoying Cai 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cert-store.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "hw/s390x/ebcdic.h"
+#include "qemu/cutils.h"
+#include "cert-store.h"
+
+#ifdef CONFIG_GNUTLS
+#include 
+#include 
+#endif /* #define CONFIG_GNUTLS */
+
+static const char *s390_get_boot_certificates(void)
+{
+QemuOpts *opts;
+const char *path;
+
+opts = qemu_find_opts_singleton("boot-certificates");
+path = qemu_opt_get(opts, "boot-certificates");
+
+return path;
+}
+
+static size_t cert2buf(char *path, size_t max_size, char **cert_buf)
+{
+size_t size;
+g_autofree char *buf;
+buf = g_malloc(max_size);
+
+if (!g_file_get_contents(path, &buf, &size, NULL) ||
+size == 0 || size > max_size) {
+return 0;
+}
+
+*cert_buf = g_steal_pointer(&buf);
+
+return size;
+}
+
+#ifdef CONFIG_GNUTLS
+int g_init_cert(uint8_t *raw_cert, size_t cert_size, gnutls_x509_crt_t *g_cert)
+{
+int rc;
+
+if (gnutls_x509_crt_init(g_cert) < 0) {
+return -1;
+}
+
+gnutls_datum_t datum_cert = {raw_cert, cert_size};
+rc = gnutls_x509_crt_import(*g_cert, &datum_cert, GNUTLS_X509_FMT_DER);
+if (rc) {
+gnutls_x509_crt_deinit(*g_cert);
+return rc;
+}
+
+return 0;
+}
+#endif /* CONFIG_GNUTLS */
+
+static int init_cert_x509_der(size_t size, char *raw, S390IPLCertificate 
**qcert)
+{
+#ifdef CONFIG_GNUTLS
+gnutls_x509_crt_t g_cert = NULL;
+g_autofree S390IPLCertificate *q_cert;
+size_t key_id_size;
+size_t hash_size;
+int rc;
+
+rc = g_init_cert((uint8_t *)raw, size, &g_cert);
+if (rc) {
+if (rc == GNUTLS_E_ASN1_TAG_ERROR) {
+error_report("The certificate is not in DER format");
+}
+return -1;
+}
+
+rc = gnutls_x509_crt_get_key_id(g_cert, GNUTLS_KEYID_USE_SHA256, NULL, 
&key_id_size);
+if (rc != GNUTLS_E_SHORT_MEMORY_BUFFER) {
+error_report("Failed to get certificate key ID size");
+goto out;
+}
+
+rc = gnutls_x509_crt_get_fingerprint(g_cert, GNUTLS_DIG_SHA256, NULL, 
&hash_size);
+if (rc != GNUTLS_E_SHORT_MEMORY_BUFFER) {
+error_report("Failed to get certificate hash size");
+goto out;
+}
+
+q_cert = g_malloc(sizeof(*q_cert));
+q_cert->size = size;
+q_cert->key_id_size = key_id_size;
+q_cert->hash_size = hash_size;
+q_cert->raw = raw;
+q_cert->format = GNUTLS_X509_FMT_DER;
+*qcert = g_steal_pointer(&q_cert);
+
+gnutls_x509_crt_deinit(g_cert);
+
+return 0;
+out:
+gnutls_x509_crt_deinit(g_cert);
+return -1;
+#else
+error_report("Cryptographic library is not enabled")
+return -1;
+#endif /* #define CONFIG_GNUTLS */
+}
+
+static int check_path_type(const char *path)
+{
+struct stat path_stat;
+
+stat(path, &path_stat);
+
+if (S_ISDIR(path_stat.st_mode)) {
+return S_IFDIR;
+} else if (S_ISREG(path_stat.st_mode)) {
+return S_IFREG;
+} else {
+return -1;
+}
+}
+
+static int init_cert(char *paths, S390IPLCertificate **qcert)
+{
+char *buf;
+char vc_name[VC_NAME_LEN_BYTES];
+const gchar *filename;
+size_t size;
+
+filename = g_path_get_basename(paths);
+
+size = cert2buf(paths, CERT_MAX_SIZE, &buf);
+if (size == 0) {
+error_report("Failed to load certificate: %s", paths);
+return -1;
+}
+
+if (init_cert_x509_der(size, buf, qcert) < 0) {
+error_report("Failed to initialize certificate: %s", paths);
+return -1;
+}
+
+/*
+ * Left justified certificate name with padding on the right with blanks.
+ * Convert certificate name to EBCDIC.
+ */
+strpadcpy(vc_name, VC_NAME_LEN_BYTES, filename, ' ');
+ebcdic_put((*qcert)->vc_name, vc_name, VC_NAME_LEN_BYTES);
+
+return 0;
+}
+
+static void update_cert_store(S390IPLCertificateStore *cert_store,
+ 

[PATCH v1 09/24] s390x/diag: Implement DIAG 508 subcode 2 for signature verification

2025-04-08 Thread Zhuoying Cai
From: Collin Walling 

DIAG 508 subcode 2 performs signature-verification on signed components.
A signed component may be a Linux kernel image, or any other signed
binary. **Verification of initrd is not supported.**

The instruction call expects two item-pairs: an address of a device
component, an address of the analogous signature file (in PKCS#7 format),
and their respective lengths. All of this data should be encapsulated
within a Diag508SignatureVerificationBlock, with the CertificateStoreInfo
fields ignored. The DIAG handler will read from the provided addresses
to retrieve the necessary data, parse the signature file, then
perform the signature-verification. Because there is no way to
correlate a specific certificate to a component, each certificate
in the store is tried until either verification succeeds, or all
certs have been exhausted.

The subcode value is denoted by setting the second-to-left-most bit of
a 2-byte field.

A return code of 1 indicates success, and the index and length of the
corresponding certificate will be set in the CertificateStoreInfo
portion of the SigVerifBlock. The following values indicate failure:

0x0402: component data is invalid
0x0502: certificate is not in x509 format
0x0602: signature is not in PKCS#7 format
0x0702: signature-verification failed

Signed-off-by: Collin Walling 
---
 include/hw/s390x/ipl/diag508.h |  25 +++
 target/s390x/diag.c| 131 -
 2 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/include/hw/s390x/ipl/diag508.h b/include/hw/s390x/ipl/diag508.h
index 83c4439cb2..f8f4b6398e 100644
--- a/include/hw/s390x/ipl/diag508.h
+++ b/include/hw/s390x/ipl/diag508.h
@@ -13,5 +13,30 @@
 #define S390X_DIAG508_H
 
 #define DIAG_508_SUBC_QUERY_SUBC0x
+#define DIAG_508_SUBC_SIG_VERIF 0x4000
+
+#define DIAG_508_RC_OK  0x0001
+#define DIAG_508_RC_NO_CERTS0x0102
+#define DIAG_508_RC_CERT_NOT_FOUND  0x0202
+#define DIAG_508_RC_NO_MEM_FOR_CERT 0x0302
+#define DIAG_508_RC_INVAL_COMP_DATA 0x0402
+#define DIAG_508_RC_INVAL_X509_CERT 0x0502
+#define DIAG_508_RC_INVAL_PKCS7_SIG 0x0602
+#define DIAG_508_RC_FAIL_VERIF  0x0702
+
+struct Diag508CertificateStoreInfo {
+uint8_t  idx;
+uint64_t len;
+} QEMU_PACKED;
+typedef struct Diag508CertificateStoreInfo Diag508CertificateStoreInfo;
+
+struct Diag508SignatureVerificationBlock {
+Diag508CertificateStoreInfo csi;
+uint64_t comp_len;
+uint64_t comp_addr;
+uint64_t sig_len;
+uint64_t sig_addr;
+} QEMU_PACKED;
+typedef struct Diag508SignatureVerificationBlock 
Diag508SignatureVerificationBlock;
 
 #endif
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index ad7f4b5025..cecb8bf130 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -25,6 +25,11 @@
 #include "target/s390x/kvm/pv.h"
 #include "qemu/error-report.h"
 
+#ifdef CONFIG_GNUTLS
+#include 
+#include 
+#include 
+#endif /* CONFIG_GNUTLS */
 
 int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3)
 {
@@ -489,9 +494,67 @@ void handle_diag_320(CPUS390XState *env, uint64_t r1, 
uint64_t r3, uintptr_t ra)
 env->regs[r1 + 1] = rc;
 }
 
+#ifdef CONFIG_GNUTLS
+#define datum_init(datum, data, size) \
+datum = (gnutls_datum_t){data, size}
+
+static int diag_508_init_comp(gnutls_datum_t *comp,
+  Diag508SignatureVerificationBlock *svb)
+{
+uint8_t *svb_comp = NULL;
+
+if (!svb->comp_len || !svb->comp_addr) {
+error_report("No component data.");
+return -1;
+}
+
+/*
+ * corrupted size vs. prev_size in fastbins, occurs during 2nd iteration,
+ * allocating 1mil bytes.
+ */
+svb_comp = g_malloc0(svb->comp_len);
+cpu_physical_memory_read(svb->comp_addr, svb_comp, svb->comp_len);
+
+/*
+ * Component data is not written back to the caller,
+ * so no need to do a deep copy. Comp is freed when
+ * svb is freed.
+ */
+datum_init(*comp, svb_comp, svb->comp_len);
+return 0;
+}
+
+static int diag_508_init_signature(gnutls_pkcs7_t *sig,
+   Diag508SignatureVerificationBlock *svb)
+{
+gnutls_datum_t datum_sig;
+uint8_t *svb_sig = NULL;
+
+if (!svb->sig_len || !svb->sig_addr) {
+error_report("No signature data");
+return -1;
+}
+
+svb_sig = g_malloc0(svb->sig_len);
+cpu_physical_memory_read(svb->sig_addr, svb_sig, svb->sig_len);
+
+if (gnutls_pkcs7_init(sig) < 0) {
+error_report("Failed to initalize pkcs7 data.");
+return -1;
+}
+
+datum_init(datum_sig, svb_sig, svb->sig_len);
+return gnutls_pkcs7_import(*sig, &datum_sig, GNUTLS_X509_FMT_DER);
+
+}
+#endif /* CONFIG_GNUTLS */
+
 void handle_diag_508(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t 
ra)
 {
+S390IPLCertificateStore *qcs = s390_ipl_get_certificate_store();
+size_t csi_size = sizeof(Diag508CertificateStoreInfo);
 uint64_t su

[PATCH v1 10/24] pc-bios/s390-ccw: Introduce IPL Information Report Block (IIRB)

2025-04-08 Thread Zhuoying Cai
The IPL information report block (IIRB) contains information used
to locate IPL records and to report the results of signature verification
of one or more secure components of the load device.

IIRB is stored immediately following the IPL Parameter Block. Results on
component verification in any case (failure or success) are stored.

Signed-off-by: Zhuoying Cai 
---
 pc-bios/s390-ccw/iplb.h | 62 +
 1 file changed, 62 insertions(+)

diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index 08f259ff31..bdbc733e16 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -23,6 +23,68 @@ extern QemuIplParameters qipl;
 extern IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
 extern bool have_iplb;
 
+struct IplInfoReportBlockHeader {
+uint32_t len;
+uint8_t  iirb_flags;
+uint8_t  reserved1[2];
+uint8_t  version;
+uint8_t  reserved2[8];
+} __attribute__ ((packed));
+typedef struct IplInfoReportBlockHeader IplInfoReportBlockHeader;
+
+struct IplInfoBlockHeader {
+uint32_t len;
+uint8_t  ibt;
+uint8_t  reserved1[3];
+uint8_t  reserved2[8];
+} __attribute__ ((packed));
+typedef struct IplInfoBlockHeader IplInfoBlockHeader;
+
+enum IplIbt {
+IPL_IBT_CERTIFICATES = 1,
+IPL_IBT_COMPONENTS = 2,
+};
+
+struct IplSignatureCertificateEntry {
+uint64_t addr;
+uint64_t len;
+} __attribute__ ((packed));
+typedef struct IplSignatureCertificateEntry IplSignatureCertificateEntry;
+
+struct IplSignatureCertificateList {
+IplInfoBlockHeaderipl_info_header;
+IplSignatureCertificateEntry  cert_entries[MAX_CERTIFICATES];
+} __attribute__ ((packed));
+typedef struct IplSignatureCertificateList IplSignatureCertificateList;
+
+#define S390_IPL_COMPONENT_FLAG_SC  0x80
+#define S390_IPL_COMPONENT_FLAG_CSV 0x40
+
+struct IplDeviceComponentEntry {
+uint64_t addr;
+uint64_t len;
+uint8_t  flags;
+uint8_t  reserved1[5];
+uint16_t cert_index;
+uint8_t  reserved2[8];
+} __attribute__ ((packed));
+typedef struct IplDeviceComponentEntry IplDeviceComponentEntry;
+
+struct IplDeviceComponentList {
+IplInfoBlockHeader   ipl_info_header;
+IplDeviceComponentEntry  device_entries[MAX_CERTIFICATES];
+} __attribute__ ((packed));
+typedef struct IplDeviceComponentList IplDeviceComponentList;
+
+#define COMP_LIST_MAX   sizeof(IplDeviceComponentList)
+#define CERT_LIST_MAX   sizeof(IplSignatureCertificateList)
+
+struct IplInfoReportBlock {
+IplInfoReportBlockHeader hdr;
+uint8_t  info_blks[COMP_LIST_MAX + CERT_LIST_MAX];
+} __attribute__ ((packed));
+typedef struct IplInfoReportBlock IplInfoReportBlock;
+
 #define S390_IPL_TYPE_FCP 0x00
 #define S390_IPL_TYPE_CCW 0x02
 #define S390_IPL_TYPE_QEMU_SCSI 0xff
-- 
2.49.0




[PATCH v1 22/24] pc-bios/s390-ccw: Handle true secure IPL mode

2025-04-08 Thread Zhuoying Cai
When secure boot is enabled (-secure-boot on) and certificate(s) are
provided, the boot operates in True Secure IPL mode.

Any verification error during True Secure IPL mode will cause the
entire boot process to terminate.

Secure IPL in audit mode requires at least one certificate provided in
the key store along with necessary facilities. If secure boot is enabled
but no certificate is provided, the boot process will also terminate, as
this is not a valid secure boot configuration.

Note: True Secure IPL mode is implemented for the SCSI scheme of
virtio-blk/virtio-scsi devices.

Signed-off-by: Zhuoying Cai 
---
 pc-bios/s390-ccw/bootmap.c  | 25 -
 pc-bios/s390-ccw/iplb.h |  7 +++
 pc-bios/s390-ccw/main.c |  6 +-
 pc-bios/s390-ccw/s390-ccw.h |  2 ++
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 4bc6311802..a22061e1ad 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -757,6 +757,10 @@ static void valid_sclab_check(SclabOriginLocator 
*sclab_locator,
 comps->device_entries[comp_index].cei |= 
S390_IPL_COMPONENT_CEI_INVALID_SCLAB;
 
 /* a missing SCLAB will not be reported in audit mode */
+if (boot_mode == ZIPL_SECURE_MODE) {
+print_func(is_magic_match, "Magic is not matched. SCLAB does not 
exist");
+}
+
 return;
 }
 
@@ -1164,7 +1168,11 @@ static int zipl_run_secure(ComponentEntry *entry, 
uint8_t *tmp_sec)
 int addr_range_index = 0;
 
 void (*print_func)(bool, const char *) = NULL;
-print_func = &IPL_check;
+if (boot_mode == ZIPL_SECURE_MODE) {
+print_func = &IPL_assert;
+} else if (boot_mode == ZIPL_SECURE_AUDIT_MODE) {
+print_func = &IPL_check;
+}
 
 if (!secure_ipl_supported()) {
 return -1;
@@ -1321,6 +1329,7 @@ static int zipl_run(ScsiBlockPtr *pte)
 entry = (ComponentEntry *)(&header[1]);
 
 switch (boot_mode) {
+case ZIPL_SECURE_MODE:
 case ZIPL_SECURE_AUDIT_MODE:
 if (zipl_run_secure(entry, tmp_sec)) {
 return -1;
@@ -1692,10 +1701,16 @@ static int zipl_load_vscsi(void)
 int zipl_mode(void)
 {
 uint32_t cert_len;
+bool secure;
 
 cert_len = request_certificate((uint64_t *)certs_sec, 0);
+secure = is_secure_boot_on(iplb->hdr_flags);
 
-return (cert_len > 0) ? ZIPL_SECURE_AUDIT_MODE : ZIPL_NORMAL_MODE;
+if (secure) {
+return (cert_len > 0) ? ZIPL_SECURE_MODE : ZIPL_SECURE_INVALID_MODE;
+} else {
+return (cert_len > 0) ? ZIPL_SECURE_AUDIT_MODE : ZIPL_NORMAL_MODE;
+}
 }
 
 void zipl_load(void)
@@ -1703,7 +1718,7 @@ void zipl_load(void)
 VDev *vdev = virtio_get_device();
 
 if (vdev->is_cdrom) {
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE) {
+if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == 
ZIPL_SECURE_MODE) {
 panic("Secure boot from ISO image is not supported!");
 }
 ipl_iso_el_torito();
@@ -1712,7 +1727,7 @@ void zipl_load(void)
 }
 
 if (virtio_get_device_type() == VIRTIO_ID_NET) {
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE) {
+if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == 
ZIPL_SECURE_MODE) {
 panic("Virtio net boot device does not support secure boot!");
 }
 netmain();
@@ -1725,7 +1740,7 @@ void zipl_load(void)
 return;
 }
 
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE) {
+if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == ZIPL_SECURE_MODE) {
 panic("ECKD boot device does not support secure boot!");
 }
 
diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index 42a9e081fe..734d049f42 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -185,6 +185,13 @@ static inline bool load_next_iplb(void)
 return true;
 }
 
+static inline bool is_secure_boot_on(uint8_t hdr_flags)
+{
+/* If secure boot is on, SIPL bit and IPLIR bit must be on. */
+return (hdr_flags & DIAG308_IPIB_FLAGS_SIPL) &&
+   (hdr_flags & DIAG308_IPIB_FLAGS_IPLIR);
+}
+
 static inline uint64_t diag320(void *data, unsigned long subcode)
 {
 register unsigned long addr asm("0") = (unsigned long)data;
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 92004a6f82..6189a5a7ba 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -277,10 +277,14 @@ static void ipl_boot_device(void)
 boot_mode = zipl_mode();
 }
 
+if (boot_mode == ZIPL_SECURE_INVALID_MODE) {
+panic("Need at least one certificate for secure boot!");
+}
+
 switch (cutype) {
 case CU_TYPE_DASD_3990:
 case CU_TYPE_DASD_2107:
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE) {
+if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == 
ZIPL_SECURE_MODE) {
 panic("Passthrough (vfio) device does not support secure boot!");
 }
 
diff --git a/pc-bios/s390-cc

[PATCH v1 17/24] pc-bios/s390-ccw: Add signature verification for secure boot in audit mode

2025-04-08 Thread Zhuoying Cai
Enable secure IPL in audit mode, which performs signature verification,
but any error does not terminate the boot process. Only warnings will be
logged to the console instead.

Add a comp_len variable to store the length of a segment in
zipl_load_segment. The comp_len variable is necessary to store the
calculated segment length and is used during signature verification.
Return the length on success, or a negative return code on failure.

Secure IPL in audit mode requires at least one certificate provided in
the key store along with necessary facilities (Secure IPL Facility,
Certificate Store Facility and secure IPL extension support).

Note: Secure IPL in audit mode is implemented for the SCSI scheme of
virtio-blk/virtio-scsi devices.

Signed-off-by: Zhuoying Cai 
---
 pc-bios/s390-ccw/bootmap.c  | 344 +++-
 pc-bios/s390-ccw/bootmap.h  |   9 +
 pc-bios/s390-ccw/iplb.h |  68 +++
 pc-bios/s390-ccw/main.c |   9 +
 pc-bios/s390-ccw/s390-ccw.h |  10 ++
 pc-bios/s390-ccw/sclp.c |  43 +
 pc-bios/s390-ccw/sclp.h |   6 +
 7 files changed, 486 insertions(+), 3 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 4fb3e99f4b..bdbd6ccd96 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -30,6 +30,13 @@
 
 /* Scratch space */
 static uint8_t sec[MAX_SECTOR_SIZE*4] __attribute__((__aligned__(PAGE_SIZE)));
+/* sector for storing certificates */
+static uint8_t certs_sec[CERT_MAX_SIZE * MAX_CERTIFICATES];
+/* sector for storing signatures */
+static uint8_t sig_sec[MAX_SECTOR_SIZE] 
__attribute__((__aligned__(PAGE_SIZE)));
+
+uint8_t vcb_data[MAX_SECTOR_SIZE * 4] __attribute__((__aligned__(PAGE_SIZE)));
+uint8_t vcssb_data[VCSSB_MAX_LEN] __attribute__((__aligned__(PAGE_SIZE)));
 
 const uint8_t el_torito_magic[] = "EL TORITO SPECIFICATION"
   "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
@@ -622,6 +629,7 @@ static int zipl_load_segment(ComponentEntry *entry, 
uint64_t address)
 int i;
 char err_msg[] = "zIPL failed to read BPRS at 0x";
 char *blk_no = &err_msg[30]; /* where to print blockno in (those ZZs) */
+int comp_len = 0;
 
 blockno = entry->data.blockno;
 
@@ -660,6 +668,9 @@ static int zipl_load_segment(ComponentEntry *entry, 
uint64_t address)
  */
 break;
 }
+
+comp_len += (uint64_t)bprs->size * ((uint64_t)bprs[i].blockct + 1);
+
 address = virtio_load_direct(cur_desc[0], cur_desc[1], 0,
  (void *)address);
 if (!address) {
@@ -669,6 +680,305 @@ static int zipl_load_segment(ComponentEntry *entry, 
uint64_t address)
 }
 } while (blockno);
 
+return comp_len;
+}
+
+int get_vcssb(VerificationCertificateStorageSizeBlock *vcssb)
+{
+int rc;
+
+/* avoid retrieving vcssb multiple times */
+if (vcssb->length == VCSSB_MAX_LEN) {
+return 0;
+}
+
+rc = diag320(vcssb, DIAG_320_SUBC_QUERY_VCSI);
+if (rc != DIAG_320_RC_OK) {
+return -1;
+}
+
+return 0;
+}
+
+static inline uint32_t request_certificate(uint64_t *cert, uint8_t index)
+{
+VerificationCertificateStorageSizeBlock *vcssb;
+VerficationCertificateBlock *vcb;
+VerificationCertificateEntry *vce;
+uint64_t rc = 0;
+uint32_t cert_len = 0;
+
+vcssb = (VerificationCertificateStorageSizeBlock *)vcssb_data;
+vcb = (VerficationCertificateBlock *)vcb_data;
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode 1 
*/
+if (get_vcssb(vcssb)) {
+return 0;
+}
+
+/*
+ * Request single entry
+ * Fill input fields of single-entry VCB
+ */
+vcb->vcb_hdr.vcbinlen = ROUND_UP(vcssb->largestvcblen, PAGE_SIZE);
+vcb->vcb_hdr.fvci = index + 1;
+vcb->vcb_hdr.lvci = index + 1;
+
+rc = diag320(vcb, DIAG_320_SUBC_STORE_VC);
+if (rc == DIAG_320_RC_OK) {
+vce = (VerificationCertificateEntry *)vcb->vcb_buf;
+cert_len = vce->vce_hdr.certlen;
+memcpy(cert, (uint8_t *)vce + vce->vce_hdr.certoffset, 
vce->vce_hdr.certlen);
+/* clear out region for next cert(s) */
+memcpy(vcb_data, 0, sizeof(vcb_data));
+}
+
+return cert_len;
+}
+
+static int cert_table_add(uint64_t **cert_table, uint64_t **cert,
+uint64_t cert_len, uint8_t cert_idx)
+{
+if (request_certificate(*cert, cert_idx)) {
+/* save certificate address to cert_table */
+cert_table[cert_idx] = *cert;
+/* update cert address for the next certificate */
+*cert += cert_len;
+} else {
+puts("Could not get certificate");
+return -1;
+}
+
+return 0;
+}
+
+static void cert_list_add(IplSignatureCertificateList *certs, int cert_index,
+   uint64_t *cert, uint64_t cert_len)
+{
+if (cert_index > MAX_CERTIFICATES - 1) {
+printf("Warning: Ignoring cert entr

[PATCH v1 20/24] Add -secure-boot on|off option in QEMU command line

2025-04-08 Thread Zhuoying Cai
The `-secure-boot on|off` command line option is implemented
to enable secure IPL.

By default, -secure-boot is set to false if not specified in
the command line.

Signed-off-by: Zhuoying Cai 
---
 qemu-options.hx |  8 
 system/vl.c | 21 +
 2 files changed, 29 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index b460c63490..02d2f4d513 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1262,6 +1262,14 @@ SRST
 A colon may be used to delineate multiple paths.
 ERST
 
+DEF("secure-boot", HAS_ARG, QEMU_OPTION_secure_boot,
+"-secure-boot on|off\n"
+" enable/disable secure boot\n", QEMU_ARCH_S390X)
+SRST
+``-secure-boot on|off``
+Enable/disable secure boot. Default is off.
+ERST
+
 DEFHEADING()
 
 DEFHEADING(Block device options:)
diff --git a/system/vl.c b/system/vl.c
index bd6197c887..5bdc35516c 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -524,6 +524,19 @@ static QemuOptsList qemu_boot_certificates_opts = {
 },
 };
 
+static QemuOptsList qemu_secure_boot_opts = {
+.name = "secure-boot",
+.implied_opt_name = "secure-boot",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_secure_boot_opts.head),
+.desc = {
+{
+.name = "secure-boot",
+.type = QEMU_OPT_BOOL,
+},
+{ /* end of list */ }
+},
+};
+
 const char *qemu_get_vm_name(void)
 {
 return qemu_name;
@@ -2894,6 +2907,7 @@ void qemu_init(int argc, char **argv)
 qemu_add_opts(&qemu_fw_cfg_opts);
 qemu_add_opts(&qemu_action_opts);
 qemu_add_opts(&qemu_boot_certificates_opts);
+qemu_add_opts(&qemu_secure_boot_opts);
 qemu_add_run_with_opts();
 module_call_init(MODULE_INIT_OPTS);
 
@@ -3046,6 +3060,13 @@ void qemu_init(int argc, char **argv)
 exit(1);
 }
 break;
+case QEMU_OPTION_secure_boot:
+opts = qemu_opts_parse_noisily(qemu_find_opts("secure-boot"),
+   optarg, true);
+if (!opts) {
+exit(1);
+}
+break;
 case QEMU_OPTION_fda:
 case QEMU_OPTION_fdb:
 drive_add(IF_FLOPPY, popt->index - QEMU_OPTION_fda,
-- 
2.49.0




[PATCH v1 14/24] s390x: Guest support for Secure-IPL Facility

2025-04-08 Thread Zhuoying Cai
Introduce Secure-IPL (SIPL) facility.

Use the abbreviation CBL (Consolidated-Boot-Loader facility at bit 0 of
byte 136) to represent bytes 136 and 137 for IPL device facilities of the
SCLP Read Info block.

Availability of SIPL facility is determined by byte 136 bit 1 of the
SCLP Read Info block.

When the SIPL facility is installed, the IPL Parameter Block length must
contain a value that is a multiple of 8 bytes.

Signed-off-by: Zhuoying Cai 
---
 hw/s390x/sclp.c | 2 ++
 include/hw/s390x/sclp.h | 4 +++-
 target/s390x/cpu_features.c | 3 +++
 target/s390x/cpu_features.h | 1 +
 target/s390x/cpu_features_def.h.inc | 3 +++
 target/s390x/cpu_models.c   | 2 ++
 target/s390x/gen-features.c | 1 +
 target/s390x/kvm/kvm.c  | 3 +++
 8 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 5945c9b1d8..bab65955b7 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -145,6 +145,8 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb)
 if (s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB)) {
 s390_get_feat_block(S390_FEAT_TYPE_SCLP_FAC134,
 &read_info->fac134);
+s390_get_feat_block(S390_FEAT_TYPE_SCLP_CBL,
+read_info->cbl);
 }
 
 read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO |
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index d32f6180e0..97af95a78d 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -136,7 +136,9 @@ typedef struct ReadInfo {
 uint32_t hmfai;
 uint8_t  _reserved7[134 - 128]; /* 128-133 */
 uint8_t  fac134;
-uint8_t  _reserved8[144 - 135]; /* 135-143 */
+uint8_t  _reserved8;
+uint8_t  cbl[2];/* 136-137 */
+uint8_t  _reserved9[144 - 137]; /* 138-143 */
 struct CPUEntry entries[];
 /*
  * When the Extended-Length SCCB (ELS) feature is enabled the
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 99089ab3f5..e9371569cc 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -149,6 +149,9 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
S390FeatType type,
 clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
 clear_be_bit(s390_feat_def(S390_FEAT_DIAG_320)->bit, data);
 break;
+case S390_FEAT_TYPE_SCLP_CBL:
+clear_be_bit(s390_feat_def(S390_FEAT_SIPL)->bit, data);
+break;
 default:
 return;
 }
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index 5635839d03..823fcf8b90 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -24,6 +24,7 @@ typedef enum {
 S390_FEAT_TYPE_SCLP_CONF_CHAR,
 S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT,
 S390_FEAT_TYPE_SCLP_FAC134,
+S390_FEAT_TYPE_SCLP_CBL,
 S390_FEAT_TYPE_SCLP_CPU,
 S390_FEAT_TYPE_MISC,
 S390_FEAT_TYPE_PLO,
diff --git a/target/s390x/cpu_features_def.h.inc 
b/target/s390x/cpu_features_def.h.inc
index 65d38f546d..f874b9da6f 100644
--- a/target/s390x/cpu_features_def.h.inc
+++ b/target/s390x/cpu_features_def.h.inc
@@ -140,6 +140,9 @@ DEF_FEAT(SIE_IBS, "ibs", SCLP_CONF_CHAR_EXT, 10, "SIE: 
Interlock-and-broadcast-s
 DEF_FEAT(DIAG_318, "diag318", SCLP_FAC134, 0, "Control program name and 
version codes")
 DEF_FEAT(DIAG_320, "diag320", SCLP_FAC134, 5, "Provide Certificate Store 
functions")
 
+/* Features exposed via SCLP SCCB Facilities byte 136 - 137 (bit numbers 
relative to byte-136) */
+DEF_FEAT(SIPL, "sipl", SCLP_CBL, 1, "Seucre-IPL facility")
+
 /* Features exposed via SCLP CPU info. */
 DEF_FEAT(SIE_F2, "sief2", SCLP_CPU, 4, "SIE: interception format 2 (Virtual 
SIE)")
 DEF_FEAT(SIE_SKEY, "skey", SCLP_CPU, 5, "SIE: Storage-key facility")
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 7d65c40bd1..a83c27dcb8 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -263,6 +263,7 @@ bool s390_has_feat(S390Feat feat)
 case S390_FEAT_SIE_CMMA:
 case S390_FEAT_SIE_PFMFI:
 case S390_FEAT_SIE_IBS:
+case S390_FEAT_SIPL:
 case S390_FEAT_CONFIGURATION_TOPOLOGY:
 return false;
 break;
@@ -507,6 +508,7 @@ static void check_consistency(const S390CPUModel *model)
 { S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, S390_FEAT_AP },
 { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB },
 { S390_FEAT_DIAG_320, S390_FEAT_EXTENDED_LENGTH_SCCB },
+{ S390_FEAT_SIPL, S390_FEAT_EXTENDED_LENGTH_SCCB },
 { S390_FEAT_NNPA, S390_FEAT_VECTOR },
 { S390_FEAT_RDP, S390_FEAT_LOCAL_TLB_CLEARING },
 { S390_FEAT_UV_FEAT_AP, S390_FEAT_AP },
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 52c649adcd..d973efbf72 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -697,6 +697,7 @@ static uint16_t full

[PATCH v1 03/24] s390x: Guest support for Certificate Store Facility (CS)

2025-04-08 Thread Zhuoying Cai
DIAG 320 is supported when the certificate-store (CS) facility
is installed.

Availability of CS facility is determined by byte 134 bit 5 of the
SCLP Read Info block.

Signed-off-by: Zhuoying Cai 
---
 target/s390x/cpu_features.c | 1 +
 target/s390x/cpu_features_def.h.inc | 1 +
 target/s390x/cpu_models.c   | 2 ++
 target/s390x/gen-features.c | 1 +
 target/s390x/kvm/kvm.c  | 2 ++
 5 files changed, 7 insertions(+)

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 4b5be6798e..99089ab3f5 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -147,6 +147,7 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
S390FeatType type,
 break;
 case S390_FEAT_TYPE_SCLP_FAC134:
 clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
+clear_be_bit(s390_feat_def(S390_FEAT_DIAG_320)->bit, data);
 break;
 default:
 return;
diff --git a/target/s390x/cpu_features_def.h.inc 
b/target/s390x/cpu_features_def.h.inc
index e23e603a79..65d38f546d 100644
--- a/target/s390x/cpu_features_def.h.inc
+++ b/target/s390x/cpu_features_def.h.inc
@@ -138,6 +138,7 @@ DEF_FEAT(SIE_IBS, "ibs", SCLP_CONF_CHAR_EXT, 10, "SIE: 
Interlock-and-broadcast-s
 
 /* Features exposed via SCLP SCCB Facilities byte 134 (bit numbers relative to 
byte-134) */
 DEF_FEAT(DIAG_318, "diag318", SCLP_FAC134, 0, "Control program name and 
version codes")
+DEF_FEAT(DIAG_320, "diag320", SCLP_FAC134, 5, "Provide Certificate Store 
functions")
 
 /* Features exposed via SCLP CPU info. */
 DEF_FEAT(SIE_F2, "sief2", SCLP_CPU, 4, "SIE: interception format 2 (Virtual 
SIE)")
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 93a05e43d7..7d65c40bd1 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -248,6 +248,7 @@ bool s390_has_feat(S390Feat feat)
 if (s390_is_pv()) {
 switch (feat) {
 case S390_FEAT_DIAG_318:
+case S390_FEAT_DIAG_320:
 case S390_FEAT_HPMA2:
 case S390_FEAT_SIE_F2:
 case S390_FEAT_SIE_SKEY:
@@ -505,6 +506,7 @@ static void check_consistency(const S390CPUModel *model)
 { S390_FEAT_PTFF_STOUE, S390_FEAT_MULTIPLE_EPOCH },
 { S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, S390_FEAT_AP },
 { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB },
+{ S390_FEAT_DIAG_320, S390_FEAT_EXTENDED_LENGTH_SCCB },
 { S390_FEAT_NNPA, S390_FEAT_VECTOR },
 { S390_FEAT_RDP, S390_FEAT_LOCAL_TLB_CLEARING },
 { S390_FEAT_UV_FEAT_AP, S390_FEAT_AP },
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 41840677ce..52c649adcd 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -696,6 +696,7 @@ static uint16_t full_GEN14_GA1[] = {
 S390_FEAT_HPMA2,
 S390_FEAT_SIE_KSS,
 S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF,
+S390_FEAT_DIAG_320,
 };
 
 #define full_GEN14_GA2 EmptyFeat
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 4d56e653dd..d07ca879a3 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2487,6 +2487,8 @@ bool kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
 set_bit(S390_FEAT_DIAG_318, model->features);
 }
 
+set_bit(S390_FEAT_DIAG_320, model->features);
+
 /* Test for Ultravisor features that influence secure guest behavior */
 query_uv_feat_guest(model->features);
 
-- 
2.49.0




[PATCH v1 07/24] s390x/diag: Implement DIAG 320 subcode 2

2025-04-08 Thread Zhuoying Cai
DIAG 320 subcode 2 provides certificates that are in the
certificate store.

The subcode value is denoted by setting the third-left-most bit
(bit 2, counting from bit 0 at the left) of an 8-byte field.

The verification-certificate-block (VCB) contains the output data
when the operation completes successfully. VCB includes a common
header followed by zero or more verification-certificate entries (VCEs).

Signed-off-by: Zhuoying Cai 
---
 include/hw/s390x/ipl/diag320.h |  59 +
 target/s390x/diag.c| 227 -
 2 files changed, 285 insertions(+), 1 deletion(-)

diff --git a/include/hw/s390x/ipl/diag320.h b/include/hw/s390x/ipl/diag320.h
index ded336df25..32b6914b3b 100644
--- a/include/hw/s390x/ipl/diag320.h
+++ b/include/hw/s390x/ipl/diag320.h
@@ -14,15 +14,24 @@
 
 #define DIAG_320_SUBC_QUERY_ISM 0
 #define DIAG_320_SUBC_QUERY_VCSI1
+#define DIAG_320_SUBC_STORE_VC  2
 
 #define DIAG_320_RC_OK  0x0001
 #define DIAG_320_RC_NOMEM   0x0202
+#define DIAG_320_RC_INVAL_VCB_LEN   0x0204
+#define DIAG_320_RC_BAD_RANGE   0x0302
 
 #define VCSSB_MAX_LEN   128
 #define VCE_HEADER_LEN  128
 #define VCB_HEADER_LEN  64
 
 #define DIAG_320_ISM_QUERY_VCSI 0x4000
+#define DIAG_320_ISM_STORE_VC   0x2000
+
+#define DIAG_320_VCE_FLAGS_VALID0x80
+#define DIAG_320_VCE_KEYTYPE_SELF_DESCRIBING0
+#define DIAG_320_VCE_FORMAT_X509_DER1
+#define DIAG_320_VCE_HASHTYPE_SHA2_256  1
 
 struct VerificationCertificateStorageSizeBlock {
 uint32_t length;
@@ -41,4 +50,54 @@ struct VerificationCertificateStorageSizeBlock {
 typedef struct VerificationCertificateStorageSizeBlock \
 VerificationCertificateStorageSizeBlock;
 
+struct vcb_header {
+uint32_t vcbinlen;
+uint32_t reserved0;
+uint16_t fvci;
+uint16_t lvci;
+uint32_t reserved1;
+uint32_t cstoken;
+uint32_t reserved2[3];
+uint32_t vcboutlen;
+uint8_t reserved3[3];
+uint8_t version;
+uint16_t svcc;
+uint16_t rvcc;
+uint32_t reserved4[5];
+} QEMU_PACKED;
+typedef struct vcb_header vcb_header;
+
+struct VerficationCertificateBlock {
+vcb_header vcb_hdr;
+uint8_t vcb_buf[];
+} QEMU_PACKED;
+typedef struct VerficationCertificateBlock VerficationCertificateBlock;
+
+struct vce_header {
+uint32_t len;
+uint8_t flags;
+uint8_t keytype;
+uint16_t certidx;
+uint32_t name[16];
+uint8_t format;
+uint8_t reserved0;
+uint16_t keyidlen;
+uint8_t reserved1;
+uint8_t hashtype;
+uint16_t hashlen;
+uint32_t reserved2;
+uint32_t certlen;
+uint32_t reserved3[2];
+uint16_t hashoffset;
+uint16_t certoffset;
+uint32_t reserved4[7];
+} QEMU_PACKED;
+typedef struct vce_header vce_header;
+
+struct VerificationCertificateEntry {
+vce_header vce_hdr;
+uint8_t cert_data_buf[];
+} QEMU_PACKED;
+typedef struct VerificationCertificateEntry VerificationCertificateEntry;
+
 #endif
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index cc639819ec..82e4dc9e1e 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -17,6 +17,7 @@
 #include "s390x-internal.h"
 #include "hw/watchdog/wdt_diag288.h"
 #include "system/cpus.h"
+#include "hw/s390x/cert-store.h"
 #include "hw/s390x/ipl.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "system/kvm.h"
@@ -191,6 +192,94 @@ out:
 }
 }
 
+#ifdef CONFIG_GNUTLS
+static bool diag_320_is_cert_valid(gnutls_x509_crt_t cert)
+{
+time_t now;
+
+if (gnutls_x509_crt_get_version(cert) < 0) {
+return false;
+}
+
+now = time(0);
+if (!((gnutls_x509_crt_get_activation_time(cert) < now) &&
+ (gnutls_x509_crt_get_expiration_time(cert) > now))) {
+return false;
+}
+
+return true;
+}
+#endif /* CONFIG_GNUTLS */
+
+static int diag_320_get_cert_info(VerificationCertificateEntry *vce,
+ S390IPLCertificate qcert, bool *is_valid,
+ unsigned char **key_id_data, void **hash_data)
+{
+#ifdef CONFIG_GNUTLS
+unsigned int algo;
+unsigned int bits;
+int hash_type;
+int rc;
+
+gnutls_x509_crt_t g_cert = NULL;
+if (g_init_cert((uint8_t *)qcert.raw, qcert.size, &g_cert)) {
+return -1;
+}
+
+/* VCE flag (validity) */
+*is_valid = diag_320_is_cert_valid(g_cert);
+
+/* key-type */
+algo = gnutls_x509_crt_get_pk_algorithm(g_cert, &bits);
+if (algo == GNUTLS_PK_RSA) {
+vce->vce_hdr.keytype = DIAG_320_VCE_KEYTYPE_SELF_DESCRIBING;
+}
+
+/* VC format */
+if (qcert.format == GNUTLS_X509_FMT_DER) {
+vce->vce_hdr.format = DIAG_320_VCE_FORMAT_X509_DER;
+}
+
+/* key id and key id len */
+*key_id_data = g_malloc0(qcert.key_id_size);
+rc = gnutls_x509_crt_get_key_id(g_cert, GNUTLS_KEYID_USE_SHA256,
+*key_id_data, &qcert.key_id_size);
+if (rc < 0) {
+error_report("Fail to retrieve certificate key ID");
+   

[PATCH v1 11/24] pc-bios/s390-ccw: Define memory for IPLB and convert IPLB to pointers

2025-04-08 Thread Zhuoying Cai
This patch is necessary because of the architectural design of
IPL Parameter Block (IPLB) and IPL Information Report Block (IIRB).
IIRB will be introduced in the next patch.

Define a memory space for both IPL Parameter Block (IPLB) and
IPL Information Report Block (IIRB) since IIRB is stored immediately
following IPLB.

Convert IPLB to a pointer that points to the start of the defined memory
space. IIRB points to the end of IPLB.

Signed-off-by: Zhuoying Cai 
---
 pc-bios/s390-ccw/iplb.h | 12 ++--
 pc-bios/s390-ccw/jump2ipl.c |  6 +++---
 pc-bios/s390-ccw/main.c | 34 +++---
 pc-bios/s390-ccw/netmain.c  |  8 
 4 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index bdbc733e16..11302e004d 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -20,7 +20,7 @@
 #include 
 
 extern QemuIplParameters qipl;
-extern IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
+extern IplParameterBlock *iplb;
 extern bool have_iplb;
 
 struct IplInfoReportBlockHeader {
@@ -85,6 +85,14 @@ struct IplInfoReportBlock {
 } __attribute__ ((packed));
 typedef struct IplInfoReportBlock IplInfoReportBlock;
 
+struct IplBlocks {
+IplParameterBlock   iplb;
+IplInfoReportBlock  iirb;
+} __attribute__ ((packed));
+typedef struct IplBlocks IplBlocks;
+
+extern IplBlocks ipl_data __attribute__((__aligned__(PAGE_SIZE)));
+
 #define S390_IPL_TYPE_FCP 0x00
 #define S390_IPL_TYPE_CCW 0x02
 #define S390_IPL_TYPE_QEMU_SCSI 0xff
@@ -127,7 +135,7 @@ static inline bool load_next_iplb(void)
 
 qipl.index++;
 next_iplb = (IplParameterBlock *) qipl.next_iplb;
-memcpy(&iplb, next_iplb, sizeof(IplParameterBlock));
+memcpy(iplb, next_iplb, sizeof(IplParameterBlock));
 
 qipl.chain_len--;
 qipl.next_iplb = qipl.next_iplb + sizeof(IplParameterBlock);
diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c
index 86321d0f46..fa2ca5cbe1 100644
--- a/pc-bios/s390-ccw/jump2ipl.c
+++ b/pc-bios/s390-ccw/jump2ipl.c
@@ -43,11 +43,11 @@ int jump_to_IPL_code(uint64_t address)
  * The IPLB for QEMU SCSI type devices must be rebuilt during re-ipl. The
  * iplb.devno is set to the boot position of the target SCSI device.
  */
-if (iplb.pbt == S390_IPL_TYPE_QEMU_SCSI) {
-iplb.devno = qipl.index;
+if (iplb->pbt == S390_IPL_TYPE_QEMU_SCSI) {
+iplb->devno = qipl.index;
 }
 
-if (have_iplb && !set_iplb(&iplb)) {
+if (have_iplb && !set_iplb(iplb)) {
 panic("Failed to set IPLB");
 }
 
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 76bf743900..c9328f1c51 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -22,7 +22,9 @@
 static SubChannelId blk_schid = { .one = 1 };
 static char loadparm_str[LOADPARM_LEN + 1];
 QemuIplParameters qipl;
-IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
+/* Ensure that IPLB and IIRB are page aligned and sequential in memory */
+IplBlocks ipl_data;
+IplParameterBlock *iplb;
 bool have_iplb;
 static uint16_t cutype;
 LowCore *lowcore; /* Yes, this *is* a pointer to address 0 */
@@ -51,7 +53,7 @@ void write_subsystem_identification(void)
 void write_iplb_location(void)
 {
 if (cutype == CU_TYPE_VIRTIO && virtio_get_device_type() != VIRTIO_ID_NET) 
{
-lowcore->ptr_iplb = ptr2u32(&iplb);
+lowcore->ptr_iplb = ptr2u32(iplb);
 }
 }
 
@@ -162,7 +164,7 @@ static void menu_setup(void)
 return;
 }
 
-switch (iplb.pbt) {
+switch (iplb->pbt) {
 case S390_IPL_TYPE_CCW:
 case S390_IPL_TYPE_QEMU_SCSI:
 menu_set_parms(qipl.qipl_flags & BOOT_MENU_FLAG_MASK,
@@ -191,8 +193,8 @@ static void boot_setup(void)
 {
 char lpmsg[] = "LOADPARM=[]\n";
 
-if (have_iplb && memcmp(iplb.loadparm, NO_LOADPARM, LOADPARM_LEN) != 0) {
-ebcdic_to_ascii((char *) iplb.loadparm, loadparm_str, LOADPARM_LEN);
+if (have_iplb && memcmp(iplb->loadparm, NO_LOADPARM, LOADPARM_LEN) != 0) {
+ebcdic_to_ascii((char *) iplb->loadparm, loadparm_str, LOADPARM_LEN);
 } else {
 sclp_get_loadparm_ascii(loadparm_str);
 }
@@ -216,21 +218,21 @@ static bool find_boot_device(void)
 VDev *vdev = virtio_get_device();
 bool found = false;
 
-switch (iplb.pbt) {
+switch (iplb->pbt) {
 case S390_IPL_TYPE_CCW:
 vdev->scsi_device_selected = false;
-debug_print_int("device no. ", iplb.ccw.devno);
-blk_schid.ssid = iplb.ccw.ssid & 0x3;
+debug_print_int("device no. ", iplb->ccw.devno);
+blk_schid.ssid = iplb->ccw.ssid & 0x3;
 debug_print_int("ssid ", blk_schid.ssid);
-found = find_subch(iplb.ccw.devno);
+found = find_subch(iplb->ccw.devno);
 break;
 case S390_IPL_TYPE_QEMU_SCSI:
 vdev->scsi_device_selected = true;
-vdev->selected_scsi_device.channel = iplb.scsi.channel;
-vdev->selected_scsi_

[PATCH v1 23/24] pc-bios/s390-ccw: Handle secure boot with multiple boot devices

2025-04-08 Thread Zhuoying Cai
The current approach to enabling secure boot relies on providing
-secure-boot and -boot-certificates options, which apply to all boot
devices.

With the possibility of multiple boot devices, secure boot expects all
provided devices to be supported and eligible (e.g.,
virtio-blk/virtio-scsi using the SCSI scheme).

If multiple boot devices are provided and include an unsupported (e.g.,
ECKD, VFIO) or a non-eligible (e.g., Net) device, the boot process will
terminate with an error logged to the console.

Signed-off-by: Zhuoying Cai 
---
 pc-bios/s390-ccw/bootmap.c  | 28 +-
 pc-bios/s390-ccw/main.c | 74 ++---
 pc-bios/s390-ccw/s390-ccw.h |  1 +
 3 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index a22061e1ad..285aae114f 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -1713,23 +1713,35 @@ int zipl_mode(void)
 }
 }
 
+int zipl_check_scsi_mbr_magic(void)
+{
+ScsiMbr *mbr = (void *)sec;
+
+/* Grab the MBR */
+memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+if (virtio_read(0, mbr)) {
+puts("Cannot read block 0");
+return -EIO;
+}
+
+if (!magic_match(mbr->magic, ZIPL_MAGIC)) {
+return -1;
+}
+
+return 0;
+}
+
 void zipl_load(void)
 {
 VDev *vdev = virtio_get_device();
 
 if (vdev->is_cdrom) {
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == 
ZIPL_SECURE_MODE) {
-panic("Secure boot from ISO image is not supported!");
-}
 ipl_iso_el_torito();
 puts("Failed to IPL this ISO image!");
 return;
 }
 
 if (virtio_get_device_type() == VIRTIO_ID_NET) {
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == 
ZIPL_SECURE_MODE) {
-panic("Virtio net boot device does not support secure boot!");
-}
 netmain();
 puts("Failed to IPL from this network!");
 return;
@@ -1740,10 +1752,6 @@ void zipl_load(void)
 return;
 }
 
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == ZIPL_SECURE_MODE) {
-panic("ECKD boot device does not support secure boot!");
-}
-
 switch (virtio_get_device_type()) {
 case VIRTIO_ID_BLOCK:
 zipl_load_vblk();
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 6189a5a7ba..30cb470ee4 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -271,8 +271,43 @@ static int virtio_setup(void)
 return ret;
 }
 
-static void ipl_boot_device(void)
+static void validate_secure_boot_device(void)
+{
+switch (cutype) {
+case CU_TYPE_DASD_3990:
+case CU_TYPE_DASD_2107:
+panic("Passthrough (vfio) device does not support secure boot!");
+break;
+case CU_TYPE_VIRTIO:
+if (virtio_setup() == 0) {
+VDev *vdev = virtio_get_device();
+
+if (vdev->is_cdrom) {
+panic("Secure boot from ISO image is not supported!");
+}
+
+if (virtio_get_device_type() == VIRTIO_ID_NET) {
+panic("Virtio net boot device does not support secure boot!");
+}
+
+if (zipl_check_scsi_mbr_magic()) {
+panic("ECKD boot device does not support secure boot!");
+}
+}
+break;
+default:
+panic("Secure boot from unexpected device type is not supported!");
+}
+
+printf("SCSI boot device supports secure boot.\n");
+}
+
+static void check_secure_boot_support(void)
 {
+bool have_iplb_copy;
+IplParameterBlock *iplb_copy;
+QemuIplParameters *qipl_copy;
+
 if (boot_mode == 0) {
 boot_mode = zipl_mode();
 }
@@ -281,13 +316,40 @@ static void ipl_boot_device(void)
 panic("Need at least one certificate for secure boot!");
 }
 
+if (boot_mode == ZIPL_NORMAL_MODE) {
+return;
+}
+
+/*
+ * Store copies of have_iplb, iplb and qipl.
+ * They will be updated in load_next_iplb().
+ */
+have_iplb_copy = have_iplb;
+iplb_copy = malloc(sizeof(IplParameterBlock));
+qipl_copy = malloc(sizeof(QemuIplParameters));
+
+memcpy(qipl_copy, &qipl, sizeof(QemuIplParameters));
+memcpy(iplb_copy, iplb, sizeof(IplParameterBlock));
+
+while (have_iplb_copy) {
+if (have_iplb_copy && find_boot_device()) {
+validate_secure_boot_device();
+}
+have_iplb_copy = load_next_iplb();
+}
+
+memcpy(&qipl, qipl_copy, sizeof(QemuIplParameters));
+memcpy(iplb, iplb_copy, sizeof(IplParameterBlock));
+
+free(qipl_copy);
+free(iplb_copy);
+}
+
+static void ipl_boot_device(void)
+{
 switch (cutype) {
 case CU_TYPE_DASD_3990:
 case CU_TYPE_DASD_2107:
-if (boot_mode == ZIPL_SECURE_AUDIT_MODE || boot_mode == 
ZIPL_SECURE_MODE) {
-panic("Passthrough (vfio) device does not support secure boot!");
-}
-
 dasd_ipl(blk_schi

[PATCH v1 15/24] pc-bios/s390-ccw: Refactor zipl_run()

2025-04-08 Thread Zhuoying Cai
Refactor to enhance readability before enabling secure IPL in later
patches.

Signed-off-by: Zhuoying Cai 
---
 pc-bios/s390-ccw/bootmap.c | 58 ++
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 0f8baa0198..485b55f1bf 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -674,6 +674,38 @@ static int zipl_load_segment(ComponentEntry *entry)
 return 0;
 }
 
+static int zipl_run_normal(ComponentEntry *entry, uint8_t *tmp_sec)
+{
+while (entry->component_type == ZIPL_COMP_ENTRY_LOAD ||
+entry->component_type == ZIPL_COMP_ENTRY_SIGNATURE) {
+
+/* Secure boot is off, so we skip signature entries */
+if (entry->component_type == ZIPL_COMP_ENTRY_SIGNATURE) {
+entry++;
+continue;
+}
+
+if (zipl_load_segment(entry)) {
+return -1;
+}
+
+entry++;
+
+if ((uint8_t *)(&entry[1]) > (tmp_sec + MAX_SECTOR_SIZE)) {
+puts("Wrong entry value");
+return -EINVAL;
+}
+}
+
+if (entry->component_type != ZIPL_COMP_ENTRY_EXEC) {
+puts("No EXEC entry");
+return -EINVAL;
+}
+
+write_reset_psw(entry->compdat.load_psw);
+return 0;
+}
+
 /* Run a zipl program */
 static int zipl_run(ScsiBlockPtr *pte)
 {
@@ -700,34 +732,12 @@ static int zipl_run(ScsiBlockPtr *pte)
 
 /* Load image(s) into RAM */
 entry = (ComponentEntry *)(&header[1]);
-while (entry->component_type == ZIPL_COMP_ENTRY_LOAD ||
-   entry->component_type == ZIPL_COMP_ENTRY_SIGNATURE) {
-
-/* We don't support secure boot yet, so we skip signature entries */
-if (entry->component_type == ZIPL_COMP_ENTRY_SIGNATURE) {
-entry++;
-continue;
-}
-
-if (zipl_load_segment(entry)) {
-return -1;
-}
 
-entry++;
-
-if ((uint8_t *)(&entry[1]) > (tmp_sec + MAX_SECTOR_SIZE)) {
-puts("Wrong entry value");
-return -EINVAL;
-}
-}
-
-if (entry->component_type != ZIPL_COMP_ENTRY_EXEC) {
-puts("No EXEC entry");
-return -EINVAL;
+if (zipl_run_normal(entry, tmp_sec)) {
+return -1;
 }
 
 /* should not return */
-write_reset_psw(entry->compdat.load_psw);
 jump_to_IPL_code(0);
 return -1;
 }
-- 
2.49.0




[PATCH v1 13/24] hw/s390x/ipl: Set iplb->len to maximum length of IPL Parameter Block

2025-04-08 Thread Zhuoying Cai
The IPL Information Report Block (IIRB) immediately follows the IPL
Parameter Block (IPLB).

The IPLB struct is allocated 4 KB of memory, and iplb->len indicates
the amount of that memory currently used by the IPLB.

To ensure proper alignment of the IIRB and prevent overlap, set
iplb->len to the maximum length of the IPLB, allowing alignment
constraints to be determined based on its size.

Signed-off-by: Zhuoying Cai 
---
 hw/s390x/ipl.c | 6 +++---
 hw/s390x/ipl.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 59ec81181d..b646fcc74e 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -460,7 +460,7 @@ static bool s390_build_iplb(DeviceState *dev_st, 
IplParameterBlock *iplb)
 if (scsi_lp && strlen(scsi_lp) > 0) {
 lp = scsi_lp;
 }
-iplb->len = cpu_to_be32(S390_IPLB_MIN_QEMU_SCSI_LEN);
+iplb->len = cpu_to_be32(S390_IPLB_MAX_LEN);
 iplb->blk0_len =
 cpu_to_be32(S390_IPLB_MIN_QEMU_SCSI_LEN - 
S390_IPLB_HEADER_LEN);
 iplb->pbt = S390_IPL_TYPE_QEMU_SCSI;
@@ -471,14 +471,14 @@ static bool s390_build_iplb(DeviceState *dev_st, 
IplParameterBlock *iplb)
 iplb->scsi.ssid = ccw_dev->sch->ssid & 3;
 break;
 case CCW_DEVTYPE_VFIO:
-iplb->len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN);
+iplb->len = cpu_to_be32(S390_IPLB_MAX_LEN);
 iplb->pbt = S390_IPL_TYPE_CCW;
 iplb->ccw.devno = cpu_to_be16(ccw_dev->sch->devno);
 iplb->ccw.ssid = ccw_dev->sch->ssid & 3;
 break;
 case CCW_DEVTYPE_VIRTIO_NET:
 case CCW_DEVTYPE_VIRTIO:
-iplb->len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN);
+iplb->len = cpu_to_be32(S390_IPLB_MAX_LEN);
 iplb->blk0_len =
 cpu_to_be32(S390_IPLB_MIN_CCW_LEN - S390_IPLB_HEADER_LEN);
 iplb->pbt = S390_IPL_TYPE_CCW;
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index e9ef8ddccd..c05f238753 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -114,6 +114,7 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, 
"alignment of iplb wrong");
 #define S390_IPLB_MIN_CCW_LEN 200
 #define S390_IPLB_MIN_FCP_LEN 384
 #define S390_IPLB_MIN_QEMU_SCSI_LEN 200
+#define S390_IPLB_MAX_LEN 4096
 
 static inline bool diag_parm_addr_valid(uint64_t addr, size_t size, bool write)
 {
-- 
2.49.0




[PATCH v1 04/24] s390x/diag: Introduce DIAG 320 for certificate store facility

2025-04-08 Thread Zhuoying Cai
From: Collin Walling 

DIAGNOSE 320 is introduced to support the certificate store facility,
which includes operations such as querying certificate storage
information and providing the certificates in the certificate store.

With this patch, only subcode 0 is supported; it is used to query a
bitmap indicating which subcodes are supported.

Signed-off-by: Zhuoying Cai 
---
 hw/s390x/ipl.h |  1 +
 include/hw/s390x/ipl/diag320.h | 19 ++
 target/s390x/diag.c| 36 ++
 target/s390x/kvm/kvm.c | 14 +
 target/s390x/s390x-internal.h  |  2 ++
 5 files changed, 72 insertions(+)
 create mode 100644 include/hw/s390x/ipl/diag320.h

diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index 8c2a442255..1bd73b4dc1 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -17,6 +17,7 @@
 #include "cpu.h"
 #include "exec/address-spaces.h"
 #include "hw/qdev-core.h"
+#include "hw/s390x/ipl/diag320.h"
 #include "hw/s390x/ipl/qipl.h"
 #include "qom/object.h"
 
diff --git a/include/hw/s390x/ipl/diag320.h b/include/hw/s390x/ipl/diag320.h
new file mode 100644
index 00..d6f70c65df
--- /dev/null
+++ b/include/hw/s390x/ipl/diag320.h
@@ -0,0 +1,19 @@
+/*
+ * S/390 DIAGNOSE 320 definitions and structures
+ *
+ * Copyright 2025 IBM Corp.
+ * Author(s): Zhuoying Cai 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef S390X_DIAG320_H
+#define S390X_DIAG320_H
+
+#define DIAG_320_SUBC_QUERY_ISM 0
+
+#define DIAG_320_RC_OK  0x0001
+
+#endif
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index da44b0133e..cb840e4b97 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -192,3 +192,39 @@ out:
 break;
 }
 }
+
+void handle_diag_320(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t 
ra)
+{
+S390CPU *cpu = env_archcpu(env);
+uint64_t subcode = env->regs[r3];
+uint64_t addr = env->regs[r1];
+int rc;
+
+if (env->psw.mask & PSW_MASK_PSTATE) {
+s390_program_interrupt(env, PGM_PRIVILEGED, ra);
+return;
+}
+
+if (r1 & 1) {
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+return;
+}
+
+switch (subcode) {
+case DIAG_320_SUBC_QUERY_ISM:
+uint64_t ism =  0;
+
+if (s390_cpu_virt_mem_write(cpu, addr, (uint8_t)r1, &ism,
+be64_to_cpu(sizeof(ism {
+s390_cpu_virt_mem_handle_exc(cpu, ra);
+return;
+}
+
+rc = DIAG_320_RC_OK;
+break;
+default:
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+return;
+}
+env->regs[r1 + 1] = rc;
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index d07ca879a3..b013751478 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -98,6 +98,7 @@
 #define DIAG_TIMEREVENT 0x288
 #define DIAG_IPL0x308
 #define DIAG_SET_CONTROL_PROGRAM_CODES  0x318
+#define DIAG_CERT_STORE 0x320
 #define DIAG_KVM_HYPERCALL  0x500
 #define DIAG_KVM_BREAKPOINT 0x501
 
@@ -1561,6 +1562,16 @@ static void handle_diag_318(S390CPU *cpu, struct kvm_run 
*run)
 }
 }
 
+static void kvm_handle_diag_320(S390CPU *cpu, struct kvm_run *run)
+{
+uint64_t r1, r3;
+
+r1 = (run->s390_sieic.ipa & 0x00f0) >> 4;
+r3 = run->s390_sieic.ipa & 0x000f;
+
+handle_diag_320(&cpu->env, r1, r3, RA_IGNORED);
+}
+
 #define DIAG_KVM_CODE_MASK 0x
 
 static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb)
@@ -1591,6 +1602,9 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, 
uint32_t ipb)
 case DIAG_KVM_BREAKPOINT:
 r = handle_sw_breakpoint(cpu, run);
 break;
+case DIAG_CERT_STORE:
+kvm_handle_diag_320(cpu, run);
+break;
 default:
 trace_kvm_insn_diag(func_code);
 kvm_s390_program_interrupt(cpu, PGM_SPECIFICATION);
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index a4ba6227ab..86a652f833 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -400,6 +400,8 @@ int mmu_translate_real(CPUS390XState *env, target_ulong 
raddr, int rw,
 int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3);
 void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
  uintptr_t ra);
+void handle_diag_320(CPUS390XState *env, uint64_t r1, uint64_t r3,
+ uintptr_t ra);
 
 
 /* translate.c */
-- 
2.49.0




  1   2   >