date:20230921

Re: [PATCH v6 3/3] hw/nvme: add nvme management interface model

2023-09-21 Thread Andrew Jeffery

On Thu, 2023-09-14 at 11:53 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add the 'nmi-i2c' device that emulates an NVMe Management Interface
> controller.
> 
> Initial support is very basic (Read NMI DS, Configuration Get).
> 
> This is based on previously posted code by Padmakar Kalghatgi, Arun
> Kumar Agasar and Saurav Kumar.
> 
> Reviewed-by: Jonathan Cameron 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/Kconfig  |   4 +
>  hw/nvme/meson.build  |   1 +
>  hw/nvme/nmi-i2c.c| 407 
> +++
>  hw/nvme/trace-events |   6 +
>  4 files changed, 418 insertions(+)
> 
> diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
> index cfa2ab0f9d5a..e1f6360c0f4b 100644
> --- a/hw/nvme/Kconfig
> +++ b/hw/nvme/Kconfig
> @@ -2,3 +2,7 @@ config NVME_PCI
>  bool
>  default y if PCI_DEVICES || PCIE_DEVICES
>  depends on PCI
> +
> +config NVME_NMI_I2C
> +bool
> +default y if I2C_MCTP
> diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
> index 1a6a2ca2f307..7bc85f31c149 100644
> --- a/hw/nvme/meson.build
> +++ b/hw/nvme/meson.build
> @@ -1 +1,2 @@
>  system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
> 'ns.c', 'subsys.c'))
> +system_ss.add(when: 'CONFIG_NVME_NMI_I2C', if_true: files('nmi-i2c.c'))
> diff --git a/hw/nvme/nmi-i2c.c b/hw/nvme/nmi-i2c.c
> new file mode 100644
> index ..bf4648db0457
> --- /dev/null
> +++ b/hw/nvme/nmi-i2c.c
> @@ -0,0 +1,407 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + *
> + * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
> + *
> + * SPDX-FileContributor: Padmakar Kalghatgi 
> + * SPDX-FileContributor: Arun Kumar Agasar 
> + * SPDX-FileContributor: Saurav Kumar 
> + * SPDX-FileContributor: Klaus Jensen 
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/crc32c.h"
> +#include "hw/registerfields.h"
> +#include "hw/i2c/i2c.h"
> +#include "hw/i2c/mctp.h"
> +#include "net/mctp.h"
> +#include "trace.h"
> +
> +/* NVM Express Management Interface 1.2c, Section 3.1 */
> +#define NMI_MAX_MESSAGE_LENGTH 4224
> +
> +#define TYPE_NMI_I2C_DEVICE "nmi-i2c"
> +OBJECT_DECLARE_SIMPLE_TYPE(NMIDevice, NMI_I2C_DEVICE)
> +
> +typedef struct NMIDevice {
> +MCTPI2CEndpoint mctp;
> +
> +uint8_t buffer[NMI_MAX_MESSAGE_LENGTH];
> +uint8_t scratch[NMI_MAX_MESSAGE_LENGTH];
> +
> +size_t  len;
> +int64_t pos;
> +} NMIDevice;
> +
> +FIELD(NMI_MCTPD, MT, 0, 7)
> +FIELD(NMI_MCTPD, IC, 7, 1)
> +
> +#define NMI_MCTPD_MT_NMI 0x4
> +#define NMI_MCTPD_IC_ENABLED 0x1
> +
> +FIELD(NMI_NMP, ROR, 7, 1)
> +FIELD(NMI_NMP, NMIMT, 3, 4)
> +
> +#define NMI_NMP_NMIMT_NVME_MI 0x1
> +#define NMI_NMP_NMIMT_NVME_ADMIN 0x2
> +
> +typedef struct NMIMessage {
> +uint8_t mctpd;
> +uint8_t nmp;
> +uint8_t rsvd2[2];
> +uint8_t payload[]; /* includes the Message Integrity Check */
> +} NMIMessage;
> +
> +typedef struct NMIRequest {
> +   uint8_t opc;
> +   uint8_t rsvd1[3];
> +   uint32_t dw0;
> +   uint32_t dw1;
> +   uint32_t mic;
> +} NMIRequest;
> +
> +FIELD(NMI_CMD_READ_NMI_DS_DW0, DTYP, 24, 8)
> +
> +typedef enum NMIReadDSType {
> +NMI_CMD_READ_NMI_DS_SUBSYSTEM   = 0x0,
> +NMI_CMD_READ_NMI_DS_PORTS   = 0x1,
> +NMI_CMD_READ_NMI_DS_CTRL_LIST   = 0x2,
> +NMI_CMD_READ_NMI_DS_CTRL_INFO   = 0x3,
> +NMI_CMD_READ_NMI_DS_OPT_CMD_SUPPORT = 0x4,
> +NMI_CMD_READ_NMI_DS_MEB_CMD_SUPPORT = 0x5,
> +} NMIReadDSType;
> +
> +#define NMI_STATUS_INVALID_PARAMETER 0x4
> +
> +static void nmi_scratch_append(NMIDevice *nmi, const void *buf, size_t count)
> +{
> +assert(nmi->pos + count <= NMI_MAX_MESSAGE_LENGTH);
> +
> +memcpy(nmi->scratch + nmi->pos, buf, count);
> +nmi->pos += count;
> +}
> +
> +static void nmi_set_parameter_error(NMIDevice *nmi, uint8_t bit, uint16_t 
> byte)
> +{
> +/* NVM Express Management Interface 1.2c, Figure 30 */
> +struct resp {
> +uint8_t  status;
> +uint8_t  bit;
> +uint16_t byte;
> +};
> +
> +struct resp buf = {
> +.status = NMI_STATUS_INVALID_PARAMETER,
> +.bit = bit & 0x3,
> +.byte = byte,
> +};
> +
> +nmi_scratch_append(nmi, &buf, sizeof(buf));
> +}
> +
> +static void nmi_set_error(NMIDevice *nmi, uint8_t status)
> +{
> +const uint8_t buf[4] = {status,};
> +
> +nmi_scratch_append(nmi, buf, sizeof(buf));
> +}
> +
> +static void nmi_handle_mi_read_nmi_ds(NMIDevice *nmi, NMIRequest *request)
> +{
> +I2CSlave *i2c = I2C_SLAVE(nmi);
> +
> +uint32_t dw0 = le32_to_cpu(request->dw0);
> +uint8_t dtyp = FIELD_EX32(dw0, NMI_CMD_READ_NMI_DS_DW0, DTYP);
> +
> +trace_nmi_handle_mi_read_nmi_ds(dtyp);
> +
> +static const uint8_t nmi_ds_subsystem[36] = {
> +0x00,   /* success */
> +0x20, 0x00, /* response data length */
> +0x00,   /* reserved */
> +0x00,   /* number of ports */
> +0x01,   /* major version */
> +0x01,   /* minor version */

Re: [PATCH v2] linux-user: Undo incomplete mmap

2023-09-21 Thread Akihiko Odaki


On 2023/09/03 14:39, Akihiko Odaki wrote:

When the host page size is greater than the target page size and
MAP_FIXED or MAP_FIXED_NOREPLACE is requested, mmap will be done for
three parts: start, middle, and end. If a later part of mmap fail,
mmap done in the earlier parts must be reverted.

Fixes: 54936004fd ("mmap emulation")
Signed-off-by: Akihiko Odaki 
---
V1 -> V2: Rebased.

  linux-user/mmap.c | 65 +--
  1 file changed, 40 insertions(+), 25 deletions(-)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 9aab48d4a3..72521f8d27 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -224,13 +224,15 @@ int target_mprotect(abi_ulong start, abi_ulong len, int 
target_prot)
  
  /* map an incomplete host page */

  static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
-  int prot, int flags, int fd, off_t offset)
+  int prot, int flags, int fd, off_t offset, bool *mapped)
  {
  abi_ulong real_last;
  void *host_start;
  int prot_old, prot_new;
  int host_prot_old, host_prot_new;
  
+*mapped = false;

+
  if (!(flags & MAP_ANONYMOUS)
  && (flags & MAP_TYPE) == MAP_SHARED
  && (prot & PROT_WRITE)) {
@@ -271,6 +273,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong 
start, abi_ulong last,
  return false;
  }
  prot_old = prot;
+*mapped = true;
  }
  prot_new = prot | prot_old;
  
@@ -448,7 +451,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)

  abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
   int flags, int fd, off_t offset)
  {
-abi_ulong ret, last, real_start, real_last, retaddr, host_len;
+abi_ulong ret, last, real_start, retaddr, host_len;
  abi_ulong passthrough_start = -1, passthrough_last = 0;
  int page_flags;
  off_t host_offset;
@@ -577,12 +580,16 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
target_prot,
  passthrough_start = start;
  passthrough_last = last;
  } else {
+abi_ulong middle_start = HOST_PAGE_ALIGN(start);
+abi_ulong middle_last = ((start + len) & qemu_host_page_mask) - 1;
+abi_ulong mapped_len = 0;
+bool mapped;
+
  if (start & ~TARGET_PAGE_MASK) {
  errno = EINVAL;
  goto fail;
  }
  last = start + len - 1;
-real_last = HOST_PAGE_ALIGN(last) - 1;
  
  /*

   * Test if requested memory area fits target address space
@@ -649,35 +656,26 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
target_prot,
  }
  
  /* handle the start of the mapping */

-if (start > real_start) {
-if (real_last == real_start + qemu_host_page_size - 1) {
+if (start < middle_start) {
+if (last < middle_start) {
  /* one single host page */
  if (!mmap_frag(real_start, start, last,
-   target_prot, flags, fd, offset)) {
+   target_prot, flags, fd, offset, &mapped)) {
  goto fail;
  }
  goto the_end1;
  }
-if (!mmap_frag(real_start, start,
-   real_start + qemu_host_page_size - 1,
-   target_prot, flags, fd, offset)) {
+if (!mmap_frag(real_start, start, middle_start - 1,
+   target_prot, flags, fd, offset, &mapped)) {
  goto fail;
  }
-real_start += qemu_host_page_size;
-}
-/* handle the end of the mapping */
-if (last < real_last) {
-abi_ulong real_page = real_last - qemu_host_page_size + 1;
-if (!mmap_frag(real_page, real_page, last,
-   target_prot, flags, fd,
-   offset + real_page - start)) {
-goto fail;
+if (mapped) {
+mapped_len = qemu_host_page_size;
  }
-real_last -= qemu_host_page_size;
  }
  
  /* map the middle (easier) */

-if (real_start < real_last) {
+if (middle_start < middle_last) {
  void *p, *want_p;
  off_t offset1;
  size_t len1;
@@ -685,10 +683,10 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
target_prot,
  if (flags & MAP_ANONYMOUS) {
  offset1 = 0;
  } else {
-offset1 = offset + real_start - start;
+offset1 = offset + middle_start - start;
  }
-len1 = real_last - real_start + 1;
-want_p = g2h_untagged(real_start);
+len1 = middle_last - middle_start + 1;
+want_p = g2h_untagged(middle_start);
  
  p = mmap(want_p, len1, targ

Re: [PATCH v6 2/3] hw/i2c: add mctp core

2023-09-21 Thread Andrew Jeffery

On Thu, 2023-09-14 at 11:53 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add an abstract MCTP over I2C endpoint model. This implements MCTP
> control message handling as well as handling the actual I2C transport
> (packetization).
> 
> Devices are intended to derive from this and implement the class
> methods.
> 
> Parts of this implementation is inspired by code[1] previously posted by
> Jonathan Cameron.
> 
> Squashed a fix[2] from Matt Johnston.
> 
>   [1]: 
> https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
>   [2]: 
> https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/
> 
> Tested-by: Jonathan Cameron 
> Reviewed-by: Jonathan Cameron 
> Signed-off-by: Klaus Jensen 

Nice!

Reviewed-by: Andrew Jeffery

Re: [PATCH] vl: Free machine list

2023-09-21 Thread Akihiko Odaki


On 2023/07/22 15:26, Akihiko Odaki wrote:

Free machine list and make LeakSanitizer happy.

Signed-off-by: Akihiko Odaki 
---
  softmmu/vl.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index b0b96f67fa..802f728298 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1484,7 +1484,8 @@ static gint machine_class_cmp(gconstpointer a, 
gconstpointer b)
  
  static void machine_help_func(const QDict *qdict)

  {
-GSList *machines, *el;
+g_autoptr(GSList) machines = NULL;
+GSList *el;
  const char *type = qdict_get_try_str(qdict, "type");
  
  machines = object_class_get_list(TYPE_MACHINE, false);


Hi Paolo,

Can you have a look at this patch?

Regards,
Akihiko Odaki

Re: [PATCH v6 1/3] hw/i2c: add smbus pec utility function

2023-09-21 Thread Andrew Jeffery

On Thu, 2023-09-14 at 11:53 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
> message.
> 
> Reviewed-by: Jonathan Cameron 
> Signed-off-by: Klaus Jensen 

It at least looks a lot like the linux implementation :)

Reviewed-by: Andrew Jeffery 

> ---
>  hw/i2c/smbus_master.c | 26 ++
>  include/hw/i2c/smbus_master.h |  2 ++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
> index 6a53c34e70b7..01a8e4700222 100644
> --- a/hw/i2c/smbus_master.c
> +++ b/hw/i2c/smbus_master.c
> @@ -15,6 +15,32 @@
>  #include "hw/i2c/i2c.h"
>  #include "hw/i2c/smbus_master.h"
>  
> +static uint8_t crc8(uint16_t data)
> +{
> +int i;
> +
> +for (i = 0; i < 8; i++) {
> +if (data & 0x8000) {
> +data ^= 0x1070U << 3;
> +}
> +
> +data <<= 1;
> +}
> +
> +return (uint8_t)(data >> 8);
> +}
> +
> +uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
> +{
> +int i;
> +
> +for (i = 0; i < len; i++) {
> +crc = crc8((crc ^ buf[i]) << 8);
> +}
> +
> +return crc;
> +}
> +
>  /* Master device commands.  */
>  int smbus_quick_command(I2CBus *bus, uint8_t addr, int read)
>  {
> diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
> index bb13bc423c22..d90f81767d86 100644
> --- a/include/hw/i2c/smbus_master.h
> +++ b/include/hw/i2c/smbus_master.h
> @@ -27,6 +27,8 @@
>  
>  #include "hw/i2c/i2c.h"
>  
> +uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len);
> +
>  /* Master device commands.  */
>  int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
>  int smbus_receive_byte(I2CBus *bus, uint8_t addr);
>

Re: [PATCH v2 0/2] net: Update MemReentrancyGuard for NIC

2023-09-21 Thread Akihiko Odaki


On 2023/06/01 12:18, Akihiko Odaki wrote:

Recently MemReentrancyGuard was added to DeviceState to record that the
device is engaging in I/O. The network device backend needs to update it
when delivering a packet to a device.

This implementation follows what bottom half does, but it does not add
a tracepoint for the case that the network device backend started
delivering a packet to a device which is already engaging in I/O. This
is because such reentrancy frequently happens for
qemu_flush_queued_packets() and is insignificant.

This series consists of two patches. The first patch makes a bulk change to
add a new parameter to qemu_new_nic() and does not contain behavioral changes.
The second patch actually implements MemReentrancyGuard update.

V1 -> V2: Added the 'Fixes: CVE-2023-3019' tag

Akihiko Odaki (2):
   net: Provide MemReentrancyGuard * to qemu_new_nic()
   net: Update MemReentrancyGuard for NIC

  include/net/net.h |  2 ++
  hw/net/allwinner-sun8i-emac.c |  3 ++-
  hw/net/allwinner_emac.c   |  3 ++-
  hw/net/cadence_gem.c  |  3 ++-
  hw/net/dp8393x.c  |  3 ++-
  hw/net/e1000.c|  3 ++-
  hw/net/e1000e.c   |  2 +-
  hw/net/eepro100.c |  4 +++-
  hw/net/etraxfs_eth.c  |  3 ++-
  hw/net/fsl_etsec/etsec.c  |  3 ++-
  hw/net/ftgmac100.c|  3 ++-
  hw/net/i82596.c   |  2 +-
  hw/net/igb.c  |  2 +-
  hw/net/imx_fec.c  |  2 +-
  hw/net/lan9118.c  |  3 ++-
  hw/net/mcf_fec.c  |  3 ++-
  hw/net/mipsnet.c  |  3 ++-
  hw/net/msf2-emac.c|  3 ++-
  hw/net/mv88w8618_eth.c|  3 ++-
  hw/net/ne2000-isa.c   |  3 ++-
  hw/net/ne2000-pci.c   |  3 ++-
  hw/net/npcm7xx_emc.c  |  3 ++-
  hw/net/opencores_eth.c|  3 ++-
  hw/net/pcnet.c|  3 ++-
  hw/net/rocker/rocker_fp.c |  4 ++--
  hw/net/rtl8139.c  |  3 ++-
  hw/net/smc91c111.c|  3 ++-
  hw/net/spapr_llan.c   |  3 ++-
  hw/net/stellaris_enet.c   |  3 ++-
  hw/net/sungem.c   |  2 +-
  hw/net/sunhme.c   |  3 ++-
  hw/net/tulip.c|  3 ++-
  hw/net/virtio-net.c   |  6 --
  hw/net/vmxnet3.c  |  2 +-
  hw/net/xen_nic.c  |  4 ++--
  hw/net/xgmac.c|  3 ++-
  hw/net/xilinx_axienet.c   |  3 ++-
  hw/net/xilinx_ethlite.c   |  3 ++-
  hw/usb/dev-network.c  |  3 ++-
  net/net.c | 15 +++
  40 files changed, 90 insertions(+), 41 deletions(-)



Hi Jason,

Can you review this series?

Regards,
Akihiko Odaki

[PATCH v3] hw/i386/pc: improve physical address space bound check for 32-bit systems

2023-09-21 Thread Ani Sinha

32-bit systems do not have a reserved memory for hole64 and hotplugging memory
devices are not supported on those systems. Therefore, the maximum limit of the
guest physical address in the absence of additional memory devices effectively
coincides with the end of "above 4G memory space" region. When users configure
additional memory devices, we need to properly account for the additional device
memory region so as to find the maximum value of the guest physical address
and enforce that it is within the physical address space of the processor. For
32-bit, this maximum PA will be outside the range of the processor's address
space.

With this change, for example, previously this was allowed:

$ ./qemu-system-x86_64 -cpu pentium -m size=10G

Now it is no longer allowed:

$ ./qemu-system-x86_64 -cpu pentium -m size=10G
qemu-system-x86_64: Address space limit 0x < 0x2bfff phys-bits too 
low (32)

For 32-bit, hotplugging additional memory is no longer allowed.

$ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
qemu-system-i386: Address space limit 0x < 0x1 phys-bits too 
low (32)

The above is still allowed for older machine types in order to support
compatibility. Therefore, this still works:

$ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2

After calling CPUID with EAX=0x8001, all AMD64 compliant processors
have the longmode-capable-bit turned on in the extended feature flags (bit 29)
in EDX. The absence of CPUID longmode can be used to differentiate between
32-bit and 64-bit processors and is the recommended approach. QEMU takes this
approach elsewhere (for example, please see x86_cpu_realizefn()) and with
this change, pc_max_used_gpa() also takes the same approach to detect 32-bit
processors.

Unit tests are modified to not run those tests that use memory hotplug
on 32-bit x86 architecture.

Finally, a new compatibility flag is introduced to retain the old behavior
for pc_max_used_gpa() for machines 8.1 and older.

Suggested-by: David Hildenbrand 
Signed-off-by: Ani Sinha 
---
 hw/i386/pc.c   | 34 +++---
 hw/i386/pc_piix.c  |  4 
 include/hw/i386/pc.h   |  3 +++
 tests/qtest/bios-tables-test.c | 26 ++
 tests/qtest/numa-test.c|  7 ++-
 5 files changed, 62 insertions(+), 12 deletions(-)

changelog:
v3: still accounting for additional memory device region above 4G.
unit tests fixed (not running for 32-bit where mem hotplug is used).

v2: removed memory hotplug region from max_gpa. added compat knobs.

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 54838c0c41..0aa2f6b6c0 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -907,12 +907,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
 static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
 {
 X86CPU *cpu = X86_CPU(first_cpu);
+PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+MachineState *ms = MACHINE(pcms);
+uint64_t devmem_start = 0;
+ram_addr_t devmem_size = 0;
 
-/* 32-bit systems don't have hole64 thus return max CPU address */
-if (cpu->phys_bits <= 32) {
-return ((hwaddr)1 << cpu->phys_bits) - 1;
+/*
+ * 32-bit systems don't have hole64 but they might have a region for
+ * memory devices. Even if additional hotplugged memory devices might
+ * not be usable by most guest OSes, we need to still consider them for
+ * calculating the highest possible GPA so that we can properly report
+ * if someone configures them on a CPU that cannot possibly address them.
+ */
+if (!(cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
+/* 32-bit systems */
+if (pcmc->fixed_32bit_mem_addr_check) {
+if (pcmc->has_reserved_memory &&
+(ms->ram_size < ms->maxram_size)) {
+pc_get_device_memory_range(pcms, &devmem_start,
+   &devmem_size);
+if (!pcmc->broken_reserved_end) {
+devmem_start += devmem_size;
+}
+return devmem_start - 1;
+} else {
+return pc_above_4g_end(pcms) - 1;
+}
+} else {
+/* old value for compatibility reasons */
+return ((hwaddr)1 << cpu->phys_bits) - 1;
+}
 }
 
+/* 64-bit systems */
 return pc_pci_hole64_start() + pci_hole64_size - 1;
 }
 
@@ -1867,6 +1894,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 pcmc->pvh_enabled = true;
 pcmc->kvmclock_create_always = true;
 pcmc->resizable_acpi_blob = true;
+pcmc->fixed_32bit_mem_addr_check = true;
 assert(!mc->get_hotplug_handler);
 mc->get_hotplug_handler = pc_get_hotplug_handler;
 mc->hotplug_allowed = pc_hotplug_allowed;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8321f36f97..f100a5de8b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386

Re: [PATCH] accel/kvm/kvm-all: Handle register access errors

2023-09-21 Thread Akihiko Odaki


On 2023/06/19 21:19, Peter Maydell wrote:

On Sat, 10 Jun 2023 at 04:51, Akihiko Odaki  wrote:


On 2022/12/01 20:00, Akihiko Odaki wrote:

On 2022/12/01 19:40, Peter Maydell wrote:

On Thu, 1 Dec 2022 at 10:27, Akihiko Odaki 
wrote:


A register access error typically means something seriously wrong
happened so that anything bad can happen after that and recovery is
impossible.
Even failing one register access is catastorophic as
architecture-specific code are not written so that it torelates such
failures.

Make sure the VM stop and nothing worse happens if such an error occurs.

Signed-off-by: Akihiko Odaki 


In a similar vein there was also
https://lore.kernel.org/all/20220617144857.34189-1-pet...@redhat.com/
back in June, which on the one hand was less comprehensive but on
the other does the plumbing to pass the error upwards rather than
reporting it immediately at point of failure.

I'm in principle in favour but suspect we'll run into some corner
cases where we were happily ignoring not-very-important failures
(eg if you're running Linux as the host OS on a Mac M1 and your
host kernel doesn't have this fix:
https://lore.kernel.org/all/ynhz6cw5onr2e...@google.com/T/
then QEMU will go from "works by sheer luck" to "consistently
hits this error check"). So we should aim to land this extra
error checking early in the release cycle so we have plenty of
time to deal with any bug reports we get about it.



Actually I found this problem when I tried to run QEMU with KVM on M2
MacBook Air and encountered a failure described and fixed at:
https://lore.kernel.org/all/20221201104914.28944-2-akihiko.od...@daynix.com/

Although the affected register was not really important, QEMU couldn't
run the guest well enough because kvm_arch_put_registers for ARM64 is
written in a way that it fails early. I guess the situation is not so
different for other architectures as well.

I still agree that this should be postponed until a new release cycle
starts as register saving/restoring is too important to fail.



Hi,

QEMU 8.0 is already released so I think it's time to revisit this.


Two months ago would have been a better time :-) We're heading up
towards softfreeze for 8.1 in about three weeks from now.

thanks
-- PMM


Hi Peter,

Please apply this.

Regards,
Akihiko Odaki

Re: [PATCH] docs/devel/reset.rst: Correct function names

2023-09-21 Thread Akihiko Odaki


On 2022/11/25 23:06, Akihiko Odaki wrote:

resettable_class_set_parent_phases() was mistakenly called
resettable_class_set_parent_reset_phases() in some places.

Signed-off-by: Akihiko Odaki 
---
  docs/devel/reset.rst | 17 -
  1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst
index 7cc6a6b314..38ed1790f7 100644
--- a/docs/devel/reset.rst
+++ b/docs/devel/reset.rst
@@ -184,21 +184,20 @@ in reset.
  {
  MyDevClass *myclass = MYDEV_CLASS(class);
  ResettableClass *rc = RESETTABLE_CLASS(class);
-resettable_class_set_parent_reset_phases(rc,
- mydev_reset_enter,
- mydev_reset_hold,
- mydev_reset_exit,
- &myclass->parent_phases);
+resettable_class_set_parent_phases(rc,
+   mydev_reset_enter,
+   mydev_reset_hold,
+   mydev_reset_exit,
+   &myclass->parent_phases);
  }
  
  In the above example, we override all three phases. It is possible to override

  only some of them by passing NULL instead of a function pointer to
-``resettable_class_set_parent_reset_phases()``. For example, the following will
+``resettable_class_set_parent_phases()``. For example, the following will
  only override the *enter* phase and leave *hold* and *exit* untouched::
  
-resettable_class_set_parent_reset_phases(rc, mydev_reset_enter,

- NULL, NULL,
- &myclass->parent_phases);
+resettable_class_set_parent_phases(rc, mydev_reset_enter, NULL, NULL,
+   &myclass->parent_phases);
  
  This is equivalent to providing a trivial implementation of the hold and exit

  phases which does nothing but call the parent class's implementation of the


Hi,

This patch seems to have been forgotten. Can anyone pull this?

Regards,
Akihiko Odaki

Re: [PATCH] cxl/vendor: update niagara to only build on linux, add KConfig options

2023-09-21 Thread Philippe Mathieu-Daudé


Hi Gregory,

On 20/9/23 17:50, Gregory Price wrote:

Niagara uses  which presently limits its compatibility to
linux hosts.  Change build to only build it on linux.

Add Kconfig file for skhynix directory, and make niagara depend on
CXL_MEM_DEVICE.  Add an explicit flag for niagara.

Signed-off-by: Gregory Price 
---
  hw/cxl/Kconfig| 2 ++
  hw/cxl/vendor/Kconfig | 1 +
  hw/cxl/vendor/skhynix/Kconfig | 4 
  hw/cxl/vendor/skhynix/meson.build | 4 +++-
  4 files changed, 10 insertions(+), 1 deletion(-)
  create mode 100644 hw/cxl/vendor/Kconfig
  create mode 100644 hw/cxl/vendor/skhynix/Kconfig




diff --git a/hw/cxl/vendor/skhynix/Kconfig b/hw/cxl/vendor/skhynix/Kconfig
new file mode 100644
index 00..382fa0cd6c
--- /dev/null
+++ b/hw/cxl/vendor/skhynix/Kconfig
@@ -0,0 +1,4 @@
+config CXL_SKHYNIX_NIAGARA
+bool
+depends on CXL_MEM_DEVICE


You want:

   depends on CXL_MEM_DEVICE && LINUX


+default y if CXL_VENDOR
diff --git a/hw/cxl/vendor/skhynix/meson.build 
b/hw/cxl/vendor/skhynix/meson.build
index 4e57db65f1..6f194aa517 100644
--- a/hw/cxl/vendor/skhynix/meson.build
+++ b/hw/cxl/vendor/skhynix/meson.build
@@ -1 +1,3 @@
-system_ss.add(when: 'CONFIG_CXL_VENDOR', if_true: files('skhynix_niagara.c',))
+if targetos == 'linux'


(Then this check is not necessary).


+system_ss.add(when: 'CONFIG_CXL_SKHYNIX_NIAGARA', if_true: 
files('skhynix_niagara.c',))
+endif

Re: [PATCH] docs/devel/reset.rst: Correct function names

2023-09-21 Thread Michael Tokarev


21.09.2023 10:33, Akihiko Odaki:
..

This patch seems to have been forgotten. Can anyone pull this?


Applied to my trivial-patches tree now.  Thank you!

/mjt

Re: [PATCH] plugins/hotblocks: Fix potential deadlock in plugin_exit() function

2023-09-21 Thread Philippe Mathieu-Daudé


Hi Cong,

On 21/9/23 08:12, Cong Liu wrote:

This patch fixes a potential deadlock in the plugin_exit() function of QEMU.
The original code does not release the lock mutex if it is NULL. This patch
adds a check for it being NULL and releases the mutex in that case.


You are correct.


Signed-off-by: Cong Liu 
---
  contrib/plugins/hotblocks.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c
index 6b74d25fead6..1f713f1904f3 100644
--- a/contrib/plugins/hotblocks.c
+++ b/contrib/plugins/hotblocks.c
@@ -70,6 +70,8 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
  
  g_list_free(it);

  g_mutex_unlock(&lock);
+} else {
+g_mutex_unlock(&lock);
  }


The code stays simpler if you simply move the unlock call out
of the if() statement, here.

  
  qemu_plugin_outs(report->str);


Regards,

Phil.

Re: Concerns regarding e17bebd049 ("dump: Set correct vaddr for ELF dump")

2023-09-21 Thread Laszlo Ersek

On 9/20/23 19:35, Stephen Brennan wrote:
> Hi Jon,
> 
> Jon Doron  writes:
>> Hi Stephen,
>> Like you have said the reason is as I wrote in the commit message, 
>> without "fixing" the vaddr GDB is messing up mapping and working with 
>> the generated core file.
> 
> For the record I totally love this workaround :)
> 
> It's clever and gets the job done and I would have done it in a
> heartbeat. It's just that it does end up making vmcores that have
> incorrect data, which is a pain for debuggers that are actually designed
> to look at kernel core dumps.
> 
>> This patch is almost 4 years old, perhaps some changes to GDB has been 
>> introduced to resolve this, I have not checked since then.
> 
> Program Headers:
>   Type   Offset VirtAddr   PhysAddr
>  FileSizMemSiz  Flags  Align
>   NOTE   0x0168 0x 0x
>  0x1980 0x1980 0x0
>   LOAD   0x1ae8 0x 0x
>  0x8000 0x8000 0x0
>   LOAD   0x80001ae8 0x 0xfffc
>  0x0004 0x0004 0x0
> 
> (gdb) info files
> Local core dump file:
> `/home/stepbren/repos/test_code/elf/dumpfile', file type elf64-x86-64.
> 0x - 0x8000 is load1
> 0x - 0x0004 is load2
> 
> $ gdb --version 
> GNU gdb (GDB) Red Hat Enterprise Linux 10.2-10.0.2.el9
> Copyright (C) 2021 Free Software Foundation, Inc.
> License GPLv3+: GNU GPL version 3 or later 
> This is free software: you are free to change and redistribute it.
> There is NO WARRANTY, to the extent permitted by law.
> 
> 
> It doesn't *look like* anything has changed in this version of GDB. But
> I'm not really certain that GDB is expected to use the physical
> addresses in the load segments: it's not a kernel debugger.

The paging=off vmcores dumped by QEMU are primarily meant for the
"crash" utility , not gdb.
Crash builds upon gdb (it downloads a gdb tarball at build time, IIRC),
but either way, the vmcores are meant to be consumed by "crash", and
crash *is* a kernel debugger (both live, and post-mortem).

So, from my perspective: "whatever works with 'crash'". If you revert
Jon's commit and the vmcores continue working with "crash", I won't object.

I commented similarly under Jon's v1 patch -- as long as paging=off
dumps continue working with "crash", I'm fine:

http://mid.mail-archive.com/7961a154-f139-af73-613d-94b88bf95392@redhat.com

For reference, these are the v1 through v3 patch threads, from 2019:

http://mid.mail-archive.com/20181225125344.4482-1-arilou@gmail.com
http://mid.mail-archive.com/20190108130219.18550-1-arilou@gmail.com
http://mid.mail-archive.com/20190109082203.27142-1-arilou@gmail.com

Laszlo


> 
> I think hacking the p_vaddr field _is_ the way to get GDB to behave in
> the way you want: allow you to read physical memory addresses.
> 
>> As I'm no longer using this feature and have not worked and tested it 
>> in a long while, so I have no obligations to this change, but perhaps
>> someone else might be using it...
> 
> I definitely think it's valuable for people to continue being able to
> use QEMU vmcores generated with paging=off in GDB, even if GDB isn't
> desgined for it. It seems like a useful hack that appeals to the lowest
> common denominator: most people have GDB and not a purpose-built kernel
> debugger. But maybe we could point to a program like the below that will
> tweak the p_paddr field after the fact, in order to appeal to GDB's
> sensibilities?
> 
> Thanks,
> Stephen
> 
> ---
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> #include 
> 
> static void fail(const char *msg)
> {
>   fprintf(stderr, "%s\n", msg);
>   exit(EXIT_FAILURE);
> }
> 
> static void perror_fail(const char *pfx)
> {
>   perror(pfx);
>   exit(EXIT_FAILURE);
> }
> 
> static void usage(void)
> {
>   puts("usage: phys2virt COREFILE");
>   puts("Modifies the ELF COREFILE so that load segments have their 
> virtual");
>   puts("address value copied from the physical address field.");
>   exit(EXIT_SUCCESS);
> }
> 
> static int endian(void)
> {
>   union {
>   uint32_t ival;
>   char cval[4];
>   } data;
>   data.ival = 1;
>   if (data.cval[0])
>   return ELFDATA2LSB;
>   else
>   return ELFDATA2MSB;
> }
> 
> int main(int argc, char **argv)
> {
>   char *filename;
>   FILE *f;
>   Elf64_Ehdr hdr;
>   Elf64_Phdr *phdrs;
>   off_t phoff;
>   int phnum, phentsize;
>   bool bswap;
> 
>   if (argc != 2 || strcmp(argv[1], "-h") == 0)
>   usage();
> 
>   filename = argv[

intermittent qtest-aarch64/xlnx-canfd-test test failure

2023-09-21 Thread Michael Tokarev


Hi!

While doing stable-8.1.1 preparation, I've a CI failure of 
ubuntu-20.04-s390x-all here:

https://gitlab.com/qemu-project/qemu/-/jobs/5132720046

▶ ERROR:../tests/qtest/xlnx-canfd-test.c:265:match_rx_tx_data: \
  assertion failed ((buf_rx[size] & DLC_FD_BIT_MASK) == (buf_tx[size] & 
DLC_FD_BIT_MASK)): (2281701376 == 2147483648)

 qemu:qtest+qtest-aarch64 / qtest-aarch64/xlnx-canfd-test  \
   ERROR   0.77s   killed by signal 6 SIGABRT

Re-run of this test went successfully.  However, the assert looks
a bit scary, and the fact that it does not fail in a reliable way
is a bit troubling too.  Is it a flaky test or there's something
else in qemu with concurrent threads?

I don't see this assert in previous test runs.  But ubuntu-20.04-s390x-all
test fails quite often due to other reasons, so it isn't conclusive.

Should something be done with this?

The testing is done for commit f2fc49c302036315db6e8c9f74592decc3be0476,
which is in staging-8.1 branch only temporarily.

Thanks,

/mjt

Re: [PATCH v3] hw/i386/pc: improve physical address space bound check for 32-bit systems

2023-09-21 Thread Ani Sinha




> On 21-Sep-2023, at 12:47 PM, Ani Sinha  wrote:
> 
> 32-bit systems do not have a reserved memory for hole64 and hotplugging memory
> devices are not supported on those systems.

Relevant thread for Linux kernel 
https://lore.kernel.org/all/20210929143600.49379-4-da...@redhat.com/


> Therefore, the maximum limit of the
> guest physical address in the absence of additional memory devices effectively
> coincides with the end of "above 4G memory space" region. When users configure
> additional memory devices, we need to properly account for the additional 
> device
> memory region so as to find the maximum value of the guest physical address
> and enforce that it is within the physical address space of the processor. For
> 32-bit, this maximum PA will be outside the range of the processor's address
> space.
> 
> With this change, for example, previously this was allowed:
> 
> $ ./qemu-system-x86_64 -cpu pentium -m size=10G
> 
> Now it is no longer allowed:
> 
> $ ./qemu-system-x86_64 -cpu pentium -m size=10G
> qemu-system-x86_64: Address space limit 0x < 0x2bfff phys-bits 
> too low (32)
> 
> For 32-bit, hotplugging additional memory is no longer allowed.
> 
> $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
> qemu-system-i386: Address space limit 0x < 0x1 phys-bits too 
> low (32)
> 
> The above is still allowed for older machine types in order to support
> compatibility. Therefore, this still works:
> 
> $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2
> 
> After calling CPUID with EAX=0x8001, all AMD64 compliant processors
> have the longmode-capable-bit turned on in the extended feature flags (bit 29)
> in EDX. The absence of CPUID longmode can be used to differentiate between
> 32-bit and 64-bit processors and is the recommended approach. QEMU takes this
> approach elsewhere (for example, please see x86_cpu_realizefn()) and with
> this change, pc_max_used_gpa() also takes the same approach to detect 32-bit
> processors.
> 
> Unit tests are modified to not run those tests that use memory hotplug
> on 32-bit x86 architecture.
> 
> Finally, a new compatibility flag is introduced to retain the old behavior
> for pc_max_used_gpa() for machines 8.1 and older.
> 
> Suggested-by: David Hildenbrand 
> Signed-off-by: Ani Sinha 
> ---
> hw/i386/pc.c   | 34 +++---
> hw/i386/pc_piix.c  |  4 
> include/hw/i386/pc.h   |  3 +++
> tests/qtest/bios-tables-test.c | 26 ++
> tests/qtest/numa-test.c|  7 ++-
> 5 files changed, 62 insertions(+), 12 deletions(-)
> 
> changelog:
> v3: still accounting for additional memory device region above 4G.
> unit tests fixed (not running for 32-bit where mem hotplug is used).
> 
> v2: removed memory hotplug region from max_gpa. added compat knobs.
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 54838c0c41..0aa2f6b6c0 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -907,12 +907,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState 
> *pcms)
> static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
> {
> X86CPU *cpu = X86_CPU(first_cpu);
> +PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> +MachineState *ms = MACHINE(pcms);
> +uint64_t devmem_start = 0;
> +ram_addr_t devmem_size = 0;
> 
> -/* 32-bit systems don't have hole64 thus return max CPU address */
> -if (cpu->phys_bits <= 32) {
> -return ((hwaddr)1 << cpu->phys_bits) - 1;
> +/*
> + * 32-bit systems don't have hole64 but they might have a region for
> + * memory devices. Even if additional hotplugged memory devices might
> + * not be usable by most guest OSes, we need to still consider them for
> + * calculating the highest possible GPA so that we can properly report
> + * if someone configures them on a CPU that cannot possibly address them.
> + */
> +if (!(cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
> +/* 32-bit systems */
> +if (pcmc->fixed_32bit_mem_addr_check) {
> +if (pcmc->has_reserved_memory &&
> +(ms->ram_size < ms->maxram_size)) {
> +pc_get_device_memory_range(pcms, &devmem_start,
> +   &devmem_size);
> +if (!pcmc->broken_reserved_end) {
> +devmem_start += devmem_size;
> +}
> +return devmem_start - 1;
> +} else {
> +return pc_above_4g_end(pcms) - 1;
> +}
> +} else {
> +/* old value for compatibility reasons */
> +return ((hwaddr)1 << cpu->phys_bits) - 1;
> +}
> }
> 
> +/* 64-bit systems */
> return pc_pci_hole64_start() + pci_hole64_size - 1;
> }
> 
> @@ -1867,6 +1894,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
> *data)
> pcmc->pvh_enabled = true;
> pcmc->kvmcl

[PATCH v3 0/8] util: Introduce qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() returns a dynamically allocated directory path
that is appropriate for storing runtime files. It corresponds to "run"
directory in Unix.

With a tree-wide search, it was found that there are several cases
where such a functionality is implemented so let's have one as a common
utlity function.

A notable feature of qemu_get_runtime_dir() is that it uses
$XDG_RUNTIME_DIR if available. While the function is often called by
executables which requires root privileges, it is still possible that
they are called from a user without privilege to write the system
runtime directory. In fact, I decided to write this patch when I ran
virtiofsd in a Linux namespace created by a normal user and realized
it tries to write the system runtime directory, not writable in this
case. $XDG_RUNTIME_DIR should provide a writable directory in such
cases.

This function does not use qemu_get_local_state_dir() or its logic
for Windows. Actually the implementation of qemu_get_local_state_dir()
for Windows seems not right as it calls g_get_system_data_dirs(),
which refers to $XDG_DATA_DIRS. In Unix terminology, it is basically
"/usr/share", not "/var", which qemu_get_local_state_dir() is intended
to provide. Instead, this function try to use the following in order:
- $XDG_RUNTIME_DIR
- LocalAppData folder
- get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR "/run")

This function does not use g_get_user_runtime_dir() either as it
falls back to g_get_user_cache_dir() when $XDG_DATA_DIRS is not
available. In the case, we rather use:
get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR "/run")

V2 -> V3:
  Rebase to the current master.
  Dropped patch "qga: Remove platform GUID definitions" since it is
  irrelevant.

V1 -> V2:
  Rebased to the current master since Patchew complains.

Akihiko Odaki (8):
  util: Introduce qemu_get_runtime_dir()
  ivshmem-server: Use qemu_get_runtime_dir()
  contrib/rdmacm-mux: Use qemu_get_runtime_dir()
  qga: Use qemu_get_runtime_dir()
  scsi: Use qemu_get_runtime_dir()
  module: Use qemu_get_runtime_dir()
  util: Remove qemu_get_local_state_dir()
  spice-app: Use qemu_get_runtime_dir()

 include/qemu/osdep.h   | 10 +++---
 contrib/ivshmem-server/main.c  | 20 
 contrib/rdmacm-mux/main.c  | 22 ++
 qga/main.c |  9 -
 scsi/qemu-pr-helper.c  |  6 +++---
 ui/spice-app.c |  4 ++--
 util/module.c  |  3 ++-
 util/oslib-posix.c |  9 +++--
 util/oslib-win32.c | 24 
 contrib/rdmacm-mux/meson.build |  2 +-
 10 files changed, 76 insertions(+), 33 deletions(-)

-- 
2.41.0

[PATCH v3 2/8] ivshmem-server: Use qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() is used to construct the default PID file path.

Signed-off-by: Akihiko Odaki 
---
 contrib/ivshmem-server/main.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
index 5901f17707..313124dd45 100644
--- a/contrib/ivshmem-server/main.c
+++ b/contrib/ivshmem-server/main.c
@@ -14,7 +14,6 @@
 
 #define IVSHMEM_SERVER_DEFAULT_VERBOSE0
 #define IVSHMEM_SERVER_DEFAULT_FOREGROUND 0
-#define IVSHMEM_SERVER_DEFAULT_PID_FILE   "/var/run/ivshmem-server.pid"
 #define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
 #define IVSHMEM_SERVER_DEFAULT_SHM_PATH   "ivshmem"
 #define IVSHMEM_SERVER_DEFAULT_SHM_SIZE   (4 * 1024 * 1024)
@@ -35,15 +34,23 @@ typedef struct IvshmemServerArgs {
 unsigned n_vectors;
 } IvshmemServerArgs;
 
+static char *ivshmem_server_get_default_pid_file(void)
+{
+g_autofree char *run = qemu_get_runtime_dir();
+return g_build_filename(run, "ivshmem-server.pid", NULL);
+}
+
 static void
 ivshmem_server_usage(const char *progname)
 {
+g_autofree char *pid_file = ivshmem_server_get_default_pid_file();
+
 printf("Usage: %s [OPTION]...\n"
"  -h: show this help\n"
"  -v: verbose mode\n"
"  -F: foreground mode (default is to daemonize)\n"
"  -p : path to the PID file (used in daemon mode only)\n"
-   " default " IVSHMEM_SERVER_DEFAULT_PID_FILE "\n"
+   " default %s\n"
"  -S : path to the unix socket to listen to\n"
" default " IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "\n"
"  -M : POSIX shared memory object to use\n"
@@ -54,7 +61,7 @@ ivshmem_server_usage(const char *progname)
" default %u\n"
"  -n : number of vectors\n"
" default %u\n",
-   progname, IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+   progname, pid_file, IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
IVSHMEM_SERVER_DEFAULT_N_VECTORS);
 }
 
@@ -189,10 +196,10 @@ main(int argc, char *argv[])
 {
 IvshmemServer server;
 struct sigaction sa, sa_quit;
+g_autofree char *default_pid_file = NULL;
 IvshmemServerArgs args = {
 .verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
 .foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
-.pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
 .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
 .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
 .use_shm_open = true,
@@ -207,6 +214,11 @@ main(int argc, char *argv[])
  */
 printf("*** Example code, do not use in production ***\n");
 
+qemu_init_exec_dir(argv[0]);
+
+default_pid_file = ivshmem_server_get_default_pid_file();
+args.pid_file = default_pid_file;
+
 /* parse arguments, will exit on error */
 ivshmem_server_parse_args(&args, argc, argv);
 
-- 
2.41.0

[PATCH v3 6/8] module: Use qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() is used to construct the path to module upgrades.

Signed-off-by: Akihiko Odaki 
---
 util/module.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/util/module.c b/util/module.c
index 32e263163c..580658edf4 100644
--- a/util/module.c
+++ b/util/module.c
@@ -242,7 +242,8 @@ int module_load(const char *prefix, const char *name, Error 
**errp)
 version_dir = g_strcanon(g_strdup(QEMU_PKGVERSION),
  G_CSET_A_2_Z G_CSET_a_2_z G_CSET_DIGITS "+-.~",
  '_');
-dirs[n_dirs++] = g_strdup_printf("/var/run/qemu/%s", version_dir);
+g_autofree char *run = qemu_get_runtime_dir();
+dirs[n_dirs++] = g_build_filename(run, "qemu", version_dir, NULL);
 #endif
 assert(n_dirs <= ARRAY_SIZE(dirs));
 
-- 
2.41.0

[PATCH v3 8/8] spice-app: Use qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() provides QEMU-specific fallback of runtime
directory.

Signed-off-by: Akihiko Odaki 
---
 ui/spice-app.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/spice-app.c b/ui/spice-app.c
index 405fb7f9f5..f6c2343213 100644
--- a/ui/spice-app.c
+++ b/ui/spice-app.c
@@ -151,8 +151,8 @@ static void spice_app_display_early_init(DisplayOptions 
*opts)
 atexit(spice_app_atexit);
 
 if (qemu_name) {
-app_dir = g_build_filename(g_get_user_runtime_dir(),
-   "qemu", qemu_name, NULL);
+g_autofree char *run = qemu_get_runtime_dir();
+app_dir = g_build_filename(run, "qemu", qemu_name, NULL);
 if (g_mkdir_with_parents(app_dir, S_IRWXU) < -1) {
 error_report("Failed to create directory %s: %s",
  app_dir, strerror(errno));
-- 
2.41.0

[PULL v2 00/22] implement discard operation for Parallels images

2023-09-21 Thread Denis V. Lunev

The following changes since commit 4907644841e3200aea6475c0f72d3d987e9f3d93:

  Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu into 
staging (2023-09-19 13:22:19 -0400)

are available in the Git repository at:

  https://src.openvz.org/scm/~den/qemu.git tags/pull-parallels-2023-09-20-v2

for you to fetch changes up to 1dba99e34d1bcb3c0de69c4270f9587b01e225fe:

  tests: extend test 131 to cover availability of the write-zeroes (2023-09-21 
08:49:28 +0200)


Parallels format driver:
* regular calculation of cluster used bitmap of the image file
* cluster allocation on the base of that bitmap (effectively allocation of
  new clusters could be done inside the image if that offset space is unused)
* support of DISCARD and WRITE_ZEROES operations
* image check bugfixes
* unit tests fixes
* unit tests covering new functionality


Changes from v1:
* patch 12: bdrv_co_getlength -> bdrv_getlength in parallels_fill_used_bitmap
  as called from open()
* patch 19: added GRAPH_RDLOCK specifier for parallels_co_pdiscard
* patch 21: added GRAPH_RDLOCK specifier for parallels_co_pwrite_zeroes


Denis V. Lunev (22):
  parallels: fix formatting in bdrv_parallels initialization
  parallels: mark driver as supporting CBT
  parallels: fix memory leak in parallels_open()
  parallels: invent parallels_opts_prealloc() helper to parse prealloc opts
  parallels: return earler in fail_format branch in parallels_open()
  parallels: return earlier from parallels_open() function on error
  parallels: refactor path when we need to re-check image in parallels_open
  parallels: create mark_used() helper which sets bit in used bitmap
  tests: ensure that image validation will not cure the corruption
  parallels: fix broken parallels_check_data_off()
  parallels: add test which will validate data_off fixes through repair
  parallels: collect bitmap of used clusters at open
  tests: fix broken deduplication check in parallels format test
  tests: test self-cure of parallels image with duplicated clusters
  parallels: accept multiple clusters in mark_used()
  parallels: update used bitmap in allocate_cluster
  parallels: naive implementation of allocate_clusters with used bitmap
  parallels: improve readability of allocate_clusters
  parallels: naive implementation of parallels_co_pdiscard
  tests: extend test 131 to cover availability of the discard operation
  parallels: naive implementation of parallels_co_pwrite_zeroes
  tests: extend test 131 to cover availability of the write-zeroes

 block/parallels.c | 389 --
 block/parallels.h |   3 +
 tests/qemu-iotests/131|  52 
 tests/qemu-iotests/131.out|  60 
 tests/qemu-iotests/tests/parallels-checks |  76 -
 tests/qemu-iotests/tests/parallels-checks.out |  65 -
 6 files changed, 544 insertions(+), 101 deletions(-)

-- 
2.34.1

[PULL v2 07/22] parallels: refactor path when we need to re-check image in parallels_open

2023-09-21 Thread Denis V. Lunev

More conditions follows thus the check should be more scalable.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index bd26c8db63..af3b4894d7 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1071,7 +1071,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 int ret, size, i;
 int64_t file_nb_sectors, sector;
 uint32_t data_start;
-bool data_off_is_correct;
+bool need_check = false;
 
 ret = parallels_opts_prealloc(bs, options, errp);
 if (ret < 0) {
@@ -1139,11 +1139,14 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->bat_bitmap = (uint32_t *)(s->header + 1);
 
 if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-s->header_unclean = true;
+need_check = s->header_unclean = true;
+}
+
+{
+bool ok = parallels_test_data_off(s, file_nb_sectors, &data_start);
+need_check = need_check || !ok;
 }
 
-data_off_is_correct = parallels_test_data_off(s, file_nb_sectors,
-  &data_start);
 s->data_start = data_start;
 s->data_end = s->data_start;
 if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
@@ -1200,6 +1203,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->data_end = sector + s->tracks;
 }
 }
+need_check = need_check || s->data_end > file_nb_sectors;
 
 /*
  * We don't repair the image here if it's opened for checks. Also we don't
@@ -1209,12 +1213,8 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 return 0;
 }
 
-/*
- * Repair the image if it's dirty or
- * out-of-image corruption was detected.
- */
-if (s->data_end > file_nb_sectors || s->header_unclean
-|| !data_off_is_correct) {
+/* Repair the image if corruption was detected. */
+if (need_check) {
 BdrvCheckResult res;
 ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
 if (ret < 0) {
@@ -1223,7 +1223,6 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto fail;
 }
 }
-
 return 0;
 
 fail_format:
-- 
2.34.1

[PATCH v3 1/8] util: Introduce qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() returns a dynamically allocated directory path
that is appropriate for storing runtime files. It corresponds to "run"
directory in Unix.

With a tree-wide search, it was found that there are several cases
where such a functionality is implemented so let's have one as a common
utlity function.

A notable feature of qemu_get_runtime_dir() is that it uses
$XDG_RUNTIME_DIR if available. While the function is often called by
executables which requires root privileges, it is still possible that
they are called from a user without privilege to write the system
runtime directory. In fact, I decided to write this patch when I ran
virtiofsd in a Linux namespace created by a normal user and realized
it tries to write the system runtime directory, not writable in this
case. $XDG_RUNTIME_DIR should provide a writable directory in such
cases.

This function does not use qemu_get_local_state_dir() or its logic
for Windows. Actually the implementation of qemu_get_local_state_dir()
for Windows seems not right as it calls g_get_system_data_dirs(),
which refers to $XDG_DATA_DIRS. In Unix terminology, it is basically
"/usr/share", not "/var", which qemu_get_local_state_dir() is intended
to provide. Instead, this function try to use the following in order:
- $XDG_RUNTIME_DIR
- LocalAppData folder
- get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR "/run")

This function does not use g_get_user_runtime_dir() either as it
falls back to g_get_user_cache_dir() when $XDG_DATA_DIRS is not
available. In the case, we rather use:
get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR "/run")

Signed-off-by: Akihiko Odaki 
---
 include/qemu/osdep.h | 12 
 util/oslib-posix.c   | 11 +++
 util/oslib-win32.c   | 26 ++
 3 files changed, 49 insertions(+)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 2897720fac..bb857c910f 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -636,6 +636,18 @@ void qemu_set_cloexec(int fd);
  */
 char *qemu_get_local_state_dir(void);
 
+/**
+ * qemu_get_runtime_dir:
+ *
+ * Return a dynamically allocated directory path that is appropriate for 
storing
+ * runtime files. It corresponds to "run" directory in Unix, and uses
+ * $XDG_RUNTIME_DIR if available.
+ *
+ * The caller is responsible for releasing the value returned with g_free()
+ * after use.
+ */
+char *qemu_get_runtime_dir(void);
+
 /**
  * qemu_getauxval:
  * @type: the auxiliary vector key to lookup
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index e86fd64e09..0c82717be5 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -273,6 +273,17 @@ qemu_get_local_state_dir(void)
 return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
 }
 
+char *
+qemu_get_runtime_dir(void)
+{
+char *env = getenv("XDG_RUNTIME_DIR");
+if (env) {
+return g_strdup(env);
+}
+
+return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR "/run");
+}
+
 void qemu_set_tty_echo(int fd, bool echo)
 {
 struct termios tty;
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 19a0ea7fbe..38df7b57b5 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -27,6 +27,8 @@
  */
 
 #include "qemu/osdep.h"
+#include 
+#include 
 #include 
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
@@ -237,6 +239,30 @@ qemu_get_local_state_dir(void)
 return g_strdup(data_dirs[0]);
 }
 
+char *
+qemu_get_runtime_dir(void)
+{
+size_t size = GetEnvironmentVariableA("XDG_RUNTIME_DIR", NULL, 0);
+if (size) {
+char *env = g_malloc(size);
+GetEnvironmentVariableA("XDG_RUNTIME_DIR", env, size);
+return env;
+}
+
+PWSTR wpath;
+const wchar_t *cwpath;
+if (!SHGetKnownFolderPath(&FOLDERID_LocalAppData, KF_FLAG_DEFAULT, NULL, 
&wpath)) {
+cwpath = wpath;
+size = wcsrtombs(NULL, &cwpath, 0, &(mbstate_t){0}) + 1;
+char *path = g_malloc(size);
+wcsrtombs(path, &cwpath, size, &(mbstate_t){0});
+CoTaskMemFree(wpath);
+return path;
+}
+
+return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR "/run");
+}
+
 void qemu_set_tty_echo(int fd, bool echo)
 {
 HANDLE handle = (HANDLE)_get_osfhandle(fd);
-- 
2.41.0

[PULL v2 01/22] parallels: fix formatting in bdrv_parallels initialization

2023-09-21 Thread Denis V. Lunev

Old code is ugly and contains tabulations. There are no functional
changes in this patch.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 48c32d6821..2ebd8e1301 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1249,23 +1249,25 @@ static void parallels_close(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_parallels = {
-.format_name   = "parallels",
-.instance_size = sizeof(BDRVParallelsState),
-.bdrv_probe= parallels_probe,
-.bdrv_open = parallels_open,
-.bdrv_close= parallels_close,
-.bdrv_child_perm  = bdrv_default_perms,
-.bdrv_co_block_status = parallels_co_block_status,
-.bdrv_has_zero_init   = bdrv_has_zero_init_1,
-.bdrv_co_flush_to_os  = parallels_co_flush_to_os,
-.bdrv_co_readv  = parallels_co_readv,
-.bdrv_co_writev = parallels_co_writev,
-.is_format  = true,
-.supports_backing = true,
-.bdrv_co_create  = parallels_co_create,
-.bdrv_co_create_opts = parallels_co_create_opts,
-.bdrv_co_check  = parallels_co_check,
-.create_opts= ¶llels_create_opts,
+.format_name= "parallels",
+.instance_size  = sizeof(BDRVParallelsState),
+.create_opts= ¶llels_create_opts,
+.is_format  = true,
+.supports_backing   = true,
+
+.bdrv_has_zero_init = bdrv_has_zero_init_1,
+
+.bdrv_probe = parallels_probe,
+.bdrv_open  = parallels_open,
+.bdrv_close = parallels_close,
+.bdrv_child_perm= bdrv_default_perms,
+.bdrv_co_block_status   = parallels_co_block_status,
+.bdrv_co_flush_to_os= parallels_co_flush_to_os,
+.bdrv_co_readv  = parallels_co_readv,
+.bdrv_co_writev = parallels_co_writev,
+.bdrv_co_create = parallels_co_create,
+.bdrv_co_create_opts= parallels_co_create_opts,
+.bdrv_co_check  = parallels_co_check,
 };
 
 static void bdrv_parallels_init(void)
-- 
2.34.1

[PULL v2 02/22] parallels: mark driver as supporting CBT

2023-09-21 Thread Denis V. Lunev

Parallels driver indeed support Parallels Dirty Bitmap Feature in
read-only mode. The patch adds bdrv_supports_persistent_dirty_bitmap()
callback which always return 1 to indicate that.

This will allow to copy CBT from Parallels image with qemu-img.

Note: read-write support is signalled through
bdrv_co_can_store_new_dirty_bitmap() and is different.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index 2ebd8e1301..428f72de1c 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1248,6 +1248,11 @@ static void parallels_close(BlockDriverState *bs)
 error_free(s->migration_blocker);
 }
 
+static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs)
+{
+return 1;
+}
+
 static BlockDriver bdrv_parallels = {
 .format_name= "parallels",
 .instance_size  = sizeof(BDRVParallelsState),
@@ -1256,6 +1261,7 @@ static BlockDriver bdrv_parallels = {
 .supports_backing   = true,
 
 .bdrv_has_zero_init = bdrv_has_zero_init_1,
+.bdrv_supports_persistent_dirty_bitmap = 
parallels_is_support_dirty_bitmaps,
 
 .bdrv_probe = parallels_probe,
 .bdrv_open  = parallels_open,
-- 
2.34.1

[PULL v2 03/22] parallels: fix memory leak in parallels_open()

2023-09-21 Thread Denis V. Lunev

We should free opts allocated through qemu_opts_create() at the end.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/parallels.c b/block/parallels.c
index 428f72de1c..af7be427c9 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1217,6 +1217,7 @@ fail_format:
 fail_options:
 ret = -EINVAL;
 fail:
+qemu_opts_del(opts);
 /*
  * "s" object was allocated by g_malloc0 so we can safely
  * try to free its fields even they were not allocated.
-- 
2.34.1

[PULL v2 05/22] parallels: return earler in fail_format branch in parallels_open()

2023-09-21 Thread Denis V. Lunev

We do not need to perform any deallocation/cleanup if wrong format is
detected.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/parallels.c b/block/parallels.c
index ae006e7fc7..12f38cf70b 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1232,7 +1232,8 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 fail_format:
 error_setg(errp, "Image not in Parallels format");
-ret = -EINVAL;
+return -EINVAL;
+
 fail:
 /*
  * "s" object was allocated by g_malloc0 so we can safely
-- 
2.34.1

[PULL v2 19/22] parallels: naive implementation of parallels_co_pdiscard

2023-09-21 Thread Denis V. Lunev

* Discarding with backing stores is not supported by the format.
* There is no buffering/queueing of the discard operation.
* Only operations aligned to the cluster are supported.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index a97fb8b506..1808029f14 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -537,6 +537,51 @@ parallels_co_readv(BlockDriverState *bs, int64_t 
sector_num, int nb_sectors,
 return ret;
 }
 
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+int ret = 0;
+uint32_t cluster, count;
+BDRVParallelsState *s = bs->opaque;
+
+/*
+ * The image does not support ZERO mark inside the BAT, which means that
+ * stale data could be exposed from the backing file.
+ */
+if (bs->backing) {
+return -ENOTSUP;
+}
+
+if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) {
+return -ENOTSUP;
+} else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) {
+return -ENOTSUP;
+}
+
+cluster = offset / s->cluster_size;
+count = bytes / s->cluster_size;
+
+qemu_co_mutex_lock(&s->lock);
+for (; count > 0; cluster++, count--) {
+int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS;
+if (host_off == 0) {
+continue;
+}
+
+ret = bdrv_co_pdiscard(bs->file, host_off, s->cluster_size);
+if (ret < 0) {
+goto done;
+}
+
+parallels_set_bat_entry(s, cluster, 0);
+bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1);
+}
+done:
+qemu_co_mutex_unlock(&s->lock);
+return ret;
+}
+
 static void parallels_check_unclean(BlockDriverState *bs,
 BdrvCheckResult *res,
 BdrvCheckMode fix)
@@ -1417,6 +1462,7 @@ static BlockDriver bdrv_parallels = {
 .bdrv_co_create = parallels_co_create,
 .bdrv_co_create_opts= parallels_co_create_opts,
 .bdrv_co_check  = parallels_co_check,
+.bdrv_co_pdiscard   = parallels_co_pdiscard,
 };
 
 static void bdrv_parallels_init(void)
-- 
2.34.1

[PATCH v3 3/8] contrib/rdmacm-mux: Use qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() is used to construct the default Unix socket
path.

Signed-off-by: Akihiko Odaki 
---
 contrib/rdmacm-mux/main.c  | 22 ++
 contrib/rdmacm-mux/meson.build |  2 +-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c
index 771ca01e03..00c14031ca 100644
--- a/contrib/rdmacm-mux/main.c
+++ b/contrib/rdmacm-mux/main.c
@@ -14,6 +14,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include 
 #include 
 #include 
@@ -40,8 +41,6 @@
 #define CM_REQ_DGID_POS  80
 #define CM_SIDR_REQ_DGID_POS 44
 
-/* The below can be override by command line parameter */
-#define UNIX_SOCKET_PATH "/var/run/rdmacm-mux"
 /* Has format %s-%s-%d" -- */
 #define SOCKET_PATH_MAX (PATH_MAX - NAME_MAX - sizeof(int) - 2)
 #define RDMA_PORT_NUM 1
@@ -77,7 +76,13 @@ typedef struct RdmaCmServer {
 
 static RdmaCMServer server = {0};
 
-static void usage(const char *progname)
+static char *get_default_unix_socket_path(void)
+{
+g_autofree char *run = qemu_get_runtime_dir();
+return g_build_filename(run, "rdmacm-mux", NULL);
+}
+
+static void usage(const char *progname, const char *default_unix_socket_path)
 {
 printf("Usage: %s [OPTION]...\n"
"Start a RDMA-CM multiplexer\n"
@@ -86,7 +91,7 @@ static void usage(const char *progname)
"\t-d rdma-device-name   Name of RDMA device to register with\n"
"\t-s unix-socket-path   Path to unix socket to listen on (default 
%s)\n"
"\t-p rdma-device-port   Port number of RDMA device to register 
with (default %d)\n",
-   progname, UNIX_SOCKET_PATH, RDMA_PORT_NUM);
+   progname, default_unix_socket_path, RDMA_PORT_NUM);
 }
 
 static void help(const char *progname)
@@ -97,16 +102,16 @@ static void help(const char *progname)
 static void parse_args(int argc, char *argv[])
 {
 int c;
-char unix_socket_path[SOCKET_PATH_MAX];
+g_autofree char *default_unix_socket_path = get_default_unix_socket_path();
+char *unix_socket_path = default_unix_socket_path;
 
 strcpy(server.args.rdma_dev_name, "");
-strcpy(unix_socket_path, UNIX_SOCKET_PATH);
 server.args.rdma_port_num = RDMA_PORT_NUM;
 
 while ((c = getopt(argc, argv, "hs:d:p:")) != -1) {
 switch (c) {
 case 'h':
-usage(argv[0]);
+usage(argv[0], default_unix_socket_path);
 exit(0);
 
 case 'd':
@@ -115,7 +120,7 @@ static void parse_args(int argc, char *argv[])
 
 case 's':
 /* This is temporary, final name will build below */
-strncpy(unix_socket_path, optarg, SOCKET_PATH_MAX - 1);
+unix_socket_path = optarg;
 break;
 
 case 'p':
@@ -811,6 +816,7 @@ int main(int argc, char *argv[])
 {
 int rc;
 
+qemu_init_exec_dir(argv[0]);
 memset(&server, 0, sizeof(server));
 
 parse_args(argc, argv);
diff --git a/contrib/rdmacm-mux/meson.build b/contrib/rdmacm-mux/meson.build
index 36c9c89630..59f60f9cac 100644
--- a/contrib/rdmacm-mux/meson.build
+++ b/contrib/rdmacm-mux/meson.build
@@ -1,7 +1,7 @@
 if have_pvrdma
   # FIXME: broken on big endian architectures
   executable('rdmacm-mux', files('main.c'), genh,
- dependencies: [glib, libumad],
+ dependencies: [glib, libumad, qemuutil],
  build_by_default: false,
  install: false)
 endif
-- 
2.41.0

[PULL v2 14/22] tests: test self-cure of parallels image with duplicated clusters

2023-09-21 Thread Denis V. Lunev

The test is quite similar with the original one for duplicated clusters.
There is the only difference in the operation which should fix the
image.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 tests/qemu-iotests/tests/parallels-checks | 36 +++
 tests/qemu-iotests/tests/parallels-checks.out | 31 
 2 files changed, 67 insertions(+)

diff --git a/tests/qemu-iotests/tests/parallels-checks 
b/tests/qemu-iotests/tests/parallels-checks
index df99558486..b281246a42 100755
--- a/tests/qemu-iotests/tests/parallels-checks
+++ b/tests/qemu-iotests/tests/parallels-checks
@@ -135,6 +135,42 @@ echo "== check the second cluster (deduplicated) =="
 # Clear image
 _make_test_img $SIZE
 
+echo "== TEST DUPLICATION SELF-CURE =="
+
+echo "== write pattern to whole image =="
+{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | 
_filter_testdir
+
+echo "== write another pattern to second cluster =="
+{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 
| _filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo "== corrupt image =="
+poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00"
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check the first cluster with self-repair =="
+{ $QEMU_IO -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== write another pattern to the first clusters =="
+{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== check the first cluster =="
+{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== check the second cluster (deduplicated) =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
 echo "== TEST DATA_OFF CHECK =="
 
 echo "== write pattern to first cluster =="
diff --git a/tests/qemu-iotests/tests/parallels-checks.out 
b/tests/qemu-iotests/tests/parallels-checks.out
index 1325d2b611..9793423111 100644
--- a/tests/qemu-iotests/tests/parallels-checks.out
+++ b/tests/qemu-iotests/tests/parallels-checks.out
@@ -71,6 +71,37 @@ read 1048576/1048576 bytes at offset 0
 read 1048576/1048576 bytes at offset 1048576
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DUPLICATION SELF-CURE ==
+== write pattern to whole image ==
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to second cluster ==
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the first cluster with self-repair ==
+Repairing duplicate offset in BAT entry 1
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to the first clusters ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the second cluster (deduplicated) ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
 == TEST DATA_OFF CHECK ==
 == write pattern to first cluster ==
 wrote 1048576/1048576 bytes at offset 0
-- 
2.34.1

[PATCH v3 7/8] util: Remove qemu_get_local_state_dir()

2023-09-21 Thread Akihiko Odaki

There are no users of the function anymore.

Signed-off-by: Akihiko Odaki 
---
 include/qemu/osdep.h |  8 
 util/oslib-posix.c   |  6 --
 util/oslib-win32.c   | 10 --
 3 files changed, 24 deletions(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index bb857c910f..cc585447ef 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -628,14 +628,6 @@ ssize_t qemu_write_full(int fd, const void *buf, size_t 
count)
 
 void qemu_set_cloexec(int fd);
 
-/* Return a dynamically allocated directory path that is appropriate for 
storing
- * local state.
- *
- * The caller is responsible for releasing the value returned with g_free()
- * after use.
- */
-char *qemu_get_local_state_dir(void);
-
 /**
  * qemu_get_runtime_dir:
  *
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 0c82717be5..f3054ad2cd 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -267,12 +267,6 @@ int qemu_socketpair(int domain, int type, int protocol, 
int sv[2])
 return ret;
 }
 
-char *
-qemu_get_local_state_dir(void)
-{
-return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
-}
-
 char *
 qemu_get_runtime_dir(void)
 {
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 38df7b57b5..f93c3bff8e 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -229,16 +229,6 @@ int qemu_get_thread_id(void)
 return GetCurrentThreadId();
 }
 
-char *
-qemu_get_local_state_dir(void)
-{
-const char * const *data_dirs = g_get_system_data_dirs();
-
-g_assert(data_dirs && data_dirs[0]);
-
-return g_strdup(data_dirs[0]);
-}
-
 char *
 qemu_get_runtime_dir(void)
 {
-- 
2.41.0

[PULL v2 21/22] parallels: naive implementation of parallels_co_pwrite_zeroes

2023-09-21 Thread Denis V. Lunev

The zero flag is missed in the Parallels format specification. We can
resort to discard if we have no backing file.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index 1808029f14..d026ce9e2f 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -582,6 +582,19 @@ done:
 return ret;
 }
 
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+   BdrvRequestFlags flags)
+{
+/*
+ * The zero flag is missed in the Parallels format specification. We can
+ * resort to discard if we have no backing file (this condition is checked
+ * inside parallels_co_pdiscard().
+ */
+return parallels_co_pdiscard(bs, offset, bytes);
+}
+
+
 static void parallels_check_unclean(BlockDriverState *bs,
 BdrvCheckResult *res,
 BdrvCheckMode fix)
@@ -1463,6 +1476,7 @@ static BlockDriver bdrv_parallels = {
 .bdrv_co_create_opts= parallels_co_create_opts,
 .bdrv_co_check  = parallels_co_check,
 .bdrv_co_pdiscard   = parallels_co_pdiscard,
+.bdrv_co_pwrite_zeroes  = parallels_co_pwrite_zeroes,
 };
 
 static void bdrv_parallels_init(void)
-- 
2.34.1

[PULL v2 12/22] parallels: collect bitmap of used clusters at open

2023-09-21 Thread Denis V. Lunev

If the operation is failed, we need to check image consistency if the
problem is not about memory allocation.

Bitmap adjustments in allocate_cluster are not performed yet.
They worth to be separate. This was proven useful during debug of this
series. Kept as is for future bissecting.

It should be specifically noted that used bitmap must be recalculated
if data_off has been fixed during image consistency check.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 73 +++
 block/parallels.h |  3 ++
 2 files changed, 76 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index 2b5f2b54a0..a997880c34 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -193,6 +193,58 @@ static int mark_used(BlockDriverState *bs,
 return 0;
 }
 
+/*
+ * Collect used bitmap. The image can contain errors, we should fill the
+ * bitmap anyway, as much as we can. This information will be used for
+ * error resolution.
+ */
+static int parallels_fill_used_bitmap(BlockDriverState *bs)
+{
+BDRVParallelsState *s = bs->opaque;
+int64_t payload_bytes;
+uint32_t i;
+int err = 0;
+
+payload_bytes = bdrv_getlength(bs->file->bs);
+if (payload_bytes < 0) {
+return payload_bytes;
+}
+payload_bytes -= s->data_start * BDRV_SECTOR_SIZE;
+if (payload_bytes < 0) {
+return -EINVAL;
+}
+
+s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
+if (s->used_bmap_size == 0) {
+return 0;
+}
+s->used_bmap = bitmap_try_new(s->used_bmap_size);
+if (s->used_bmap == NULL) {
+return -ENOMEM;
+}
+
+for (i = 0; i < s->bat_size; i++) {
+int err2;
+int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+if (host_off == 0) {
+continue;
+}
+
+err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off);
+if (err2 < 0 && err == 0) {
+err = err2;
+}
+}
+return err;
+}
+
+static void parallels_free_used_bitmap(BlockDriverState *bs)
+{
+BDRVParallelsState *s = bs->opaque;
+s->used_bmap_size = 0;
+g_free(s->used_bmap);
+}
+
 static int64_t coroutine_fn GRAPH_RDLOCK
 allocate_clusters(BlockDriverState *bs, int64_t sector_num,
   int nb_sectors, int *pnum)
@@ -530,8 +582,17 @@ parallels_check_data_off(BlockDriverState *bs, 
BdrvCheckResult *res,
 
 res->corruptions++;
 if (fix & BDRV_FIX_ERRORS) {
+int err;
 s->header->data_off = cpu_to_le32(data_off);
 s->data_start = data_off;
+
+parallels_free_used_bitmap(bs);
+err = parallels_fill_used_bitmap(bs);
+if (err == -ENOMEM) {
+res->check_errors++;
+return err;
+}
+
 res->corruptions_fixed++;
 }
 
@@ -1222,6 +1283,14 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 need_check = need_check || s->data_end > file_nb_sectors;
 
+if (!need_check) {
+ret = parallels_fill_used_bitmap(bs);
+if (ret == -ENOMEM) {
+goto fail;
+}
+need_check = need_check || ret < 0; /* These are correctable errors */
+}
+
 /*
  * We don't repair the image here if it's opened for checks. Also we don't
  * want to change inactive images and can't change readonly images.
@@ -1251,6 +1320,8 @@ fail:
  * "s" object was allocated by g_malloc0 so we can safely
  * try to free its fields even they were not allocated.
  */
+parallels_free_used_bitmap(bs);
+
 error_free(s->migration_blocker);
 g_free(s->bat_dirty_bmap);
 qemu_vfree(s->header);
@@ -1271,6 +1342,8 @@ static void parallels_close(BlockDriverState *bs)
   PREALLOC_MODE_OFF, 0, NULL);
 }
 
+parallels_free_used_bitmap(bs);
+
 g_free(s->bat_dirty_bmap);
 qemu_vfree(s->header);
 
diff --git a/block/parallels.h b/block/parallels.h
index 4e53e9572d..6b199443cf 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -72,6 +72,9 @@ typedef struct BDRVParallelsState {
 unsigned long *bat_dirty_bmap;
 unsigned int  bat_dirty_block;
 
+unsigned long *used_bmap;
+unsigned long used_bmap_size;
+
 uint32_t *bat_bitmap;
 unsigned int bat_size;
 
-- 
2.34.1

[PULL v2 18/22] parallels: improve readability of allocate_clusters

2023-09-21 Thread Denis V. Lunev

Replace 'space' representing the amount of data to preallocate with
'bytes'.

Rationale:
* 'space' at each place is converted to bytes
* the unit is more close to the variable name

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index ebcdff8736..a97fb8b506 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -279,7 +279,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
 if (first_free == s->used_bmap_size) {
 uint32_t new_usedsize;
-int64_t space = to_allocate * s->tracks + s->prealloc_size;
+int64_t bytes = to_allocate * s->cluster_size;
+bytes += s->prealloc_size * BDRV_SECTOR_SIZE;
 
 host_off = s->data_end * BDRV_SECTOR_SIZE;
 
@@ -289,8 +290,7 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
  * force the safer-but-slower fallocate.
  */
 if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
-ret = bdrv_co_truncate(bs->file,
-   (s->data_end + space) << BDRV_SECTOR_BITS,
+ret = bdrv_co_truncate(bs->file, host_off + bytes,
false, PREALLOC_MODE_OFF,
BDRV_REQ_ZERO_WRITE, NULL);
 if (ret == -ENOTSUP) {
@@ -298,16 +298,13 @@ allocate_clusters(BlockDriverState *bs, int64_t 
sector_num,
 }
 }
 if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-ret = bdrv_co_pwrite_zeroes(bs->file,
-s->data_end << BDRV_SECTOR_BITS,
-space << BDRV_SECTOR_BITS, 0);
+ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0);
 }
 if (ret < 0) {
 return ret;
 }
 
-new_usedsize = s->used_bmap_size +
-   (space << BDRV_SECTOR_BITS) / s->cluster_size;
+new_usedsize = s->used_bmap_size + bytes / s->cluster_size;
 s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
   new_usedsize);
 s->used_bmap_size = new_usedsize;
-- 
2.34.1

[PULL v2 08/22] parallels: create mark_used() helper which sets bit in used bitmap

2023-09-21 Thread Denis V. Lunev

This functionality is used twice already and next patch will add more
code with it.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 34 +-
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index af3b4894d7..66c86d87e3 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -178,6 +178,21 @@ static void parallels_set_bat_entry(BDRVParallelsState *s,
 bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 
1);
 }
 
+static int mark_used(BlockDriverState *bs,
+ unsigned long *bitmap, uint32_t bitmap_size, int64_t off)
+{
+BDRVParallelsState *s = bs->opaque;
+uint32_t cluster_index = host_cluster_index(s, off);
+if (cluster_index >= bitmap_size) {
+return -E2BIG;
+}
+if (test_bit(cluster_index, bitmap)) {
+return -EBUSY;
+}
+bitmap_set(bitmap, cluster_index, 1);
+return 0;
+}
+
 static int64_t coroutine_fn GRAPH_RDLOCK
 allocate_clusters(BlockDriverState *bs, int64_t sector_num,
   int nb_sectors, int *pnum)
@@ -621,7 +636,7 @@ parallels_check_duplicate(BlockDriverState *bs, 
BdrvCheckResult *res,
 BDRVParallelsState *s = bs->opaque;
 int64_t host_off, host_sector, guest_sector;
 unsigned long *bitmap;
-uint32_t i, bitmap_size, cluster_index, bat_entry;
+uint32_t i, bitmap_size, bat_entry;
 int n, ret = 0;
 uint64_t *buf = NULL;
 bool fixed = false;
@@ -655,10 +670,9 @@ parallels_check_duplicate(BlockDriverState *bs, 
BdrvCheckResult *res,
 continue;
 }
 
-cluster_index = host_cluster_index(s, host_off);
-assert(cluster_index < bitmap_size);
-if (!test_bit(cluster_index, bitmap)) {
-bitmap_set(bitmap, cluster_index, 1);
+ret = mark_used(bs, bitmap, bitmap_size, host_off);
+assert(ret != -E2BIG);
+if (ret == 0) {
 continue;
 }
 
@@ -713,11 +727,13 @@ parallels_check_duplicate(BlockDriverState *bs, 
BdrvCheckResult *res,
  * consistent for the new allocated clusters too.
  *
  * Note, clusters allocated outside the current image are not
- * considered, and the bitmap size doesn't change.
+ * considered, and the bitmap size doesn't change. This specifically
+ * means that -E2BIG is OK.
  */
-cluster_index = host_cluster_index(s, host_off);
-if (cluster_index < bitmap_size) {
-bitmap_set(bitmap, cluster_index, 1);
+ret = mark_used(bs, bitmap, bitmap_size, host_off);
+if (ret == -EBUSY) {
+res->check_errors++;
+goto out_repair_bat;
 }
 
 fixed = true;
-- 
2.34.1

[PULL v2 20/22] tests: extend test 131 to cover availability of the discard operation

2023-09-21 Thread Denis V. Lunev

This patch contains test which minimally tests discard and new cluster
allocation logic.

The following checks are added:
* write 2 clusters, discard the first allocated
* write another cluster, check that the hole is filled
* write 2 clusters, discard the first allocated, write 1 cluster at
  non-aligned to cluster offset (2 new clusters should be allocated)

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 tests/qemu-iotests/131 | 31 +++
 tests/qemu-iotests/131.out | 38 ++
 2 files changed, 69 insertions(+)

diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131
index 304bbb3f61..324008b3f6 100755
--- a/tests/qemu-iotests/131
+++ b/tests/qemu-iotests/131
@@ -74,6 +74,37 @@ poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74"
 echo "== read corrupted image with repairing =="
 { $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
 
+echo "== check discard =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "discard 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io 
| _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== check simple allocation over the discarded hole =="
+
+{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check more complex allocation over the discard hole =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; 
} 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "discard $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+# There is 1 cluster hole. Fill it fully and allocate 1 cluster at the end
+{ $QEMU_IO -c "write -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) 
$CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
 echo "== allocate with backing =="
 # Verify that allocating clusters works fine even when there is a backing 
image.
 # Regression test for a bug where we would pass a buffer read from the backing
diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out
index d2904578df..27df91ca97 100644
--- a/tests/qemu-iotests/131.out
+++ b/tests/qemu-iotests/131.out
@@ -26,6 +26,44 @@ read 524288/524288 bytes at offset 0
 Repairing image was not closed correctly
 read 1048576/1048576 bytes at offset 1048576
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check discard ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 2097152/2097152 bytes at offset 0
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset  Length  File
+0   0x20TEST_DIR/t.IMGFMT
+discard 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset  Length  File
+0x100x10TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check simple allocation over the discarded hole ==
+wrote 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset  Length  File
+0x100x10TEST_DIR/t.IMGFMT
+0x200x10TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check more complex allocation over the discard hole ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 524288
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset  Length  File
+0   0x10TEST_DIR/t.IMGFMT
+0x100x10TEST_DIR/t.IMGFMT
+0x300x10TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at

[PULL v2 15/22] parallels: accept multiple clusters in mark_used()

2023-09-21 Thread Denis V. Lunev

This would be useful in the next patch in allocate_clusters(). This
change would not imply serious performance drawbacks as usually image
is full of data or are at the end of the bitmap.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index a997880c34..3df73aa8a0 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -178,18 +178,20 @@ static void parallels_set_bat_entry(BDRVParallelsState *s,
 bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 
1);
 }
 
-static int mark_used(BlockDriverState *bs,
- unsigned long *bitmap, uint32_t bitmap_size, int64_t off)
+static int mark_used(BlockDriverState *bs, unsigned long *bitmap,
+ uint32_t bitmap_size, int64_t off, uint32_t count)
 {
 BDRVParallelsState *s = bs->opaque;
 uint32_t cluster_index = host_cluster_index(s, off);
-if (cluster_index >= bitmap_size) {
+unsigned long next_used;
+if (cluster_index + count > bitmap_size) {
 return -E2BIG;
 }
-if (test_bit(cluster_index, bitmap)) {
+next_used = find_next_bit(bitmap, bitmap_size, cluster_index);
+if (next_used < cluster_index + count) {
 return -EBUSY;
 }
-bitmap_set(bitmap, cluster_index, 1);
+bitmap_set(bitmap, cluster_index, count);
 return 0;
 }
 
@@ -230,7 +232,7 @@ static int parallels_fill_used_bitmap(BlockDriverState *bs)
 continue;
 }
 
-err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off);
+err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1);
 if (err2 < 0 && err == 0) {
 err = err2;
 }
@@ -732,7 +734,7 @@ parallels_check_duplicate(BlockDriverState *bs, 
BdrvCheckResult *res,
 continue;
 }
 
-ret = mark_used(bs, bitmap, bitmap_size, host_off);
+ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
 assert(ret != -E2BIG);
 if (ret == 0) {
 continue;
@@ -792,7 +794,7 @@ parallels_check_duplicate(BlockDriverState *bs, 
BdrvCheckResult *res,
  * considered, and the bitmap size doesn't change. This specifically
  * means that -E2BIG is OK.
  */
-ret = mark_used(bs, bitmap, bitmap_size, host_off);
+ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
 if (ret == -EBUSY) {
 res->check_errors++;
 goto out_repair_bat;
-- 
2.34.1

[PULL v2 10/22] parallels: fix broken parallels_check_data_off()

2023-09-21 Thread Denis V. Lunev

Once we have repaired data_off field in the header we should update
s->data_start which is calculated on the base of it.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/parallels.c b/block/parallels.c
index 66c86d87e3..2b5f2b54a0 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -531,6 +531,7 @@ parallels_check_data_off(BlockDriverState *bs, 
BdrvCheckResult *res,
 res->corruptions++;
 if (fix & BDRV_FIX_ERRORS) {
 s->header->data_off = cpu_to_le32(data_off);
+s->data_start = data_off;
 res->corruptions_fixed++;
 }
 
-- 
2.34.1

[PULL v2 04/22] parallels: invent parallels_opts_prealloc() helper to parse prealloc opts

2023-09-21 Thread Denis V. Lunev

This patch creates above mentioned helper and moves its usage to the
beginning of parallels_open(). This simplifies parallels_open() a bit.

The patch also ensures that we store prealloc_size on block driver state
always in sectors. This makes code cleaner and avoids wrong opinion at
the assignment that the value is in bytes.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 72 +--
 1 file changed, 44 insertions(+), 28 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index af7be427c9..ae006e7fc7 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1025,6 +1025,44 @@ static int parallels_update_header(BlockDriverState *bs)
 return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0);
 }
 
+
+static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options,
+   Error **errp)
+{
+int err;
+char *buf;
+int64_t bytes;
+BDRVParallelsState *s = bs->opaque;
+Error *local_err = NULL;
+QemuOpts *opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp);
+if (!opts) {
+return -ENOMEM;
+}
+
+err = -EINVAL;
+if (!qemu_opts_absorb_qdict(opts, options, errp)) {
+goto done;
+}
+
+bytes = qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
+s->prealloc_size = bytes >> BDRV_SECTOR_BITS;
+buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+/* prealloc_mode can be downgraded later during allocate_clusters */
+s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
+   PRL_PREALLOC_MODE_FALLOCATE,
+   &local_err);
+g_free(buf);
+if (local_err != NULL) {
+error_propagate(errp, local_err);
+goto done;
+}
+err = 0;
+
+done:
+qemu_opts_del(opts);
+return err;
+}
+
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
   Error **errp)
 {
@@ -1033,11 +1071,13 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 int ret, size, i;
 int64_t file_nb_sectors, sector;
 uint32_t data_start;
-QemuOpts *opts = NULL;
-Error *local_err = NULL;
-char *buf;
 bool data_off_is_correct;
 
+ret = parallels_opts_prealloc(bs, options, errp);
+if (ret < 0) {
+return ret;
+}
+
 ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
 if (ret < 0) {
 return ret;
@@ -1078,6 +1118,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 ret = -EFBIG;
 goto fail;
 }
+s->prealloc_size = MAX(s->tracks, s->prealloc_size);
 s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
 s->bat_size = le32_to_cpu(ph.bat_entries);
@@ -1117,29 +1158,6 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->header_size = size;
 }
 
-opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp);
-if (!opts) {
-goto fail_options;
-}
-
-if (!qemu_opts_absorb_qdict(opts, options, errp)) {
-goto fail_options;
-}
-
-s->prealloc_size =
-qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
-s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
-buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-/* prealloc_mode can be downgraded later during allocate_clusters */
-s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
-   PRL_PREALLOC_MODE_FALLOCATE,
-   &local_err);
-g_free(buf);
-if (local_err != NULL) {
-error_propagate(errp, local_err);
-goto fail_options;
-}
-
 if (ph.ext_off) {
 if (flags & BDRV_O_RDWR) {
 /*
@@ -1214,10 +1232,8 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 fail_format:
 error_setg(errp, "Image not in Parallels format");
-fail_options:
 ret = -EINVAL;
 fail:
-qemu_opts_del(opts);
 /*
  * "s" object was allocated by g_malloc0 so we can safely
  * try to free its fields even they were not allocated.
-- 
2.34.1

[PATCH v3 5/8] scsi: Use qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() is used to construct the default paths.

Signed-off-by: Akihiko Odaki 
---
 scsi/qemu-pr-helper.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index c6c6347e9b..507f23357f 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -77,10 +77,10 @@ static int gid = -1;
 
 static void compute_default_paths(void)
 {
-g_autofree char *state = qemu_get_local_state_dir();
+g_autofree char *run = qemu_get_runtime_dir();
 
-socket_path = g_build_filename(state, "run", "qemu-pr-helper.sock", NULL);
-pidfile = g_build_filename(state, "run", "qemu-pr-helper.pid", NULL);
+socket_path = g_build_filename(run, "qemu-pr-helper.sock", NULL);
+pidfile = g_build_filename(run, "qemu-pr-helper.pid", NULL);
 }
 
 static void usage(const char *name)
-- 
2.41.0

[PULL v2 09/22] tests: ensure that image validation will not cure the corruption

2023-09-21 Thread Denis V. Lunev

Since
commit cfce1091d55322789582480798a891cbaf66924e
Author: Alexander Ivanov 
Date:   Tue Jul 18 12:44:29 2023 +0200
parallels: Image repairing in parallels_open()
there is a potential pit fall with calling
qemu-io -c "read"
The image is opened in read-write mode and thus could be potentially
repaired. This could ruin testing process.

The patch forces read-only opening for reads. In that case repairing
is impossible.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 tests/qemu-iotests/tests/parallels-checks | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/tests/parallels-checks 
b/tests/qemu-iotests/tests/parallels-checks
index a7a1b357b5..5917ee079d 100755
--- a/tests/qemu-iotests/tests/parallels-checks
+++ b/tests/qemu-iotests/tests/parallels-checks
@@ -91,7 +91,7 @@ file_size=`stat --printf="%s" "$TEST_IMG"`
 echo "file size: $file_size"
 
 echo "== check last cluster =="
-{ $QEMU_IO -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -r -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
 
 # Clear image
 _make_test_img $SIZE
@@ -105,19 +105,20 @@ echo "== write another pattern to second cluster =="
 { $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 
| _filter_qemu_io | _filter_testdir
 
 echo "== check second cluster =="
-{ $QEMU_IO -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
+
 
 echo "== corrupt image =="
 poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00"
 
 echo "== check second cluster =="
-{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
 
 echo "== repair image =="
 _check_test_img -r all
 
 echo "== check second cluster =="
-{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
 
 echo "== check first cluster on host =="
 printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1`
-- 
2.34.1

[PULL v2 22/22] tests: extend test 131 to cover availability of the write-zeroes

2023-09-21 Thread Denis V. Lunev

This patch contains test which minimally tests write-zeroes on top of
working discard.

The following checks are added:
* write 2 clusters, write-zero to the first allocated cluster
* write 2 cluster, write-zero to the half the first allocated cluster

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 tests/qemu-iotests/131 | 21 +
 tests/qemu-iotests/131.out | 22 ++
 2 files changed, 43 insertions(+)

diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131
index 324008b3f6..3119100e78 100755
--- a/tests/qemu-iotests/131
+++ b/tests/qemu-iotests/131
@@ -105,6 +105,27 @@ _make_test_img $size
 { $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
 { $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) 
$CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
+echo "== check write-zeroes =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "write -z 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io 
| _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== check cluster-partial write-zeroes =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "write -z 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" 
"$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
 echo "== allocate with backing =="
 # Verify that allocating clusters works fine even when there is a backing 
image.
 # Regression test for a bug where we would pass a buffer read from the backing
diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out
index 27df91ca97..86a2d2a49b 100644
--- a/tests/qemu-iotests/131.out
+++ b/tests/qemu-iotests/131.out
@@ -64,6 +64,28 @@ read 524288/524288 bytes at offset 0
 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 524288/524288 bytes at offset 1572864
 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check write-zeroes ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 2097152/2097152 bytes at offset 0
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset  Length  File
+0x100x10TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check cluster-partial write-zeroes ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 524288
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == allocate with backing ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
-- 
2.34.1

[PULL v2 06/22] parallels: return earlier from parallels_open() function on error

2023-09-21 Thread Denis V. Lunev

At the beginning of the function we can return immediately until we
really allocate s->header.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 12f38cf70b..bd26c8db63 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1090,7 +1090,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
 if (ret < 0) {
-goto fail;
+return ret;
 }
 
 bs->total_sectors = le64_to_cpu(ph.nb_sectors);
@@ -1110,13 +1110,11 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->tracks = le32_to_cpu(ph.tracks);
 if (s->tracks == 0) {
 error_setg(errp, "Invalid image: Zero sectors per track");
-ret = -EINVAL;
-goto fail;
+return -EINVAL;
 }
 if (s->tracks > INT32_MAX/513) {
 error_setg(errp, "Invalid image: Too big cluster");
-ret = -EFBIG;
-goto fail;
+return -EFBIG;
 }
 s->prealloc_size = MAX(s->tracks, s->prealloc_size);
 s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
@@ -1124,16 +1122,14 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->bat_size = le32_to_cpu(ph.bat_entries);
 if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
 error_setg(errp, "Catalog too large");
-ret = -EFBIG;
-goto fail;
+return -EFBIG;
 }
 
 size = bat_entry_off(s->bat_size);
 s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
 s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
 if (s->header == NULL) {
-ret = -ENOMEM;
-goto fail;
+return -ENOMEM;
 }
 
 ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
-- 
2.34.1

Re: [PATCH v2 01/10] qga: Remove platform GUID definitions

2023-09-21 Thread Akihiko Odaki


On 2022/11/17 18:45, Konstantin Kostiuk wrote:
Reviewed-by: Konstantin Kostiuk >


Will merge this patch in QGA series

On Thu, Nov 10, 2022 at 12:06 PM Akihiko Odaki > wrote:


GUID_DEVINTERFACE_DISK and GUID_DEVINTERFACE_STORAGEPORT are already
defined by MinGW-w64. They are not only unnecessary, but can lead to
duplicate definition errors at link time with some unknown condition.

Signed-off-by: Akihiko Odaki mailto:akihiko.od...@daynix.com>>
---
  qga/commands-win32.c | 7 ---
  1 file changed, 7 deletions(-)

diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index ec9f55b453..dde5d401bb 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -506,13 +506,6 @@ static GuestDiskBusType
find_bus_type(STORAGE_BUS_TYPE bus)
      return win2qemu[(int)bus];
  }

-DEFINE_GUID(GUID_DEVINTERFACE_DISK,
-        0x53f56307L, 0xb6bf, 0x11d0, 0x94, 0xf2,
-        0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b);
-DEFINE_GUID(GUID_DEVINTERFACE_STORAGEPORT,
-        0x2accfe60L, 0xc130, 0x11d2, 0xb0, 0x82,
-        0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b);
-
  static void get_pci_address_for_device(GuestPCIAddress *pci,
                                         HDEVINFO dev_info)
  {
-- 
2.38.1




Hi Konstantin,

This patch seems missed since then. Can you merge it?

Regards,
Akihiko Odaki

[PULL v2 11/22] parallels: add test which will validate data_off fixes through repair

2023-09-21 Thread Denis V. Lunev

We have only check through self-repair and that proven to be not enough.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 tests/qemu-iotests/tests/parallels-checks | 17 +
 tests/qemu-iotests/tests/parallels-checks.out | 18 ++
 2 files changed, 35 insertions(+)

diff --git a/tests/qemu-iotests/tests/parallels-checks 
b/tests/qemu-iotests/tests/parallels-checks
index 5917ee079d..f4ca50295e 100755
--- a/tests/qemu-iotests/tests/parallels-checks
+++ b/tests/qemu-iotests/tests/parallels-checks
@@ -140,6 +140,23 @@ poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff"
 echo "== check first cluster =="
 { $QEMU_IO -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
 
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DATA_OFF THROUGH REPAIR =="
+
+echo "== write pattern to first cluster =="
+{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== spoil data_off field =="
+poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff"
+
+echo "== repair image =="
+_check_test_img -r all
+
+echo "== check first cluster =="
+{ $QEMU_IO -r -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/tests/parallels-checks.out 
b/tests/qemu-iotests/tests/parallels-checks.out
index 98a3a7f55e..74a5e29260 100644
--- a/tests/qemu-iotests/tests/parallels-checks.out
+++ b/tests/qemu-iotests/tests/parallels-checks.out
@@ -72,4 +72,22 @@ wrote 1048576/1048576 bytes at offset 0
 Repairing data_off field has incorrect value
 read 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DATA_OFF THROUGH REPAIR ==
+== write pattern to first cluster ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== spoil data_off field ==
+== repair image ==
+Repairing data_off field has incorrect value
+The following inconsistencies were found and repaired:
+
+0 leaked clusters
+1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+== check first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
-- 
2.34.1

[PULL v2 13/22] tests: fix broken deduplication check in parallels format test

2023-09-21 Thread Denis V. Lunev

Original check is broken as supposed reading from 2 different clusters
results in read from the same file offset twice. This is definitely
wrong.

We should be sure that
* the content of both clusters is correct after repair
* clusters are at the different offsets after repair
In order to check the latter we write some content into the first one
and validate that fact.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 tests/qemu-iotests/tests/parallels-checks | 14 ++
 tests/qemu-iotests/tests/parallels-checks.out | 16 
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/tests/qemu-iotests/tests/parallels-checks 
b/tests/qemu-iotests/tests/parallels-checks
index f4ca50295e..df99558486 100755
--- a/tests/qemu-iotests/tests/parallels-checks
+++ b/tests/qemu-iotests/tests/parallels-checks
@@ -117,14 +117,20 @@ echo "== check second cluster =="
 echo "== repair image =="
 _check_test_img -r all
 
+echo "== check the first cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
 echo "== check second cluster =="
 { $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
 
-echo "== check first cluster on host =="
-printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1`
+echo "== write another pattern to the first clusters =="
+{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
+
+echo "== check the first cluster =="
+{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | 
_filter_qemu_io | _filter_testdir
 
-echo "== check second cluster on host =="
-printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1`
+echo "== check the second cluster (deduplicated) =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 
2>&1 | _filter_qemu_io | _filter_testdir
 
 # Clear image
 _make_test_img $SIZE
diff --git a/tests/qemu-iotests/tests/parallels-checks.out 
b/tests/qemu-iotests/tests/parallels-checks.out
index 74a5e29260..1325d2b611 100644
--- a/tests/qemu-iotests/tests/parallels-checks.out
+++ b/tests/qemu-iotests/tests/parallels-checks.out
@@ -55,13 +55,21 @@ The following inconsistencies were found and repaired:
 
 Double checking the fixed image now...
 No errors were found on the image.
+== check the first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == check second cluster ==
 read 1048576/1048576 bytes at offset 1048576
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-== check first cluster on host ==
-content: 0x11
-== check second cluster on host ==
-content: 0x11
+== write another pattern to the first clusters ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the second cluster (deduplicated) ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
 == TEST DATA_OFF CHECK ==
 == write pattern to first cluster ==
-- 
2.34.1

[PATCH v3 4/8] qga: Use qemu_get_runtime_dir()

2023-09-21 Thread Akihiko Odaki

qemu_get_runtime_dir() is used to construct the default state directory.

Signed-off-by: Akihiko Odaki 
---
 qga/main.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/qga/main.c b/qga/main.c
index 8668b9f3d3..145ee02df3 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -45,12 +45,11 @@
 #define QGA_VIRTIO_PATH_DEFAULT "/dev/virtio-ports/org.qemu.guest_agent.0"
 #endif /* CONFIG_BSD */
 #define QGA_SERIAL_PATH_DEFAULT "/dev/ttyS0"
-#define QGA_STATE_RELATIVE_DIR  "run"
 #else
 #define QGA_VIRTIO_PATH_DEFAULT ".\\Global\\org.qemu.guest_agent.0"
-#define QGA_STATE_RELATIVE_DIR  "qemu-ga"
 #define QGA_SERIAL_PATH_DEFAULT "COM1"
 #endif
+#define QGA_STATE_RELATIVE_DIR  "qemu-ga"
 #ifdef CONFIG_FSFREEZE
 #define QGA_FSFREEZE_HOOK_DEFAULT CONFIG_QEMU_CONFDIR "/fsfreeze-hook"
 #endif
@@ -129,12 +128,12 @@ static void stop_agent(GAState *s, bool requested);
 static void
 init_dfl_pathnames(void)
 {
-g_autofree char *state = qemu_get_local_state_dir();
+g_autofree char *run = qemu_get_runtime_dir();
 
 g_assert(dfl_pathnames.state_dir == NULL);
 g_assert(dfl_pathnames.pidfile == NULL);
-dfl_pathnames.state_dir = g_build_filename(state, QGA_STATE_RELATIVE_DIR, 
NULL);
-dfl_pathnames.pidfile = g_build_filename(state, QGA_STATE_RELATIVE_DIR, 
"qemu-ga.pid", NULL);
+dfl_pathnames.state_dir = g_build_filename(run, QGA_STATE_RELATIVE_DIR, 
NULL);
+dfl_pathnames.pidfile = g_build_filename(run, QGA_STATE_RELATIVE_DIR, 
"qemu-ga.pid", NULL);
 }
 
 static void quit_handler(int sig)
-- 
2.41.0

[PULL v2 17/22] parallels: naive implementation of allocate_clusters with used bitmap

2023-09-21 Thread Denis V. Lunev

The access to the bitmap is not optimized completely.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 51 ---
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index ec35237119..ebcdff8736 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -253,7 +253,7 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 {
 int ret = 0;
 BDRVParallelsState *s = bs->opaque;
-int64_t pos, space, idx, to_allocate, i, len;
+int64_t i, pos, idx, to_allocate, first_free, host_off;
 
 pos = block_status(s, sector_num, nb_sectors, pnum);
 if (pos > 0) {
@@ -276,15 +276,13 @@ allocate_clusters(BlockDriverState *bs, int64_t 
sector_num,
  */
 assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
 
-space = to_allocate * s->tracks;
-len = bdrv_co_getlength(bs->file->bs);
-if (len < 0) {
-return len;
-}
-if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
+first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
+if (first_free == s->used_bmap_size) {
 uint32_t new_usedsize;
+int64_t space = to_allocate * s->tracks + s->prealloc_size;
+
+host_off = s->data_end * BDRV_SECTOR_SIZE;
 
-space += s->prealloc_size;
 /*
  * We require the expanded size to read back as zero. If the
  * user permitted truncation, we try that; but if it fails, we
@@ -313,6 +311,32 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
   new_usedsize);
 s->used_bmap_size = new_usedsize;
+} else {
+int64_t next_used;
+next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
+
+/* Not enough continuous clusters in the middle, adjust the size */
+if (next_used - first_free < to_allocate) {
+to_allocate = next_used - first_free;
+*pnum = (idx + to_allocate) * s->tracks - sector_num;
+}
+
+host_off = s->data_start * BDRV_SECTOR_SIZE;
+host_off += first_free * s->cluster_size;
+
+/*
+ * No need to preallocate if we are using tail area from the above
+ * branch. In the other case we are likely re-using hole. Preallocate
+ * the space if required by the prealloc_mode.
+ */
+if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
+host_off < s->data_end * BDRV_SECTOR_SIZE) {
+ret = bdrv_co_pwrite_zeroes(bs->file, host_off,
+s->cluster_size * to_allocate, 0);
+if (ret < 0) {
+return ret;
+}
+}
 }
 
 /*
@@ -344,15 +368,18 @@ allocate_clusters(BlockDriverState *bs, int64_t 
sector_num,
 }
 }
 
-ret = mark_used(bs, s->used_bmap, s->used_bmap_size,
-s->data_end << BDRV_SECTOR_BITS, to_allocate);
+ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 
to_allocate);
 if (ret < 0) {
 /* Image consistency is broken. Alarm! */
 return ret;
 }
 for (i = 0; i < to_allocate; i++) {
-parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
-s->data_end += s->tracks;
+parallels_set_bat_entry(s, idx + i,
+host_off / BDRV_SECTOR_SIZE / s->off_multiplier);
+host_off += s->cluster_size;
+}
+if (host_off > s->data_end * BDRV_SECTOR_SIZE) {
+s->data_end = host_off / BDRV_SECTOR_SIZE;
 }
 
 return bat2sect(s, idx) + sector_num % s->tracks;
-- 
2.34.1

[PULL v2 16/22] parallels: update used bitmap in allocate_cluster

2023-09-21 Thread Denis V. Lunev

We should extend the bitmap if the file is extended and set the bit in
the image used bitmap once the cluster is allocated. Sanity check at
that moment also looks like a good idea.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Alexander Ivanov 
---
 block/parallels.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index 3df73aa8a0..ec35237119 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -282,6 +282,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 return len;
 }
 if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
+uint32_t new_usedsize;
+
 space += s->prealloc_size;
 /*
  * We require the expanded size to read back as zero. If the
@@ -305,6 +307,12 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 if (ret < 0) {
 return ret;
 }
+
+new_usedsize = s->used_bmap_size +
+   (space << BDRV_SECTOR_BITS) / s->cluster_size;
+s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
+  new_usedsize);
+s->used_bmap_size = new_usedsize;
 }
 
 /*
@@ -336,6 +344,12 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 }
 }
 
+ret = mark_used(bs, s->used_bmap, s->used_bmap_size,
+s->data_end << BDRV_SECTOR_BITS, to_allocate);
+if (ret < 0) {
+/* Image consistency is broken. Alarm! */
+return ret;
+}
 for (i = 0; i < to_allocate; i++) {
 parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
 s->data_end += s->tracks;
-- 
2.34.1

Re: [PATCH] cutils: Fix get_relocated_path on Windows

2023-09-21 Thread Akihiko Odaki


On 2022/10/31 9:59, Akihiko Odaki wrote:

get_relocated_path() did not have error handling for PathCchSkipRoot()
because a path given to get_relocated_path() was expected to be a valid
path containing a drive letter or UNC server/share path elements on
Windows, but sometimes it turned out otherwise.

The paths passed to get_relocated_path() are defined by macros generated
by Meson. Meson in turn uses a prefix given by the configure script to
generate them. For Windows, the script passes /qemu as a prefix to
Meson by default.

As documented in docs/about/build-platforms.rst, typically MSYS2 is used
for the build system, but it is also possible to use Linux as well. When
MSYS2 is used, its Bash variant recognizes /qemu as a MSYS2 path, and
converts it to a Windows path, adding the MSYS2 prefix including a drive
letter or UNC server/share path elements. Such a conversion does not
happen on a shell on Linux however, and /qemu will be passed as is in
the case.

Implement a proper error handling of PathCchSkipRoot() in
get_relocated_path() so that it can handle a path without a drive letter
or UNC server/share path elements.

Reported-by: Stefan Weil 
Signed-off-by: Akihiko Odaki 
---
  util/cutils.c | 18 +++---
  1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/util/cutils.c b/util/cutils.c
index cb43dda213..932c741d2b 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -1088,17 +1088,21 @@ char *get_relocated_path(const char *dir)
  g_string_append(result, "/qemu-bundle");
  if (access(result->str, R_OK) == 0) {
  #ifdef G_OS_WIN32
-size_t size = mbsrtowcs(NULL, &dir, 0, &(mbstate_t){0}) + 1;
+const char *src = dir;
+size_t size = mbsrtowcs(NULL, &src, 0, &(mbstate_t){0}) + 1;
  PWSTR wdir = g_new(WCHAR, size);
-mbsrtowcs(wdir, &dir, size, &(mbstate_t){0});
+mbsrtowcs(wdir, &src, size, &(mbstate_t){0});
  
  PCWSTR wdir_skipped_root;

-PathCchSkipRoot(wdir, &wdir_skipped_root);
+if (PathCchSkipRoot(wdir, &wdir_skipped_root)) {
+g_string_append(result, dir);
+} else {
+size = wcsrtombs(NULL, &wdir_skipped_root, 0, &(mbstate_t){0});
+char *cursor = result->str + result->len;
+g_string_set_size(result, result->len + size);
+wcsrtombs(cursor, &wdir_skipped_root, size + 1, &(mbstate_t){0});
+}
  
-size = wcsrtombs(NULL, &wdir_skipped_root, 0, &(mbstate_t){0});

-char *cursor = result->str + result->len;
-g_string_set_size(result, result->len + size);
-wcsrtombs(cursor, &wdir_skipped_root, size + 1, &(mbstate_t){0});
  g_free(wdir);
  #else
  g_string_append(result, dir);


Hi,

Can anyone have a look at this patch?

Regards,
Akihiko Odaki

Re: [PATCH v3] hw/i386/pc: improve physical address space bound check for 32-bit systems

2023-09-21 Thread David Hildenbrand


On 21.09.23 09:17, Ani Sinha wrote:

32-bit systems do not have a reserved memory for hole64 and hotplugging memory
devices are not supported on those systems. Therefore, the maximum limit of the
guest physical address in the absence of additional memory devices effectively
coincides with the end of "above 4G memory space" region. When users configure
additional memory devices, we need to properly account for the additional device
memory region so as to find the maximum value of the guest physical address
and enforce that it is within the physical address space of the processor. For
32-bit, this maximum PA will be outside the range of the processor's address
space.

With this change, for example, previously this was allowed:

$ ./qemu-system-x86_64 -cpu pentium -m size=10G

Now it is no longer allowed:

$ ./qemu-system-x86_64 -cpu pentium -m size=10G
qemu-system-x86_64: Address space limit 0x < 0x2bfff phys-bits too 
low (32)

For 32-bit, hotplugging additional memory is no longer allowed.


"32-bit without PAE/PSE36"



$ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
qemu-system-i386: Address space limit 0x < 0x1 phys-bits too 
low (32)



We always place the device memory region above 4G. Without PAE/PSE36, 
you cannot ever possibly make use hotplugged memory, because it would 
reside > 4g.


So while the user could have started that QEMU instance, even with an OS 
that would support memory hotplug, a DIMM above 4G would not have been 
usable.


So we're now properly failing for a setup that doesn't make any sense. 
Good :)


... if someone ever cares about making that work, we would have to let 
the device memory region start below 4g (and obviously, not exceed 4g).



So while

./qemu-system-i386 -m size=1G,maxmem=3G,slots=2

fails (because pentium cannot access that memory), what should work is

./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 -cpu pentium2

or

 ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 -cpu pentium,pse36=on

Because that CPU could actually address that memory somehow (PAE/PSE36).


So IMHO, we're now forbidding setups that are impossible.


The above is still allowed for older machine types in order to support
compatibility. Therefore, this still works:

$ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2



Makes sense. (probably nobody cares, but better safe than sorry)


After calling CPUID with EAX=0x8001, all AMD64 compliant processors
have the longmode-capable-bit turned on in the extended feature flags (bit 29)
in EDX. The absence of CPUID longmode can be used to differentiate between
32-bit and 64-bit processors and is the recommended approach. QEMU takes this
approach elsewhere (for example, please see x86_cpu_realizefn()) and with
this change, pc_max_used_gpa() also takes the same approach to detect 32-bit
processors.

Unit tests are modified to not run those tests that use memory hotplug
on 32-bit x86 architecture.


We could use a different CPU (pentium2) to still run these tests. 
"pentium2" should work I assume?

[...]


@@ -907,12 +907,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
  static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
  {
  X86CPU *cpu = X86_CPU(first_cpu);
+PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+MachineState *ms = MACHINE(pcms);
+uint64_t devmem_start = 0;
+ram_addr_t devmem_size = 0;
  
-/* 32-bit systems don't have hole64 thus return max CPU address */

-if (cpu->phys_bits <= 32) {
-return ((hwaddr)1 << cpu->phys_bits) - 1;
+/*
+ * 32-bit systems don't have hole64 but they might have a region for
+ * memory devices. Even if additional hotplugged memory devices might
+ * not be usable by most guest OSes, we need to still consider them for
+ * calculating the highest possible GPA so that we can properly report
+ * if someone configures them on a CPU that cannot possibly address them.
+ */
+if (!(cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
+/* 32-bit systems */
+if (pcmc->fixed_32bit_mem_addr_check) {
+if (pcmc->has_reserved_memory &&
+(ms->ram_size < ms->maxram_size)) {
+pc_get_device_memory_range(pcms, &devmem_start,
+   &devmem_size);
+if (!pcmc->broken_reserved_end) {


I think you can remove that check. "pcmc->fixed_32bit_mem_addr_check && 
pcmc->broken_reserved_end" can never hold at the same time.


broken_reserved_end is only set for QEMU <= 2.4, to work around another 
broken check. pcmc->fixed_32bit_mem_addr_check is only set for 8.2+.


Maybe consider calling "fixed_32bit_mem_addr_check" 
"pcmc->broken_32bit_max_gpa_check" and reverse the logic (treating it 
like broken_reserved_end).



--
Cheers,

David / dhildenb

Re: [PATCH 00/52] migration/rdma: Error handling fixes

2023-09-21 Thread Zhijian Li (Fujitsu)

Perter,


On 20/09/2023 00:49, Peter Xu wrote:
> On Mon, Sep 18, 2023 at 04:41:14PM +0200, Markus Armbruster wrote:
>> Oh dear, where to start.  There's so much wrong, and in pretty obvious
>> ways.  This code should never have passed review.  I'm refraining from
>> saying more; see the commit messages instead.
>>
>> Issues remaining after this series include:
>>
>> * Terrible error messages
>>
>> * Some error message cascades remain
>>
>> * There is no written contract for QEMUFileHooks, and the
>>responsibility for reporting errors is unclear
> 
> Even being removed.. because no one is really extending that..
> 
> https://lore.kernel.org/all/20230509120700.78359-1-quint...@redhat.com/#t
> 
>>
>> * There seem to be no tests whatsoever
> 
> I always see rdma as "odd fixes" stage.. for a long time.  But maybe I was
> wrong.
> 
> Copying Zhijian for status of rdma; 

Thanks,

Yeah, sometimes I will pay attention to migration, especially patches related
to RDMA and COLO. I just knew i have missed so much patches to RDMA, most of
them had got RVB, but dropped at PULL phase at last. What a pity.


Zhijian, I saw that you just replied to
> the hwpoison issue.  Maybe we should have one entry for rdma too, just like
> colo?

I'm worried that I may not have enough time, ability, or environment to 
review/test
the RDMA patches. but for this patch set, i will take a look later.


Thanks
Zhijian

> > Thanks,
>

Re: [PATCH v1 04/22] vfio/common: Introduce vfio_container_add|del_section_window()

2023-09-21 Thread Cédric Le Goater


Hello Zhenzhong,

On 8/30/23 12:37, Zhenzhong Duan wrote:

From: Eric Auger 

Introduce helper functions that isolate the code used for
VFIO_SPAPR_TCE_v2_IOMMU. This code reliance is IOMMU backend
specific whereas the rest of the code in the callers, ie.
vfio_listener_region_add|del is not.

Signed-off-by: Eric Auger 
Signed-off-by: Yi Liu 
Signed-off-by: Zhenzhong Duan 
---
  hw/vfio/common.c | 156 +++
  1 file changed, 89 insertions(+), 67 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9ca695837f..67150e4575 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -796,6 +796,92 @@ static bool vfio_get_section_iova_range(VFIOContainer 
*container,
  return true;
  }
  
+static int vfio_container_add_section_window(VFIOContainer *container,

+ MemoryRegionSection *section,
+ Error **errp)
+{
+VFIOHostDMAWindow *hostwin;
+hwaddr pgsize = 0;
+int ret;
+
+if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
+return 0;
+}


This test makes me think that we should register a specific backend
for the pseries machines, implementing the add/del_window handler,
since others do not need it. Correct ?

It would avoid this ugly test. Let's keep that in mind when the
backends are introduced.


+
+/* For now intersections are not allowed, we may relax this later */
+QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+if (ranges_overlap(hostwin->min_iova,
+   hostwin->max_iova - hostwin->min_iova + 1,
+   section->offset_within_address_space,
+   int128_get64(section->size))) {
+error_setg(errp,
+"region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing"
+"host DMA window [0x%"PRIx64",0x%"PRIx64"]",
+section->offset_within_address_space,
+section->offset_within_address_space +
+int128_get64(section->size) - 1,
+hostwin->min_iova, hostwin->max_iova);
+return -EINVAL;
+}
+}
+
+ret = vfio_spapr_create_window(container, section, &pgsize);
+if (ret) {
+error_setg_errno(errp, -ret, "Failed to create SPAPR window");
+return ret;
+}
+
+vfio_host_win_add(container, section->offset_within_address_space,
+  section->offset_within_address_space +
+  int128_get64(section->size) - 1, pgsize);
+#ifdef CONFIG_KVM


the ifdef test doesn't seem useful because the compiler should compile
out the section below since, in that case, kvm_enabled() is defined as :

  #define kvm_enabled()   (0)


+if (kvm_enabled()) {
+VFIOGroup *group;
+IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+struct kvm_vfio_spapr_tce param;
+struct kvm_device_attr attr = {
+.group = KVM_DEV_VFIO_GROUP,
+.attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
+.addr = (uint64_t)(unsigned long)¶m,
+};
+
+if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
+  ¶m.tablefd)) {
+QLIST_FOREACH(group, &container->group_list, container_next) {
+param.groupfd = group->fd;
+if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+error_report("vfio: failed to setup fd %d "
+ "for a group with fd %d: %s",
+ param.tablefd, param.groupfd,
+ strerror(errno));
+return 0;


hmm, the code bails out directly without undoing previous actions. we should
return some error at least.


+}
+trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
+}
+}
+}
+#endif
+return 0;
+}
+
+static void vfio_container_del_section_window(VFIOContainer *container,
+  MemoryRegionSection *section)
+{
+if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
+return;
+}
+
+vfio_spapr_remove_window(container,
+ section->offset_within_address_space);
+if (vfio_host_win_del(container,
+  section->offset_within_address_space,
+  section->offset_within_address_space +
+  int128_get64(section->size) - 1) < 0) {
+hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
+ __func__, section->offset_within_address_space);
+}
+}
+
  static void vfio_listener_region_add(MemoryListener *listener,
   MemoryRegionSection *section)
  {
@@ -822,62 +908,8 @@ static void vfio_listener_region_add(MemoryListener

[PATCH v3] input: Allow to choose console with qemu_input_is_absolute

2023-09-21 Thread Akihiko Odaki

Although an input is routed depending on the console,
qemu_input_is_absolute() had no mechanism to specify the console.

Accept QemuConsole as an argument for qemu_input_is_absolute, and let
the display know the absolute/relative state for a particular console.

Signed-off-by: Akihiko Odaki 
---
V2 -> V3: Rebased to commit 55394dcbec8f0c29c30e792c102a0edd50a52bf4
V1 -> V2: Rebased to commit 79b677d658d3d35e1e776826ac4abb28cdce69b8

 include/ui/input.h |  2 +-
 ui/dbus-console.c  |  6 +++---
 ui/gtk.c   | 12 ++--
 ui/input.c | 29 +++--
 ui/sdl2.c  | 26 +-
 ui/spice-input.c   |  2 +-
 ui/vnc.c   |  2 +-
 ui/cocoa.m |  2 +-
 ui/trace-events|  1 -
 9 files changed, 33 insertions(+), 49 deletions(-)

diff --git a/include/ui/input.h b/include/ui/input.h
index c29a730a71..24d8e4579e 100644
--- a/include/ui/input.h
+++ b/include/ui/input.h
@@ -57,7 +57,7 @@ void qemu_input_queue_btn(QemuConsole *src, InputButton btn, 
bool down);
 void qemu_input_update_buttons(QemuConsole *src, uint32_t *button_map,
uint32_t button_old, uint32_t button_new);
 
-bool qemu_input_is_absolute(void);
+bool qemu_input_is_absolute(QemuConsole *con);
 int qemu_input_scale_axis(int value,
   int min_in, int max_in,
   int min_out, int max_out);
diff --git a/ui/dbus-console.c b/ui/dbus-console.c
index 36f7349585..49da9ccc83 100644
--- a/ui/dbus-console.c
+++ b/ui/dbus-console.c
@@ -386,7 +386,7 @@ dbus_mouse_rel_motion(DBusDisplayConsole *ddc,
 {
 trace_dbus_mouse_rel_motion(dx, dy);
 
-if (qemu_input_is_absolute()) {
+if (qemu_input_is_absolute(ddc->dcl.con)) {
 g_dbus_method_invocation_return_error(
 invocation, DBUS_DISPLAY_ERROR,
 DBUS_DISPLAY_ERROR_INVALID,
@@ -453,7 +453,7 @@ dbus_mouse_set_pos(DBusDisplayConsole *ddc,
 
 trace_dbus_mouse_set_pos(x, y);
 
-if (!qemu_input_is_absolute()) {
+if (!qemu_input_is_absolute(ddc->dcl.con)) {
 g_dbus_method_invocation_return_error(
 invocation, DBUS_DISPLAY_ERROR,
 DBUS_DISPLAY_ERROR_INVALID,
@@ -514,7 +514,7 @@ static void
 dbus_mouse_update_is_absolute(DBusDisplayConsole *ddc)
 {
 g_object_set(ddc->iface_mouse,
- "is-absolute", qemu_input_is_absolute(),
+ "is-absolute", qemu_input_is_absolute(ddc->dcl.con),
  NULL);
 }
 
diff --git a/ui/gtk.c b/ui/gtk.c
index e09f97a86b..40b8d27da5 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -204,7 +204,7 @@ static void gd_update_cursor(VirtualConsole *vc)
 }
 
 window = gtk_widget_get_window(GTK_WIDGET(vc->gfx.drawing_area));
-if (s->full_screen || qemu_input_is_absolute() || s->ptr_owner == vc) {
+if (s->full_screen || qemu_input_is_absolute(vc->gfx.dcl.con) || 
s->ptr_owner == vc) {
 gdk_window_set_cursor(window, s->null_cursor);
 } else {
 gdk_window_set_cursor(window, NULL);
@@ -453,7 +453,7 @@ static void gd_mouse_set(DisplayChangeListener *dcl,
 gint x_root, y_root;
 
 if (!gtk_widget_get_realized(vc->gfx.drawing_area) ||
-qemu_input_is_absolute()) {
+qemu_input_is_absolute(dcl->con)) {
 return;
 }
 
@@ -689,7 +689,7 @@ static void gd_mouse_mode_change(Notifier *notify, void 
*data)
 
 s = container_of(notify, GtkDisplayState, mouse_mode_notifier);
 /* release the grab at switching to absolute mode */
-if (qemu_input_is_absolute() && s->ptr_owner) {
+if (s->ptr_owner && qemu_input_is_absolute(s->ptr_owner->gfx.dcl.con)) {
 if (!s->ptr_owner->window) {
 gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->grab_item),
FALSE);
@@ -903,7 +903,7 @@ static gboolean gd_motion_event(GtkWidget *widget, 
GdkEventMotion *motion,
 x = (motion->x - mx) / vc->gfx.scale_x * ws;
 y = (motion->y - my) / vc->gfx.scale_y * ws;
 
-if (qemu_input_is_absolute()) {
+if (qemu_input_is_absolute(vc->gfx.dcl.con)) {
 if (x < 0 || y < 0 ||
 x >= surface_width(vc->gfx.ds) ||
 y >= surface_height(vc->gfx.ds)) {
@@ -923,7 +923,7 @@ static gboolean gd_motion_event(GtkWidget *widget, 
GdkEventMotion *motion,
 s->last_y = y;
 s->last_set = TRUE;
 
-if (!qemu_input_is_absolute() && s->ptr_owner == vc) {
+if (!qemu_input_is_absolute(vc->gfx.dcl.con) && s->ptr_owner == vc) {
 GdkScreen *screen = gtk_widget_get_screen(vc->gfx.drawing_area);
 GdkDisplay *dpy = gtk_widget_get_display(widget);
 GdkWindow *win = gtk_widget_get_window(widget);
@@ -965,7 +965,7 @@ static gboolean gd_button_event(GtkWidget *widget, 
GdkEventButton *button,
 
 /* implicitly grab the input at the first click in the relative mode */
 if (button->button == 1 && button->type == GDK_BUTTON_PRESS &&
-!qemu_input_is_absolute() && s->ptr_owner != vc) {
+

Re: [PATCH v2 5/7] block/vdi: Clean up local variable shadowing

2023-09-21 Thread Kevin Wolf

Am 20.09.2023 um 20:31 hat Markus Armbruster geschrieben:
> Local variables shadowing other local variables or parameters make the
> code needlessly hard to understand.  Tracked down with -Wshadow=local.
> Clean up: delete inner declarations when they are actually redundant,
> else rename variables.
> 
> Signed-off-by: Markus Armbruster 
> Reviewed-by: Stefan Hajnoczi 

Reviewed-by: Kevin Wolf

Re: [PATCH v2 6/7] block: Clean up local variable shadowing

2023-09-21 Thread Kevin Wolf

Am 20.09.2023 um 20:31 hat Markus Armbruster geschrieben:
> Local variables shadowing other local variables or parameters make the
> code needlessly hard to understand.  Tracked down with -Wshadow=local.
> Clean up: delete inner declarations when they are actually redundant,
> else rename variables.
> 
> Signed-off-by: Markus Armbruster 
> Reviewed-by: Stefan Hajnoczi 
> Acked-by: Anthony PERARD 
> Acked-by: Ilya Dryomov 

Reviewed-by: Kevin Wolf

[PULL 00/17] Trivial patches for 2023-09-21

2023-09-21 Thread Michael Tokarev

The following changes since commit 4907644841e3200aea6475c0f72d3d987e9f3d93:

  Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu into 
staging (2023-09-19 13:22:19 -0400)

are available in the Git repository at:

  https://gitlab.com/mjt0k/qemu.git tags/pull-trivial-patches

for you to fetch changes up to fa365d05b7050163a53d16aa2d8efb96834e8725:

  docs/devel/reset.rst: Correct function names (2023-09-21 11:31:18 +0300)


trivial patches for 2023-09-21

The remainder of spelling fixes, cxl trivial fixes, and a few other
docs and comments fixes.


Akihiko Odaki (1):
  docs/devel/reset.rst: Correct function names

Dave Jiang (1):
  hw/pci-bridge/cxl_upstream: Fix bandwidth entry base unit for SSLBIS

Dmitry Frolov (1):
  hw/cxl: Fix out of bound array access

Fan Ni (1):
  hw/cxl/cxl_device: Replace magic number in CXLError definition

Jonathan Cameron (2):
  hw/mem/cxl_type3: Add missing copyright and license notice
  docs/cxl: Cleanout some more aarch64 examples.

Laszlo Ersek (1):
  hw/i386/pc: fix code comment on cumulative flash size

Li Zhijian (2):
  hw/cxl: Fix CFMW config memory leak
  docs/cxl: Change to lowercase as others

Michael Tokarev (7):
  ppc: spelling fixes
  bsd-user: spelling fixes
  i386: spelling fixes
  hw/net: spelling fixes
  hw/pci: spelling fixes
  hw/tpm: spelling fixes
  hw/other: spelling fixes

Thomas Huth (1):
  subprojects: Use the correct .git suffix in the repository URLs

 bsd-user/errno_defs.h |  2 +-
 bsd-user/freebsd/target_os_siginfo.h  |  2 +-
 bsd-user/freebsd/target_os_stack.h|  4 ++--
 bsd-user/freebsd/target_os_user.h |  2 +-
 bsd-user/qemu.h   |  2 +-
 bsd-user/signal-common.h  |  4 ++--
 bsd-user/signal.c |  6 +++---
 docs/devel/reset.rst  | 17 -
 docs/system/devices/cxl.rst   | 12 ++--
 host/include/i386/host/cpuinfo.h  |  2 +-
 host/include/ppc/host/cpuinfo.h   |  2 +-
 hw/acpi/aml-build.c   |  6 +++---
 hw/acpi/hmat.c|  2 +-
 hw/acpi/nvdimm.c  |  2 +-
 hw/block/hd-geometry.c|  4 ++--
 hw/block/pflash_cfi01.c   |  2 +-
 hw/char/cadence_uart.c|  2 +-
 hw/char/imx_serial.c  |  2 +-
 hw/char/serial.c  |  2 +-
 hw/core/generic-loader.c  |  4 ++--
 hw/core/machine.c |  2 +-
 hw/core/qdev-properties-system.c  |  2 +-
 hw/cpu/a15mpcore.c|  2 +-
 hw/cxl/cxl-events.c   |  2 +-
 hw/cxl/cxl-host.c | 12 ++--
 hw/cxl/cxl-mailbox-utils.c|  4 ++--
 hw/dma/omap_dma.c |  4 ++--
 hw/i386/acpi-build.c  |  4 ++--
 hw/i386/amd_iommu.c   |  4 ++--
 hw/i386/intel_iommu.c |  4 ++--
 hw/i386/kvm/xen_xenstore.c|  2 +-
 hw/i386/kvm/xenstore_impl.c   |  2 +-
 hw/i386/pc.c  | 16 
 hw/input/hid.c|  2 +-
 hw/input/tsc2005.c| 16 
 hw/intc/loongarch_extioi.c|  2 +-
 hw/intc/loongson_liointc.c|  2 +-
 hw/intc/omap_intc.c   |  2 +-
 hw/intc/pnv_xive.c|  2 +-
 hw/intc/spapr_xive.c  |  2 +-
 hw/intc/spapr_xive_kvm.c  |  6 +++---
 hw/intc/xive.c|  2 +-
 hw/intc/xive2.c   |  6 +++---
 hw/ipmi/ipmi_bmc_extern.c |  2 +-
 hw/mem/cxl_type3.c| 17 ++---
 hw/mem/cxl_type3_stubs.c  | 10 ++
 hw/misc/imx7_ccm.c|  2 +-
 hw/misc/mac_via.c |  2 +-
 hw/misc/stm32f2xx_syscfg.c|  4 ++--
 hw/misc/trace-events  |  2 +-
 hw/misc/zynq_slcr.c   |  2 +-
 hw/net/cadence_gem.c  | 10 +-
 hw/net/dp8393x.c  |  2 +-
 hw/net/e1000_regs.h   |  2 +-
 hw/net/e1000x_regs.h  |  2 +-
 hw/net/fsl_etsec/rings.c  |  2 +-
 hw/net/igb_regs.h |  4 ++--
 hw/net/mcf_fec.c  |  2 +-
 hw/net/rocker/rocker_fp.c |  2 +-
 hw/net/rtl8139.c  |  2 +-
 hw/net/smc91c111.c|  2 +-
 hw/net/sungem.c   |  2 +-
 hw/net/sunhme.c   |  2 +-
 hw/net/virtio-net.c   |  6 +++---
 hw/net/vmxnet3.c  |  2 +-
 hw/net/vmxnet3.h  |  2 +-
 hw/nvme/ctrl.c|  6 +++---
 hw/nvram/eeprom_at24c.c   |  2 +-
 hw/nvram/fw_

[PULL 06/17] hw/tpm: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Stefan Berger 
---
 hw/tpm/tpm_tis.h| 2 +-
 hw/tpm/tpm_tis_common.c | 2 +-
 hw/tpm/tpm_tis_i2c.c| 4 ++--
 hw/tpm/tpm_tis_isa.c| 2 +-
 hw/tpm/tpm_tis_sysbus.c | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/tpm/tpm_tis.h b/hw/tpm/tpm_tis.h
index 6f29a508dd..6f14896b97 100644
--- a/hw/tpm/tpm_tis.h
+++ b/hw/tpm/tpm_tis.h
@@ -19,7 +19,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 #ifndef TPM_TPM_TIS_H
 #define TPM_TPM_TIS_H
diff --git a/hw/tpm/tpm_tis_common.c b/hw/tpm/tpm_tis_common.c
index c07c179dbc..279ce436b5 100644
--- a/hw/tpm/tpm_tis_common.c
+++ b/hw/tpm/tpm_tis_common.c
@@ -20,7 +20,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 #include "qemu/osdep.h"
 #include "hw/irq.h"
diff --git a/hw/tpm/tpm_tis_i2c.c b/hw/tpm/tpm_tis_i2c.c
index b695fd3a46..4ecea7fa3e 100644
--- a/hw/tpm/tpm_tis_i2c.c
+++ b/hw/tpm/tpm_tis_i2c.c
@@ -13,7 +13,7 @@
  * Family 2.0, Level 00, Revision 1.00
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  *
  */
 
@@ -507,7 +507,7 @@ static void tpm_tis_i2c_realizefn(DeviceState *dev, Error 
**errp)
 }
 
 /*
- * Get the backend pointer. It is not initialized propery during
+ * Get the backend pointer. It is not initialized properly during
  * device_class_set_props
  */
 s->be_driver = qemu_find_tpm_be("tpm0");
diff --git a/hw/tpm/tpm_tis_isa.c b/hw/tpm/tpm_tis_isa.c
index 91e3792248..0367401586 100644
--- a/hw/tpm/tpm_tis_isa.c
+++ b/hw/tpm/tpm_tis_isa.c
@@ -19,7 +19,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 
 #include "qemu/osdep.h"
diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
index 6724b3d4f6..2fc550f119 100644
--- a/hw/tpm/tpm_tis_sysbus.c
+++ b/hw/tpm/tpm_tis_sysbus.c
@@ -19,7 +19,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 
 #include "qemu/osdep.h"
-- 
2.39.2

[PULL 03/17] i386: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Peter Maydell 
---
 host/include/i386/host/cpuinfo.h | 2 +-
 hw/i386/acpi-build.c | 4 ++--
 hw/i386/amd_iommu.c  | 4 ++--
 hw/i386/intel_iommu.c| 4 ++--
 hw/i386/kvm/xen_xenstore.c   | 2 +-
 hw/i386/kvm/xenstore_impl.c  | 2 +-
 hw/i386/pc.c | 4 ++--
 include/hw/i386/topology.h   | 2 +-
 target/i386/cpu.c| 4 ++--
 target/i386/cpu.h| 4 ++--
 target/i386/kvm/kvm.c| 4 ++--
 target/i386/kvm/xen-emu.c| 2 +-
 target/i386/machine.c| 4 ++--
 target/i386/tcg/translate.c  | 8 
 tests/tcg/i386/system/boot.S | 2 +-
 tests/tcg/i386/x86.csv   | 2 +-
 16 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index 7ae21568f7..b89e6d2e55 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for x86.
+ * Host specific cpu identification for x86.
  */
 
 #ifndef HOST_CPUINFO_H
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index bb12b0ad43..4d2d40bab5 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -779,7 +779,7 @@ static Aml *initialize_route(Aml *route, const char 
*link_name,
  *
  * Returns an array of 128 routes, one for each device,
  * based on device location.
- * The main goal is to equaly distribute the interrupts
+ * The main goal is to equally distribute the interrupts
  * over the 4 existing ACPI links (works only for i440fx).
  * The hash function is  (slot + pin) & 3 -> "LNK[D|A|B|C]".
  *
@@ -2079,7 +2079,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 }
 
 /*
- * Insert DMAR scope for PCI bridges and endpoint devcie
+ * Insert DMAR scope for PCI bridges and endpoint devices
  */
 static void
 insert_scope(PCIBus *bus, PCIDevice *dev, void *opaque)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 9c77304438..c98a3c6e11 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -259,7 +259,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr 
addr)
 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
 PCI_STATUS_SIG_TARGET_ABORT);
 }
-/* log an illegal comand event
+/* log an illegal command event
  *   @addr : address of illegal command
  */
 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
@@ -767,7 +767,7 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, 
uint64_t val,
 break;
 case AMDVI_MMIO_COMMAND_BASE:
 amdvi_mmio_reg_write(s, size, val, addr);
-/* FIXME - make sure System Software has finished writing incase
+/* FIXME - make sure System Software has finished writing in case
  * it writes in chucks less than 8 bytes in a robust way.As for
  * now, this hacks works for the linux driver
  */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c9961ef752..c0ce896668 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -52,7 +52,7 @@
 
 /*
  * PCI bus number (or SID) is not reliable since the device is usaully
- * initalized before guest can configure the PCI bridge
+ * initialized before guest can configure the PCI bridge
  * (SECONDARY_BUS_NUMBER).
  */
 struct vtd_as_key {
@@ -1694,7 +1694,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
  * """
  *
  * We enable per as memory region (iommu_ir_fault) for catching
- * the tranlsation for interrupt range through PASID + PT.
+ * the translation for interrupt range through PASID + PT.
  */
 if (pt && as->pasid != PCI_NO_PASID) {
 memory_region_set_enabled(&as->iommu_ir_fault, true);
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 133d89e953..660d0b72f9 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -1156,7 +1156,7 @@ static unsigned int copy_to_ring(XenXenstoreState *s, 
uint8_t *ptr,
 
 /*
  * This matches the barrier in copy_to_ring() (or the guest's
- * equivalent) betweem writing the data to the ring and updating
+ * equivalent) between writing the data to the ring and updating
  * rsp_prod. It protects against the pathological case (which
  * again I think never happened except on Alpha) where our
  * subsequent writes to the ring could *cross* the read of
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index d9732b567e..1d134a6866 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -1436,7 +1436,7 @@ static void save_node(gpointer key, gpointer value, 
gpointer opaque)
 /*
  * If we already wrote this node, refer to the previous copy.
  * There's no rename/move in XenStore, so all we need to find
- * it is the tx_id of the transation

[PULL 02/17] bsd-user: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Kyle Evans 
Reviewed-by: Warner Losh 
---
 bsd-user/errno_defs.h| 2 +-
 bsd-user/freebsd/target_os_siginfo.h | 2 +-
 bsd-user/freebsd/target_os_stack.h   | 4 ++--
 bsd-user/freebsd/target_os_user.h| 2 +-
 bsd-user/qemu.h  | 2 +-
 bsd-user/signal-common.h | 4 ++--
 bsd-user/signal.c| 6 +++---
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/bsd-user/errno_defs.h b/bsd-user/errno_defs.h
index f3e8ac3488..abe70119d9 100644
--- a/bsd-user/errno_defs.h
+++ b/bsd-user/errno_defs.h
@@ -149,7 +149,7 @@
 #define TARGET_ELAST90  /* Must be equal largest errno 
*/
 
 /* Internal errors: */
-#define TARGET_EJUSTRETURN  254 /* Just return without 
modifing regs */
+#define TARGET_EJUSTRETURN  254 /* Just return without 
modifying regs */
 #define TARGET_ERESTART 255 /* Restart syscall */
 
 #include "special-errno.h"
diff --git a/bsd-user/freebsd/target_os_siginfo.h 
b/bsd-user/freebsd/target_os_siginfo.h
index 4573738752..6c282d8502 100644
--- a/bsd-user/freebsd/target_os_siginfo.h
+++ b/bsd-user/freebsd/target_os_siginfo.h
@@ -72,7 +72,7 @@ typedef struct target_siginfo {
 int32_t _mqd;
 } _mesgp;
 
-/* SIGPOLL -- Not really genreated in FreeBSD ??? */
+/* SIGPOLL -- Not really generated in FreeBSD ??? */
 struct {
 int _band;  /* POLL_IN, POLL_OUT, POLL_MSG */
 } _poll;
diff --git a/bsd-user/freebsd/target_os_stack.h 
b/bsd-user/freebsd/target_os_stack.h
index 0590133291..d15fc3263f 100644
--- a/bsd-user/freebsd/target_os_stack.h
+++ b/bsd-user/freebsd/target_os_stack.h
@@ -25,7 +25,7 @@
 #include "qemu/guest-random.h"
 
 /*
- * The inital FreeBSD stack is as follows:
+ * The initial FreeBSD stack is as follows:
  * (see kern/kern_exec.c exec_copyout_strings() )
  *
  *  Hi Address -> char **ps_argvstr  (struct ps_strings for ps, w, etc.)
@@ -59,7 +59,7 @@ static inline int setup_initial_stack(struct bsd_binprm *bprm,
 /* Save some space for ps_strings. */
 p -= sizeof(struct target_ps_strings);
 
-/* Add machine depedent sigcode. */
+/* Add machine dependent sigcode. */
 p -= TARGET_SZSIGCODE;
 if (setup_sigtramp(p, (unsigned)offsetof(struct target_sigframe, sf_uc),
 TARGET_FREEBSD_NR_sigreturn)) {
diff --git a/bsd-user/freebsd/target_os_user.h 
b/bsd-user/freebsd/target_os_user.h
index f036a32343..1ca7b5ab17 100644
--- a/bsd-user/freebsd/target_os_user.h
+++ b/bsd-user/freebsd/target_os_user.h
@@ -26,7 +26,7 @@
 struct target_priority {
 uint8_t pri_class;  /* Scheduling class. */
 uint8_t pri_level;  /* Normal priority level. */
-uint8_t pri_native; /* Priority before propogation. */
+uint8_t pri_native; /* Priority before propagation. */
 uint8_t pri_user;   /* User priority based on p_cpu and p_nice. */
 };
 
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index d3158bc2ed..d9507137cc 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -116,7 +116,7 @@ extern const char *qemu_uname_release;
 /*
  * TARGET_ARG_MAX defines the number of bytes allocated for arguments
  * and envelope for the new program. 256k should suffice for a reasonable
- * maxiumum env+arg in 32-bit environments, bump it up to 512k for !ILP32
+ * maximum env+arg in 32-bit environments, bump it up to 512k for !ILP32
  * platforms.
  */
 #if TARGET_ABI_BITS > 32
diff --git a/bsd-user/signal-common.h b/bsd-user/signal-common.h
index 6f90345bb2..c044e81165 100644
--- a/bsd-user/signal-common.h
+++ b/bsd-user/signal-common.h
@@ -49,11 +49,11 @@ void target_to_host_sigset(sigset_t *d, const 
target_sigset_t *s);
  * union in target_siginfo is valid. This only applies between
  * host_to_target_siginfo_noswap() and tswap_siginfo(); it does not appear
  * either within host siginfo_t or in target_siginfo structures which we get
- * from the guest userspace program. Linux kenrels use this internally, but BSD
+ * from the guest userspace program. Linux kernels use this internally, but BSD
  * kernels don't do this, but its a useful abstraction.
  *
  * The linux-user version of this uses the top 16 bits, but FreeBSD's SI_USER
- * and other signal indepenent SI_ codes have bit 16 set, so we only use the 
top
+ * and other signal independent SI_ codes have bit 16 set, so we only use the 
top
  * byte instead.
  *
  * For FreeBSD, we have si_pid, si_uid, si_status, and si_addr always. Linux 
and
diff --git a/bsd-user/signal.c b/bsd-user/signal.c
index 4db85a3485..b6beab659e 100644
--- a/bsd-user/signal.c
+++ b/bsd-user/signal.c
@@ -44,7 +44,7 @@ static inline int sas_ss_flags(TaskState *ts, unsigned long 
sp)
 }
 
 /*
- * The BSD ABIs use the same singal numbers across all the CPU architectures, 
so
+ * The BSD ABIs use the same signal numbers across all the CPU architectures, 
so
  * (unlik

[PULL 04/17] hw/net: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Peter Maydell 
---
 hw/net/cadence_gem.c  | 10 +-
 hw/net/dp8393x.c  |  2 +-
 hw/net/e1000_regs.h   |  2 +-
 hw/net/e1000x_regs.h  |  2 +-
 hw/net/fsl_etsec/rings.c  |  2 +-
 hw/net/igb_regs.h |  4 ++--
 hw/net/mcf_fec.c  |  2 +-
 hw/net/rocker/rocker_fp.c |  2 +-
 hw/net/rtl8139.c  |  2 +-
 hw/net/smc91c111.c|  2 +-
 hw/net/sungem.c   |  2 +-
 hw/net/sunhme.c   |  2 +-
 hw/net/virtio-net.c   |  6 +++---
 hw/net/vmxnet3.c  |  2 +-
 hw/net/vmxnet3.h  |  2 +-
 15 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 42ea2411a2..f445d8bb5e 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -81,8 +81,8 @@
 #define GEM_IPGSTRETCH(0x00BC / 4) /* IPG Stretch reg */
 #define GEM_SVLAN (0x00C0 / 4) /* Stacked VLAN reg */
 #define GEM_MODID (0x00FC / 4) /* Module ID reg */
-#define GEM_OCTTXLO   (0x0100 / 4) /* Octects transmitted Low reg */
-#define GEM_OCTTXHI   (0x0104 / 4) /* Octects transmitted High reg */
+#define GEM_OCTTXLO   (0x0100 / 4) /* Octets transmitted Low reg */
+#define GEM_OCTTXHI   (0x0104 / 4) /* Octets transmitted High reg */
 #define GEM_TXCNT (0x0108 / 4) /* Error-free Frames transmitted */
 #define GEM_TXBCNT(0x010C / 4) /* Error-free Broadcast Frames */
 #define GEM_TXMCNT(0x0110 / 4) /* Error-free Multicast Frame */
@@ -101,8 +101,8 @@
 #define GEM_LATECOLLCNT   (0x0144 / 4) /* Late Collision Frames */
 #define GEM_DEFERTXCNT(0x0148 / 4) /* Deferred Transmission Frames */
 #define GEM_CSENSECNT (0x014C / 4) /* Carrier Sense Error Counter */
-#define GEM_OCTRXLO   (0x0150 / 4) /* Octects Received register Low */
-#define GEM_OCTRXHI   (0x0154 / 4) /* Octects Received register High */
+#define GEM_OCTRXLO   (0x0150 / 4) /* Octets Received register Low */
+#define GEM_OCTRXHI   (0x0154 / 4) /* Octets Received register High */
 #define GEM_RXCNT (0x0158 / 4) /* Error-free Frames Received */
 #define GEM_RXBROADCNT(0x015C / 4) /* Error-free Broadcast Frames RX */
 #define GEM_RXMULTICNT(0x0160 / 4) /* Error-free Multicast Frames RX */
@@ -954,7 +954,7 @@ static ssize_t gem_receive(NetClientState *nc, const 
uint8_t *buf, size_t size)
 /* Is this destination MAC address "for us" ? */
 maf = gem_mac_address_filter(s, buf);
 if (maf == GEM_RX_REJECT) {
-return size;  /* no, drop siliently b/c it's not an error */
+return size;  /* no, drop silently b/c it's not an error */
 }
 
 /* Discard packets with receive length error enabled ? */
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index a596f7fbc6..c6f5fb7dce 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -551,7 +551,7 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, 
unsigned int size)
 val = s->cam[s->regs[SONIC_CEP] & 0xf][SONIC_CAP0 - reg];
 }
 break;
-/* All other registers have no special contraints */
+/* All other registers have no special constraints */
 default:
 val = s->regs[reg];
 }
diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h
index 8a4ce82034..39f4882510 100644
--- a/hw/net/e1000_regs.h
+++ b/hw/net/e1000_regs.h
@@ -130,7 +130,7 @@
 
 #define E1000_GCR2  0x05B64 /* 3GIO Control Register 2 */
 #define E1000_FFLT_DBG  0x05F04 /* Debug Register */
-#define E1000_HICR  0x08F00 /* Host Inteface Control */
+#define E1000_HICR  0x08F00 /* Host Interface Control */
 
 #define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */
 #define E1000_RXUDP  0x0B638 /* Time Sync Rx UDP Port - RW */
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index 13760c66d3..cd896fc0ca 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -839,7 +839,7 @@ union e1000_rx_desc_packet_split {
 #define E1000_RXD_STAT_EOP  0x02/* End of Packet */
 #define E1000_RXD_STAT_IXSM 0x04/* Ignore checksum */
 #define E1000_RXD_STAT_VP   0x08/* IEEE VLAN Packet */
-#define E1000_RXD_STAT_UDPCS0x10/* UDP xsum caculated */
+#define E1000_RXD_STAT_UDPCS0x10/* UDP xsum calculated */
 #define E1000_RXD_STAT_TCPCS0x20/* TCP xsum calculated */
 #define E1000_RXD_STAT_IPCS 0x40/* IP xsum calculated */
 #define E1000_RXD_STAT_PIF  0x80/* passed in-exact filter */
diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index 2f2f359f7a..42216de6c9 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -365,7 +365,7 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr)
 } while (TRUE);
 
 /* Save the Buffer Descriptor Pointers to last bd that was not
- * succesfully closed */
+ * successfully closed */
 etsec->regs[TBPTR0

[PULL 01/17] ppc: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Cédric Le Goater 
---
 host/include/ppc/host/cpuinfo.h |  2 +-
 hw/ppc/ppc.c|  2 +-
 hw/ppc/prep_systemio.c  |  2 +-
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_hcall.c|  2 +-
 hw/ppc/spapr_nvdimm.c   |  4 ++--
 hw/ppc/spapr_pci_vfio.c |  2 +-
 include/hw/ppc/openpic.h|  2 +-
 include/hw/ppc/spapr.h  |  2 +-
 target/ppc/cpu-models.h |  4 ++--
 target/ppc/cpu.h|  2 +-
 target/ppc/cpu_init.c   |  4 ++--
 target/ppc/excp_helper.c| 14 +++---
 target/ppc/power8-pmu-regs.c.inc|  4 ++--
 target/ppc/translate/vmx-impl.c.inc |  6 +++---
 15 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/host/include/ppc/host/cpuinfo.h b/host/include/ppc/host/cpuinfo.h
index 29ee7f9ef8..38b8eabe2a 100644
--- a/host/include/ppc/host/cpuinfo.h
+++ b/host/include/ppc/host/cpuinfo.h
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for ppc.
+ * Host specific cpu identification for ppc.
  */
 
 #ifndef HOST_CPUINFO_H
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index aeb116d919..be167710a3 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -738,7 +738,7 @@ static target_ulong _cpu_ppc_load_decr(CPUPPCState *env, 
int64_t now)
 decr = __cpu_ppc_load_decr(env, now, tb_env->decr_next);
 
 /*
- * If large decrementer is enabled then the decrementer is signed extened
+ * If large decrementer is enabled then the decrementer is signed extended
  * to 64 bits, otherwise it is a 32 bit value.
  */
 if (env->spr[SPR_LPCR] & LPCR_LD) {
diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c
index 5a56f155f5..c96cefb13d 100644
--- a/hw/ppc/prep_systemio.c
+++ b/hw/ppc/prep_systemio.c
@@ -39,7 +39,7 @@
 #define TYPE_PREP_SYSTEMIO "prep-systemio"
 OBJECT_DECLARE_SIMPLE_TYPE(PrepSystemIoState, PREP_SYSTEMIO)
 
-/* Bit as defined in PowerPC Reference Plaform v1.1, sect. 6.1.5, p. 132 */
+/* Bit as defined in PowerPC Reference Platform v1.1, sect. 6.1.5, p. 132 */
 #define PREP_BIT(n) (1 << (7 - (n)))
 
 struct PrepSystemIoState {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index de3c616b46..1f1aa2a6d4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2573,7 +2573,7 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, 
Error **errp)
 return;
 }
 
-/* Detemine the VSMT mode to use: */
+/* Determine the VSMT mode to use: */
 if (vsmt_user) {
 if (spapr->vsmt < smp_threads) {
 error_setg(errp, "Cannot support VSMT mode %d"
@@ -3107,7 +3107,7 @@ static int spapr_kvm_type(MachineState *machine, const 
char *vm_type)
 {
 /*
  * The use of g_ascii_strcasecmp() for 'hv' and 'pr' is to
- * accomodate the 'HV' and 'PV' formats that exists in the
+ * accommodate the 'HV' and 'PV' formats that exists in the
  * wild. The 'auto' mode is being introduced already as
  * lower-case, thus we don't need to bother checking for
  * "AUTO".
@@ -4340,7 +4340,7 @@ spapr_cpu_index_to_props(MachineState *machine, unsigned 
cpu_index)
 CPUArchId *core_slot;
 MachineClass *mc = MACHINE_GET_CLASS(machine);
 
-/* make sure possible_cpu are intialized */
+/* make sure possible_cpu are initialized */
 mc->possible_cpu_arch_ids(machine);
 /* get CPU core slot containing thread that matches cpu_index */
 core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
@@ -5034,7 +5034,7 @@ static void spapr_machine_2_12_class_options(MachineClass 
*mc)
 
 /* We depend on kvm_enabled() to choose a default value for the
  * hpt-max-page-size capability. Of course we can't do it here
- * because this is too early and the HW accelerator isn't initialzed
+ * because this is too early and the HW accelerator isn't initialized
  * yet. Postpone this to machine init (see default_caps_with_cpu()).
  */
 smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index b7dc388f2f..522a2396c7 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1615,7 +1615,7 @@ static void hypercall_register_types(void)
 spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS,
  h_get_cpu_characteristics);
 
-/* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
+/* "debugger" hcalls (also used by SLOF). Note: We do -not- differentiate
  * here between the "CI" and the "CACHE" variants, they will use whatever
  * mapping attributes qemu is using. When using KVM, the kernel will
  * enforce the attributes more strongly
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 60d6d0acc0..b2f009c816 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -378,7 +378,7 @@ static target_ulong h_scm_bind_mem(PowerP

[PULL 07/17] hw/other: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Peter Maydell 
---
 hw/acpi/aml-build.c  |  6 +++---
 hw/acpi/hmat.c   |  2 +-
 hw/acpi/nvdimm.c |  2 +-
 hw/block/hd-geometry.c   |  4 ++--
 hw/block/pflash_cfi01.c  |  2 +-
 hw/char/cadence_uart.c   |  2 +-
 hw/char/imx_serial.c |  2 +-
 hw/char/serial.c |  2 +-
 hw/core/generic-loader.c |  4 ++--
 hw/core/machine.c|  2 +-
 hw/core/qdev-properties-system.c |  2 +-
 hw/cpu/a15mpcore.c   |  2 +-
 hw/cxl/cxl-events.c  |  2 +-
 hw/cxl/cxl-mailbox-utils.c   |  4 ++--
 hw/dma/omap_dma.c|  4 ++--
 hw/input/hid.c   |  2 +-
 hw/input/tsc2005.c   | 16 
 hw/intc/loongarch_extioi.c   |  2 +-
 hw/intc/loongson_liointc.c   |  2 +-
 hw/intc/omap_intc.c  |  2 +-
 hw/intc/pnv_xive.c   |  2 +-
 hw/intc/spapr_xive.c |  2 +-
 hw/intc/spapr_xive_kvm.c |  6 +++---
 hw/intc/xive.c   |  2 +-
 hw/intc/xive2.c  |  6 +++---
 hw/ipmi/ipmi_bmc_extern.c|  2 +-
 hw/mem/cxl_type3.c   |  6 +++---
 hw/misc/imx7_ccm.c   |  2 +-
 hw/misc/mac_via.c|  2 +-
 hw/misc/stm32f2xx_syscfg.c   |  4 ++--
 hw/misc/trace-events |  2 +-
 hw/misc/zynq_slcr.c  |  2 +-
 hw/nvme/ctrl.c   |  6 +++---
 hw/nvram/eeprom_at24c.c  |  2 +-
 hw/nvram/fw_cfg.c|  2 +-
 hw/rtc/exynos4210_rtc.c  |  2 +-
 hw/rx/rx62n.c|  2 +-
 hw/scsi/lsi53c895a.c |  2 +-
 hw/scsi/mfi.h|  2 +-
 hw/sh4/sh7750_regs.h | 26 +-
 hw/smbios/smbios.c   |  2 +-
 hw/ssi/xilinx_spips.c|  6 +++---
 hw/ssi/xlnx-versal-ospi.c|  2 +-
 hw/timer/etraxfs_timer.c |  2 +-
 hw/timer/renesas_tmr.c   |  2 +-
 hw/virtio/virtio-crypto.c|  4 ++--
 hw/virtio/virtio-mem.c   |  2 +-
 hw/virtio/virtio.c   |  2 +-
 48 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ea331a20d1..af66bde0f5 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -312,7 +312,7 @@ build_prepend_package_length(GArray *package, unsigned 
length, bool incl_self)
 /*
  * PkgLength is the length of the inclusive length of the data
  * and PkgLength's length itself when used for terms with
- * explitit length.
+ * explicit length.
  */
 length += length_bytes;
 }
@@ -680,7 +680,7 @@ Aml *aml_store(Aml *val, Aml *target)
  *   "Op Operand Operand Target"
  * pattern.
  *
- * Returns: The newly allocated and composed according to patter Aml object.
+ * Returns: The newly allocated and composed according to pattern Aml object.
  */
 static Aml *
 build_opcode_2arg_dst(uint8_t op, Aml *arg1, Aml *arg2, Aml *dst)
@@ -2159,7 +2159,7 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const 
AcpiFadtData *f,
 /* FADT Minor Version */
 build_append_int_noprefix(tbl, f->minor_ver, 1);
 } else {
-build_append_int_noprefix(tbl, 0, 3); /* Reserved upto ACPI 5.0 */
+build_append_int_noprefix(tbl, 0, 3); /* Reserved up to ACPI 5.0 */
 }
 build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */
 
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 3a6d51282a..2d5e199ba9 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -82,7 +82,7 @@ static void build_hmat_lb(GArray *table_data, HMAT_LB_Info 
*hmat_lb,
 uint32_t base;
 /* Length in bytes for entire structure */
 uint32_t lb_length
-= 32 /* Table length upto and including Entry Base Unit */
+= 32 /* Table length up to and including Entry Base Unit */
 + 4 * num_initiator /* Initiator Proximity Domain List */
 + 4 * num_target /* Target Proximity Domain List */
 + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 3cbd41629d..9ba90806f2 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -1102,7 +1102,7 @@ static void nvdimm_build_common_dsm(Aml *dev,
  * be treated as an integer. Moreover, the integer size depends on
  * DSDT tables revision number. If revision number is < 2, integer
  * size is 32 bits, otherwise it is 64 bits.
- * Because of this CreateField() canot be used if RLEN < Integer Size.
+ * Because of this CreateField() cannot be used if RLEN < Integer Size.
  *
  * Also please note that APCI ASL operator SizeOf() doesn't support
  * Integer and there isn't any other way to figure out the Integer
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
index dae13ab14d..2b0af4430f 100644
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -50

[PULL 11/17] hw/pci-bridge/cxl_upstream: Fix bandwidth entry base unit for SSLBIS

2023-09-21 Thread Michael Tokarev

From: Dave Jiang 

According to ACPI spec 6.5 5.2.28.4 System Locality Latency and Bandwidth
Information Structure, if the "Entry Base Unit" is 1024 for BW and the
matrix entry has the value of 100, the BW is 100 GB/s. So the
entry_base_unit should be changed from 1000 to 1024 given the comment notes
it's 16GB/s for .latency_bandwidth.

Fixes: 882877fc359d ("hw/pci-bridge/cxl-upstream: Add a CDAT table access DOE")
Signed-off-by: Dave Jiang 
Signed-off-by: Jonathan Cameron 
Reviewed-by: Fan Ni 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Michael Tokarev 
---
 hw/pci-bridge/cxl_upstream.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index 9159f48a8c..2b9cf0cc97 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -262,7 +262,7 @@ static int build_cdat_table(CDATSubHeader ***cdat_table, 
void *priv)
 .length = sslbis_size,
 },
 .data_type = HMATLB_DATA_TYPE_ACCESS_BANDWIDTH,
-.entry_base_unit = 1000,
+.entry_base_unit = 1024,
 },
 };
 
-- 
2.39.2

[PULL 15/17] hw/mem/cxl_type3: Add missing copyright and license notice

2023-09-21 Thread Michael Tokarev

From: Jonathan Cameron 

This has been missing from the start. Assume it should match
with cxl/cxl-component-utils.c as both were part of early
postings from Ben.

Reported-by: Philippe Mathieu-Daudé 
Acked-by: Dave Jiang 
Acked-by: Ira Weiny 
Reviewed-by: Fan Ni 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Michael Tokarev 
---
 hw/mem/cxl_type3.c   | 11 +++
 hw/mem/cxl_type3_stubs.c | 10 ++
 2 files changed, 21 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a98a157065..4cdcb3f7e7 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1,3 +1,14 @@
+/*
+ * CXL Type 3 (memory expander) device
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-v2-only
+ */
+
 #include "qemu/osdep.h"
 #include "qemu/units.h"
 #include "qemu/error-report.h"
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index f3e4a9fa72..8ba5d3d1f7 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -1,3 +1,13 @@
+/*
+ * CXL Type 3 (memory expander) device QMP stubs
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-v2-only
+ */
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-- 
2.39.2

[PULL 08/17] subprojects: Use the correct .git suffix in the repository URLs

2023-09-21 Thread Michael Tokarev

From: Thomas Huth 

This avoids the warnings à la:
"warning: redirecting to https://gitlab.com/qemu-project/xyz.git/";

Signed-off-by: Thomas Huth 
Reviewed-by: Michael Tokarev 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Michael Tokarev 
---
 subprojects/berkeley-softfloat-3.wrap | 2 +-
 subprojects/berkeley-testfloat-3.wrap | 2 +-
 subprojects/slirp.wrap| 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/subprojects/berkeley-softfloat-3.wrap 
b/subprojects/berkeley-softfloat-3.wrap
index a8fd87740b..c3e356d42f 100644
--- a/subprojects/berkeley-softfloat-3.wrap
+++ b/subprojects/berkeley-softfloat-3.wrap
@@ -1,5 +1,5 @@
 [wrap-git]
-url = https://gitlab.com/qemu-project/berkeley-softfloat-3
+url = https://gitlab.com/qemu-project/berkeley-softfloat-3.git
 revision = b64af41c3276f97f0e181920400ee056b9c88037
 patch_directory = berkeley-softfloat-3
 depth = 1
diff --git a/subprojects/berkeley-testfloat-3.wrap 
b/subprojects/berkeley-testfloat-3.wrap
index c86dc078a8..b8b12e7629 100644
--- a/subprojects/berkeley-testfloat-3.wrap
+++ b/subprojects/berkeley-testfloat-3.wrap
@@ -1,5 +1,5 @@
 [wrap-git]
-url = https://gitlab.com/qemu-project/berkeley-testfloat-3
+url = https://gitlab.com/qemu-project/berkeley-testfloat-3.git
 revision = e7af9751d9f9fd3b47911f51a5cfd08af256a9ab
 patch_directory = berkeley-testfloat-3
 depth = 1
diff --git a/subprojects/slirp.wrap b/subprojects/slirp.wrap
index 08291a4cf9..a93b048962 100644
--- a/subprojects/slirp.wrap
+++ b/subprojects/slirp.wrap
@@ -1,5 +1,5 @@
 [wrap-git]
-url = https://gitlab.freedesktop.org/slirp/libslirp
+url = https://gitlab.freedesktop.org/slirp/libslirp.git
 revision = 26be815b86e8d49add8c9a8b320239b9594ff03d
 
 [provide]
-- 
2.39.2

[PULL 05/17] hw/pci: spelling fixes

2023-09-21 Thread Michael Tokarev

Signed-off-by: Michael Tokarev 
Reviewed-by: Peter Maydell 
---
 hw/pci-bridge/cxl_downstream.c  | 2 +-
 hw/pci-bridge/pci_expander_bridge.c | 2 +-
 hw/pci-host/bonito.c| 2 +-
 hw/pci-host/designware.c| 4 ++--
 hw/pci-host/dino.c  | 2 +-
 hw/pci-host/gpex-acpi.c | 2 +-
 hw/pci-host/gt64120.c   | 4 ++--
 hw/pci-host/pnv_phb.c   | 2 +-
 hw/pci-host/pnv_phb3.c  | 2 +-
 hw/pci-host/pnv_phb3_msi.c  | 2 +-
 hw/pci-host/pnv_phb4.c  | 6 +++---
 hw/pci/pcie_aer.c   | 2 +-
 hw/pci/shpc.c   | 2 +-
 13 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index 54f507318f..5a2b749c8e 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -42,7 +42,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
CXL2_DOWNSTREAM_PORT);
 }
 
-/* TODO: Look at sharing this code acorss all CXL port types */
+/* TODO: Look at sharing this code across all CXL port types */
 static void cxl_dsp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
   uint32_t val, int len)
 {
diff --git a/hw/pci-bridge/pci_expander_bridge.c 
b/hw/pci-bridge/pci_expander_bridge.c
index 613857b601..535889f7c2 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -263,7 +263,7 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
 
 /*
  * First carry out normal swizzle to handle
- * multple root ports on a pxb instance.
+ * multiple root ports on a pxb instance.
  */
 pin = pci_swizzle_map_irq_fn(pci_dev, pin);
 
diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 4701481b9b..ee6cb85e97 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -62,7 +62,7 @@
 #define DPRINTF(fmt, ...)
 #endif
 
-/* from linux soure code. include/asm-mips/mips-boards/bonito64.h*/
+/* from linux source code. include/asm-mips/mips-boards/bonito64.h*/
 #define BONITO_BOOT_BASE0x1fc0
 #define BONITO_BOOT_SIZE0x0010
 #define BONITO_BOOT_TOP (BONITO_BOOT_BASE + BONITO_BOOT_SIZE - 1)
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index 388d252ee2..6f5442f108 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -488,7 +488,7 @@ static void designware_pcie_root_realize(PCIDevice *dev, 
Error **errp)
 
 /*
  * If no inbound iATU windows are configured, HW defaults to
- * letting inbound TLPs to pass in. We emulate that by exlicitly
+ * letting inbound TLPs to pass in. We emulate that by explicitly
  * configuring first inbound window to cover all of target's
  * address space.
  *
@@ -503,7 +503,7 @@ static void designware_pcie_root_realize(PCIDevice *dev, 
Error **errp)
   &designware_pci_host_msi_ops,
   root, "pcie-msi", 0x4);
 /*
- * We initially place MSI interrupt I/O region a adress 0 and
+ * We initially place MSI interrupt I/O region at address 0 and
  * disable it. It'll be later moved to correct offset and enabled
  * in designware_pcie_root_update_msi_mapping() as a part of
  * initialization done by guest OS
diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c
index e8eaebca54..82503229fa 100644
--- a/hw/pci-host/dino.c
+++ b/hw/pci-host/dino.c
@@ -1,5 +1,5 @@
 /*
- * HP-PARISC Dino PCI chipset emulation, as in B160L and similiar machines
+ * HP-PARISC Dino PCI chipset emulation, as in B160L and similar machines
  *
  * (C) 2017-2019 by Helge Deller 
  *
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index 7c7316bc96..1092dc3b70 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -177,7 +177,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
 acpi_dsdt_add_pci_route_table(dev, cfg->irq);
 
 /*
- * Resources defined for PXBs are composed by the folling parts:
+ * Resources defined for PXBs are composed of the following parts:
  * 1. The resources the pci-brige/pcie-root-port need.
  * 2. The resources the devices behind pxb need.
  */
diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c
index 82c15edb46..143bf053d7 100644
--- a/hw/pci-host/gt64120.c
+++ b/hw/pci-host/gt64120.c
@@ -331,9 +331,9 @@ static void gt64120_update_pci_cfgdata_mapping(GT64120State 
*s)
 /*
  * The setting of the MByteSwap bit and MWordSwap bit in the PCI Internal
  * Command Register determines how data transactions from the CPU to/from
- * PCI are handled along with the setting of the Endianess bit in the CPU
+ * PCI are handled along with the setting of the Endianness bit in the CPU
  * Configuration Register. See:
- * - Table 16: 32-bit PCI Transaction Endian

[PULL 10/17] hw/cxl: Fix CFMW config memory leak

2023-09-21 Thread Michael Tokarev

From: Li Zhijian 

Allocate targets and targets[n] resources when all sanity checks are
passed to avoid memory leaks.

Cc: qemu-sta...@nongnu.org
Suggested-by: Philippe Mathieu-Daudé 
Signed-off-by: Li Zhijian 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Jonathan Cameron 
Reviewed-by: Fan Ni 
Signed-off-by: Michael Tokarev 
---
 hw/cxl/cxl-host.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index 034c7805b3..f0920da956 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -39,12 +39,6 @@ static void cxl_fixed_memory_window_config(CXLState 
*cxl_state,
 return;
 }
 
-fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
-for (i = 0, target = object->targets; target; i++, target = target->next) {
-/* This link cannot be resolved yet, so stash the name for now */
-fw->targets[i] = g_strdup(target->value);
-}
-
 if (object->size % (256 * MiB)) {
 error_setg(errp,
"Size of a CXL fixed memory window must be a multiple of 
256MiB");
@@ -64,6 +58,12 @@ static void cxl_fixed_memory_window_config(CXLState 
*cxl_state,
 fw->enc_int_gran = 0;
 }
 
+fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
+for (i = 0, target = object->targets; target; i++, target = target->next) {
+/* This link cannot be resolved yet, so stash the name for now */
+fw->targets[i] = g_strdup(target->value);
+}
+
 cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows,
  g_steal_pointer(&fw));
 
-- 
2.39.2

[PULL 16/17] docs/cxl: Cleanout some more aarch64 examples.

2023-09-21 Thread Michael Tokarev

From: Jonathan Cameron 

These crossed with the previous fix to get rid of examples
using aarch64 for which support is not yet upstream.

Reviewed-by: Fan Ni 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1892
Signed-off-by: Jonathan Cameron 
Signed-off-by: Michael Tokarev 
---
 docs/system/devices/cxl.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index b742120657..6ab5f72473 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -313,7 +313,7 @@ A very simple setup with just one directly attached CXL 
Type 3 Persistent Memory
 
 A very simple setup with just one directly attached CXL Type 3 Volatile Memory 
device::
 
-  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 
-cpu max \
+  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
   ...
   -object memory-backend-ram,id=vmem0,share=on,size=256M \
   -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
@@ -323,7 +323,7 @@ A very simple setup with just one directly attached CXL 
Type 3 Volatile Memory d
 
 The same volatile setup may optionally include an LSA region::
 
-  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 
-cpu max \
+  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
   ...
   -object memory-backend-ram,id=vmem0,share=on,size=256M \
   -object 
memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa.raw,size=256M \
-- 
2.39.2

[PULL 09/17] hw/i386/pc: fix code comment on cumulative flash size

2023-09-21 Thread Michael Tokarev

From: Laszlo Ersek 

- The comment is incorrectly indented / formatted.

- The comment states a 8MB limit, even though the code enforces a 16MB
  limit.

Both of these warts come from commit 0657c657eb37 ("hw/i386/pc: add max
combined fw size as machine configuration option", 2020-12-09); clean them
up.

Arguably, it's also better to be consistent with the binary units (such as
"MiB") that QEMU uses nowadays.

Cc: "Michael S. Tsirkin"  (supporter:PC)
Cc: Marcel Apfelbaum  (supporter:PC)
Cc: Paolo Bonzini  (maintainer:X86 TCG CPUs)
Cc: Richard Henderson  (maintainer:X86 TCG CPUs)
Cc: Eduardo Habkost  (maintainer:X86 TCG CPUs)
Cc: qemu-triv...@nongnu.org
Fixes: 0657c657eb37
Signed-off-by: Laszlo Ersek 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael Tokarev 
Signed-off-by: Michael Tokarev 
---
 hw/i386/pc.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2872f60cdf..3db0743f31 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1746,12 +1746,12 @@ static void pc_machine_set_max_fw_size(Object *obj, 
Visitor *v,
 }
 
 /*
-* We don't have a theoretically justifiable exact lower bound on the base
-* address of any flash mapping. In practice, the IO-APIC MMIO range is
-* [0xFEE0..0xFEE01000] -- see IO_APIC_DEFAULT_ADDRESS --, leaving free
-* only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB 
in
-* size.
-*/
+ * We don't have a theoretically justifiable exact lower bound on the base
+ * address of any flash mapping. In practice, the IO-APIC MMIO range is
+ * [0xFEE0..0xFEE01000] -- see IO_APIC_DEFAULT_ADDRESS --, leaving free
+ * only 18MiB-4KiB below 4GiB. For now, restrict the cumulative mapping to
+ * 16MiB in size.
+ */
 if (value > 16 * MiB) {
 error_setg(errp,
"User specified max allowed firmware size %" PRIu64 " is "
-- 
2.39.2

[PULL 14/17] hw/cxl: Fix out of bound array access

2023-09-21 Thread Michael Tokarev

From: Dmitry Frolov 

According to cxl_interleave_ways_enc(), fw->num_targets is allowed to be up
to 16. This also corresponds to CXL r3.0 spec. So, the fw->target_hbs[]
array is iterated from 0 to 15. But it is statically declared of length 8.
Thus, out of bound array access may occur.

Fixes: c28db9e000 ("hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from 
TYPE_PXB_DEV")
Signed-off-by: Dmitry Frolov 
Reviewed-by: Michael Tokarev 
Link: https://lore.kernel.org/r/20230913101055.754709-1-fro...@swemel.ru
Cc: qemu-sta...@nongnu.org
Signed-off-by: Jonathan Cameron 
Signed-off-by: Michael Tokarev 
---
 include/hw/cxl/cxl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index 56c9e7676e..4944725849 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -29,7 +29,7 @@ typedef struct PXBCXLDev PXBCXLDev;
 typedef struct CXLFixedWindow {
 uint64_t size;
 char **targets;
-PXBCXLDev *target_hbs[8];
+PXBCXLDev *target_hbs[16];
 uint8_t num_targets;
 uint8_t enc_int_ways;
 uint8_t enc_int_gran;
-- 
2.39.2

[PULL 12/17] hw/cxl/cxl_device: Replace magic number in CXLError definition

2023-09-21 Thread Michael Tokarev

From: Fan Ni 

Replace the magic number 32 with CXL_RAS_ERR_HEADER_NUM for better code
readability and maintainability.

Signed-off-by: Fan Ni 
Reviewed-by: Davidlohr Bueso 
Reviewed-by: Dave Jiang 
Signed-off-by: Jonathan Cameron 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Michael Tokarev 
---
 include/hw/cxl/cxl_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index f717e3f384..51cd0d9ce3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -300,7 +300,7 @@ REG64(CXL_MEM_DEV_STS, 0)
 typedef struct CXLError {
 QTAILQ_ENTRY(CXLError) node;
 int type; /* Error code as per FE definition */
-uint32_t header[32];
+uint32_t header[CXL_RAS_ERR_HEADER_NUM];
 } CXLError;
 
 typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
-- 
2.39.2

[PULL 13/17] docs/cxl: Change to lowercase as others

2023-09-21 Thread Michael Tokarev

From: Li Zhijian 

Using the same style as elsewhere for topology / topo

Signed-off-by: Li Zhijian 
Link: 
https://lore.kernel.org/r/20230519085802.2106900-2-lizhij...@cn.fujitsu.com
Signed-off-by: Jonathan Cameron 
Reviewed-by: Fan Ni 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Michael Tokarev 
---
 docs/system/devices/cxl.rst | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index f12011e230..b742120657 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -157,7 +157,7 @@ responsible for allocating appropriate ranges from within 
the CFMWs
 and exposing those via normal memory configurations as would be done
 for system RAM.
 
-Example system Topology. x marks the match in each decoder level::
+Example system topology. x marks the match in each decoder level::
 
   |<--SYSTEM PHYSICAL ADDRESS MAP (1)->|
   |__   __   __|
@@ -187,8 +187,8 @@ Example system Topology. x marks the match in each decoder 
level::
___|___   __|__   __|_   ___|_
(3)|  Root Port 0  | | Root Port 1 | | Root Port 2| | Root Port 3 |
   |  Appears in   | | Appears in  | | Appears in | | Appear in   |
-  |  PCI topology | | PCI Topology| | PCI Topo   | | PCI Topo|
-  |  As 0c:00.0   | | as 0c:01.0  | | as de:00.0 | | as de:01.0  |
+  |  PCI topology | | PCI topology| | PCI topo   | | PCI topo|
+  |  as 0c:00.0   | | as 0c:01.0  | | as de:00.0 | | as de:01.0  |
   |___| |_| || |_|
 |  |   |  |
 |  |   |  |
@@ -272,7 +272,7 @@ Example topology involving a switch::
   |  Root Port 0  |
   |  Appears in   |
   |  PCI topology |
-  |  As 0c:00.0   |
+  |  as 0c:00.0   |
   |___x___|
   |
   |
-- 
2.39.2

Re: [PATCH v2 4/7] block/dirty-bitmap: Clean up local variable shadowing

2023-09-21 Thread Kevin Wolf

Am 20.09.2023 um 20:31 hat Markus Armbruster geschrieben:
> Local variables shadowing other local variables or parameters make the
> code needlessly hard to understand.  Tracked down with -Wshadow=local.
> Clean up: rename both the pair of parameters and the pair of local
> variables.  While there, move the local variables to function scope.
> 
> Suggested-by: Kevin Wolf 
> Signed-off-by: Markus Armbruster 

Reviewed-by: Kevin Wolf

[PULL 17/17] docs/devel/reset.rst: Correct function names

2023-09-21 Thread Michael Tokarev

From: Akihiko Odaki 

resettable_class_set_parent_phases() was mistakenly called
resettable_class_set_parent_reset_phases() in some places.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Maydell 
Signed-off-by: Michael Tokarev 
---
 docs/devel/reset.rst | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst
index 7cc6a6b314..38ed1790f7 100644
--- a/docs/devel/reset.rst
+++ b/docs/devel/reset.rst
@@ -184,21 +184,20 @@ in reset.
 {
 MyDevClass *myclass = MYDEV_CLASS(class);
 ResettableClass *rc = RESETTABLE_CLASS(class);
-resettable_class_set_parent_reset_phases(rc,
- mydev_reset_enter,
- mydev_reset_hold,
- mydev_reset_exit,
- &myclass->parent_phases);
+resettable_class_set_parent_phases(rc,
+   mydev_reset_enter,
+   mydev_reset_hold,
+   mydev_reset_exit,
+   &myclass->parent_phases);
 }
 
 In the above example, we override all three phases. It is possible to override
 only some of them by passing NULL instead of a function pointer to
-``resettable_class_set_parent_reset_phases()``. For example, the following will
+``resettable_class_set_parent_phases()``. For example, the following will
 only override the *enter* phase and leave *hold* and *exit* untouched::
 
-resettable_class_set_parent_reset_phases(rc, mydev_reset_enter,
- NULL, NULL,
- &myclass->parent_phases);
+resettable_class_set_parent_phases(rc, mydev_reset_enter, NULL, NULL,
+   &myclass->parent_phases);
 
 This is equivalent to providing a trivial implementation of the hold and exit
 phases which does nothing but call the parent class's implementation of the
-- 
2.39.2

Re: [PULL 4/5] hw/ufs: Support for UFS logical unit

2023-09-21 Thread Jeuk Kim




On 2023-09-15 4:59 PM, Paolo Bonzini wrote:

On 9/15/23 00:19, Jeuk Kim wrote:
First, ufs-lu has a feature called "unit descriptor". This feature 
shows the status of the ufs-lu


and only works with UFS-specific "query request" commands, not SCSI 
commands.


This looks like something that can be implemented in the UFS subsystem.

UFS also has something called a well-known lu. Unlike typical SCSI 
devices, where each lu is independent,

UFS can control other lu's through the well-known lu.


This can also be implemented in UfsBus.

Finally, UFS-LU will have features that SCSI-HD does not have, such 
as the zone block command.


These should be implemented in scsi-hd as well.

In addition to this, I wanted some scsi commands to behave 
differently from scsi-hd, for example,

the Inquiry command should read "QEMU UFS" instead of "QEMU HARDDISK",
and the mode_sense_page command should have a different result.


Some of these don't have much justification, and others (such as the 
control page) could be done in scsi-hd as well.


We should look into cleaning this up and making ufs-lu share a lot 
more code with scsi-hd; possibly even supporting -device scsi-hd with 
UFS devices.  I am not going to ask you for a revert, but if this is 
not done before 8.2 is out, I will ask you to disable it by default in 
hw/ufs/Kconfig.


In the future, please Cc the SCSI maintainers for UFS patches.

Paolo


Dear Paolo

Hi. I've been looking into how ufs-lu can share code with scsi-hd.

I have verified that ufs-lu can use scsi-hd's code, and I would like to modify 
it to do so.

I've validated two possible fixes.
I'd like to hear your thoughts.

Option1.
As you mentioned, using ufsbus, which inherits from scsibus, removes the ufs-lu 
device type and use scsi-hd. (like -device ufs,id=ufs0 -device scsi-hd,bus=ufs0)
I've verified that this method is implementable, except for one problem.
Because we are using the scsi-hd type instead of the ufs-lu type, the ufs has 
to manage all the ufs-lu specific data (such as the unit descriptor).
However, since there is no ufs_lu_realize() function, we need a way to notify 
the ufs when a new scsi-hd device is added.
Would there be a way to let the ufs know that a new scsi-hd has been added at 
scsi_hd_realize() time?

Option 2.
Use qdev_new() & qdev_realize() to make ufs-lu have a virtual scsi bus and 
scsi-hd.
The ufs-lu can pass through SCSI commands to the virtual scsi-hd.
This is similar to the method used by the device "usb-storage".

With this method, I can keep the ufs-lu device type (ufs_lu_realize() makes it 
convenient to manage ufs-lu related data) and avoid duplicating code with 
scsi-hd.
So I prefer this approach, but the annotation for "usb-storage" is marked with a 
"Hack alert", so I'm not sure if this is the right way.
The code can be found in usb_msd_storage_realize() 
(hw/usb/dev-storage-classic.c:51).

I am wondering if you could give me some advice on this and your preferred 
direction for fixing it.

Thank you so much.

Jeuk

Re: [RFC PATCH v2 03/21] HostMem: Add private property and associate it with RAM_KVM_GMEM

2023-09-21 Thread Xiaoyao Li


On 9/20/2023 11:42 PM, Markus Armbruster wrote:

David Hildenbrand  writes:


On 20.09.23 16:35, Xiaoyao Li wrote:

On 9/20/2023 3:30 PM, Markus Armbruster wrote:

Xiaoyao Li  writes:


On 9/19/2023 5:46 PM, Markus Armbruster wrote:

Xiaoyao Li  writes:


From: Isaku Yamahata 

Add a new property "private" to memory backends. When it's set to true,
it indicates the RAMblock of the backend also requires kvm gmem.

Can you add a brief explanation why you need the property?


It provides a mechanism for user to specify whether the memory can serve as 
private memory (need request kvm gmem).


Yes, but why would a user want such memory?


Because KVM demands it for confidential guest, e.g., TDX guest. KVM
demands that the mem slot needs to have KVM_MEM_PRIVATE set and has
valid gmem associated if the guest accesses it as private memory.


Commit messages should explain why we want the patch.  Documenting "why"
is at least as important as "what".  If "what" is missing, I can read
the patch to find out.  If "why" is missing, I'm reduced to guesswork.


I'll try best to improve the commit message of this patch, and all other 
patches.



I think as long as there is no demand to have a TDX guest with this property be set to 
"off", then just don't add it.

With a TDX VM, it will can be implicitly active. If we ever have to disable it for 
selective memory backends, we can add the property and have something like on/off/auto. 
For now it would be "auto".


Makes sense to me.


OK. I think I get the answer of open #1 in cover letter.

If no other voice, I'll drop this patch and allocate gmem RAM when 
vm_type is TDX.

Re: [PATCH v1 05/22] vfio/common: Extract out vfio_kvm_device_[add/del]_fd

2023-09-21 Thread Cédric Le Goater


On 9/20/23 13:49, Eric Auger wrote:

Hi Zhenzhong,

On 8/30/23 12:37, Zhenzhong Duan wrote:

...which will be used by both legacy and iommufd backend.

I prefer genuine sentences in the commit msg. Also you explain what you
do but not why.

suggestion: Introduce two new helpers, vfio_kvm_device_[add/del]_fd
which take as input a file descriptor which can be either a group fd or
a cdev fd. This uses the new KVM_DEV_VFIO_FILE VFIO KVM device group,
which aliases to the legacy KVM_DEV_VFIO_GROUP.


Ah yes. I didn't understand why the 's/GROUP/FILE/' change in the
VFIO KVM device ioctls. Thanks for clarifying.

What about pre-6.6 kernels without KVM_DEV_VFIO_FILE support ?

C.




vfio_kvm_device_add/del_group then call those new helpers.





Signed-off-by: Yi Liu 
Signed-off-by: Zhenzhong Duan 
---
  hw/vfio/common.c  | 44 +++
  include/hw/vfio/vfio-common.h |  3 +++
  2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 67150e4575..949ad6714a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1759,17 +1759,17 @@ void vfio_reset_handler(void *opaque)
  }
  }
  
-static void vfio_kvm_device_add_group(VFIOGroup *group)

+int vfio_kvm_device_add_fd(int fd)
  {
  #ifdef CONFIG_KVM
  struct kvm_device_attr attr = {
-.group = KVM_DEV_VFIO_GROUP,
-.attr = KVM_DEV_VFIO_GROUP_ADD,
-.addr = (uint64_t)(unsigned long)&group->fd,
+.group = KVM_DEV_VFIO_FILE,
+.attr = KVM_DEV_VFIO_FILE_ADD,
+.addr = (uint64_t)(unsigned long)&fd,
  };
  
  if (!kvm_enabled()) {

-return;
+return 0;
  }
  
  if (vfio_kvm_device_fd < 0) {

@@ -1779,37 +1779,51 @@ static void vfio_kvm_device_add_group(VFIOGroup *group)
  
  if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {

  error_report("Failed to create KVM VFIO device: %m");
-return;
+return -ENODEV;

can't you return -errno?

  }
  
  vfio_kvm_device_fd = cd.fd;

  }
  
  if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {

-error_report("Failed to add group %d to KVM VFIO device: %m",
- group->groupid);
+error_report("Failed to add fd %d to KVM VFIO device: %m",
+ fd);
+return -errno;
  }
  #endif
+return 0;
  }
  
-static void vfio_kvm_device_del_group(VFIOGroup *group)

+static void vfio_kvm_device_add_group(VFIOGroup *group)
+{
+vfio_kvm_device_add_fd(group->fd);

Since vfio_kvm_device_add_fd now returns an error value, it's a pity not
to use it and propagate it. Also you could fill an errp with the error
msg and use it in vfio_connect_container(). But this is a new error
handling there.

+}
+
+int vfio_kvm_device_del_fd(int fd)

not sure we want this to return an error. But if we do, I think it would
be nicer to propagate the error up.

  {
  #ifdef CONFIG_KVM
  struct kvm_device_attr attr = {
-.group = KVM_DEV_VFIO_GROUP,
-.attr = KVM_DEV_VFIO_GROUP_DEL,
-.addr = (uint64_t)(unsigned long)&group->fd,
+.group = KVM_DEV_VFIO_FILE,
+.attr = KVM_DEV_VFIO_FILE_DEL,
+.addr = (uint64_t)(unsigned long)&fd,
  };
  
  if (vfio_kvm_device_fd < 0) {

-return;
+return -EINVAL;
  }
  
  if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {

-error_report("Failed to remove group %d from KVM VFIO device: %m",
- group->groupid);
+error_report("Failed to remove fd %d from KVM VFIO device: %m",
+ fd);
+return -EBADF;

-errno?

  }
  #endif
+return 0;
+}
+
+static void vfio_kvm_device_del_group(VFIOGroup *group)
+{
+vfio_kvm_device_del_fd(group->fd);
  }
  
  static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 5e376c436e..598c3ce079 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -220,6 +220,9 @@ struct vfio_device_info *vfio_get_device_info(int fd);
  int vfio_get_device(VFIOGroup *group, const char *name,
  VFIODevice *vbasedev, Error **errp);
  
+int vfio_kvm_device_add_fd(int fd);

+int vfio_kvm_device_del_fd(int fd);
+
  extern const MemoryRegionOps vfio_region_ops;
  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
  extern VFIOGroupList vfio_group_list;

Thanks

Eric

Re: [PATCH v2 7/7] qobject atomics osdep: Make a few macros more hygienic

2023-09-21 Thread Kevin Wolf

Am 20.09.2023 um 20:31 hat Markus Armbruster geschrieben:
> Variables declared in macros can shadow other variables.  Much of the
> time, this is harmless, e.g.:
> 
> #define _FDT(exp)  \
> do {   \
> int ret = (exp);   \
> if (ret < 0) { \
> error_report("error creating device tree: %s: %s",   \
> #exp, fdt_strerror(ret));  \
> exit(1);   \
> }  \
> } while (0)
> 
> Harmless shadowing in h_client_architecture_support():
> 
> target_ulong ret;
> 
> [...]
> 
> ret = do_client_architecture_support(cpu, spapr, vec, fdt_bufsize);
> if (ret == H_SUCCESS) {
> _FDT((fdt_pack(spapr->fdt_blob)));
> [...]
> }
> 
> return ret;
> 
> However, we can get in trouble when the shadowed variable is used in a
> macro argument:
> 
> #define QOBJECT(obj) ({ \
> typeof(obj) o = (obj);  \
> o ? container_of(&(o)->base, QObject, base) : NULL; \
>  })
> 
> QOBJECT(o) expands into
> 
> ({
> --->typeof(o) o = (o);
> o ? container_of(&(o)->base, QObject, base) : NULL;
> })
> 
> Unintended variable name capture at --->.  We'd be saved by
> -Winit-self.  But I could certainly construct more elaborate death
> traps that don't trigger it.
> 
> To reduce the risk of trapping ourselves, we use variable names in
> macros that no sane person would use elsewhere.  Here's our actual
> definition of QOBJECT():
> 
> #define QOBJECT(obj) ({ \
> typeof(obj) _obj = (obj);   \
> _obj ? container_of(&(_obj)->base, QObject, base) : NULL;   \
> })
> 
> Works well enough until we nest macro calls.  For instance, with
> 
> #define qobject_ref(obj) ({ \
> typeof(obj) _obj = (obj);   \
> qobject_ref_impl(QOBJECT(_obj));\
> _obj;   \
> })
> 
> the expression qobject_ref(obj) expands into
> 
> ({
> typeof(obj) _obj = (obj);
> qobject_ref_impl(
> ({
> --->typeof(_obj) _obj = (_obj);
> _obj ? container_of(&(_obj)->base, QObject, base) : NULL;
> }));
> _obj;
> })
> 
> Unintended variable name capture at --->.
> 
> The only reliable way to prevent unintended variable name capture is
> -Wshadow.
> 
> One blocker for enabling it is shadowing hiding in function-like
> macros like
> 
>  qdict_put(dict, "name", qobject_ref(...))
> 
> qdict_put() wraps its last argument in QOBJECT(), and the last
> argument here contains another QOBJECT().
> 
> Use dark preprocessor sorcery to make the macros that give us this
> problem use different variable names on every call.
> 
> Signed-off-by: Markus Armbruster 
> Reviewed-by: Eric Blake 
> ---
>  include/qapi/qmp/qobject.h | 11 +--
>  include/qemu/atomic.h  | 17 -
>  include/qemu/compiler.h|  3 +++
>  include/qemu/osdep.h   | 31 +++
>  4 files changed, 47 insertions(+), 15 deletions(-)
> 
> diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h
> index 9003b71fd3..d36cc97805 100644
> --- a/include/qapi/qmp/qobject.h
> +++ b/include/qapi/qmp/qobject.h
> @@ -45,10 +45,17 @@ struct QObject {
>  struct QObjectBase_ base;
>  };
>  
> -#define QOBJECT(obj) ({ \
> +/*
> + * Preprocessory sorcery ahead: use a different identifier for the
> + * local variable in each expansion, so we can nest macro calls
> + * without shadowing variables.
> + */
> +#define QOBJECT_INTERNAL(obj, _obj) ({  \
>  typeof(obj) _obj = (obj);   \
> -_obj ? container_of(&(_obj)->base, QObject, base) : NULL;   \
> +_obj\
> +? container_of(&(_obj)->base, QObject, base) : NULL;\

What happened here? The code in this line (or now two lines) seems to be
unchanged apart from a strange looking newline.

>  })
> +#define QOBJECT(obj) QOBJECT_INTERNAL((obj), MAKE_IDENTFIER(_obj))

Kevin

Re: [RFC PATCH v2 03/21] HostMem: Add private property and associate it with RAM_KVM_GMEM

2023-09-21 Thread David Hildenbrand


I think as long as there is no demand to have a TDX guest with this property be set to 
"off", then just don't add it.

With a TDX VM, it will can be implicitly active. If we ever have to disable it for 
selective memory backends, we can add the property and have something like on/off/auto. 
For now it would be "auto".


Makes sense to me.


OK. I think I get the answer of open #1 in cover letter.



Yes.


If no other voice, I'll drop this patch and allocate gmem RAM when
vm_type is TDX.


Good!

--
Cheers,

David / dhildenb

Re: [RFC PATCH v2 04/21] memory: Introduce memory_region_has_gmem_fd()

2023-09-21 Thread David Hildenbrand


On 14.09.23 05:51, Xiaoyao Li wrote:

Introduce memory_region_has_gmem_fd() to query if the MemoryRegion has
KVM gmem fd allocated.


*probably* best to just squash that into patch #2.

--
Cheers,

David / dhildenb

Re: [RFC PATCH v2 07/21] i386/pc: Drop pc_machine_kvm_type()

2023-09-21 Thread David Hildenbrand


On 14.09.23 05:51, Xiaoyao Li wrote:

pc_machine_kvm_type() was introduced by commit e21be724eaf5 ("i386/xen:
add pc_machine_kvm_type to initialize XEN_EMULATE mode") to do Xen
specific initialization by utilizing kvm_type method.

commit eeedfe6c6316 ("hw/xen: Simplify emulated Xen platform init")
moves the Xen specific initialization to pc_basic_device_init().

There is no need to keep the PC specific kvm_type() implementation
anymore.


So we'll fallback to kvm_arch_get_default_type(), which simply returns 0.


On the other hand, later patch will implement kvm_type()
method for all x86/i386 machines to support KVM_X86_SW_PROTECTED_VM.



^ I suggest dropping that and merging that patch ahead-of-time as a 
simple cleanup.


Reviewed-by: David Hildenbrand 

--
Cheers,

David / dhildenb

Re: [PATCH 01/52] migration/rdma: Clean up qemu_rdma_poll()'s return type

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> qemu_rdma_poll()'s return type is uint64_t, even though it returns 0,
> -1, or @ret, which is int.  Its callers assign the return value to int
> variables, then check whether it's negative.  Unclean.
> 
> Return int instead.
> 
> Signed-off-by: Markus Armbruster

Reviewed-by: Li Zhijian

Re: [PATCH 02/52] migration/rdma: Clean up qemu_rdma_data_init()'s return type

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> qemu_rdma_data_init() return type is void *.  It actually returns
> RDMAContext *, and all its callers assign the value to an
> RDMAContext *.  Unclean.
> 
> Return RDMAContext * instead.
> 
> Signed-off-by: Markus Armbruster

Reviewed-by: Li Zhijian

Re: [PATCH 03/52] migration/rdma: Clean up rdma_delete_block()'s return type

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> rdma_delete_block() always returns 0, which its only caller ignores.
> Return void instead.
> 
> Signed-off-by: Markus Armbruster

Reviewed-by: Li Zhijian

Re: [PATCH 04/52] migration/rdma: Drop fragile wr_id formatting

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> wrid_desc[] uses 4001 pointers to map four integer values to strings.
> 
> print_wrid() accesses wrid_desc[] out of bounds when passed a negative
> argument.  It returns null for values 2..1999 and 2001..3999.
> 
> qemu_rdma_poll() and qemu_rdma_block_for_wrid() print wrid_desc[wr_id]
> and passes print_wrid(wr_id) to tracepoints.  Could conceivably crash
> trying to format a null string.  I believe access out of bounds is not
> possible.
> 
> Not worth cleaning up.  Dumb down to show just numeric wr_id.

Yeah, a numeric wr_id is enough


> 
> Signed-off-by: Markus Armbruster

Reviewed-by: Li Zhijian

Re: [RFC PATCH v2 02/21] RAMBlock: Add support of KVM private gmem

2023-09-21 Thread David Hildenbrand


On 14.09.23 05:50, Xiaoyao Li wrote:

From: Chao Peng 

Add KVM gmem support to RAMBlock so both normal hva based memory
and kvm gmem fd based private memory can be associated in one RAMBlock.

Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
gmem for the RAMBlock when it's set.



But who sets RAM_KVM_GMEM and when? Don't we simply allocate it for all 
RAMBlocks under such special VMs? What's the downside of doing that?



--
Cheers,

David / dhildenb

Re: [RFC PATCH v2 05/21] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot

2023-09-21 Thread David Hildenbrand


On 14.09.23 05:51, Xiaoyao Li wrote:

From: Chao Peng 

Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM.

With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that
backend'ed both by hva-based shared memory and gmem fd based private
memory.

Signed-off-by: Chao Peng 
Codeveloped-by: Xiaoyao Li 


"Co-developed-by".

--
Cheers,

David / dhildenb

Re: [PATCH 07/52] migration/rdma: Give qio_channel_rdma_source_funcs internal linkage

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> Signed-off-by: Markus Armbruster 

Reviewed-by: Li Zhijian

Re: [RFC PATCH v2 00/21] QEMU gmem implemention

2023-09-21 Thread David Hildenbrand



This version still leave some opens to be discussed:
1. whether we need "private" propery to be user-settable?

     It seems unnecessary because vm-type is determined. If the VM is
     confidential-guest, then the RAM of the guest must be able to be
     mapped as private, i.e., have kvm gmem backend. So QEMU can
     determine the value of "private" property automatiacally based on vm
     type.

     This also aligns with the board internal MemoryRegion that needs to
     have kvm gmem backend, e.g., TDX requires OVMF to act as private
     memory so bios memory region needs to have kvm gmem fd associated.
     QEMU no doubt will do it internally automatically.


Would it make sense to have some regions without "pivate" semantics?
Like NVDIMMs?


Of course it can have regions without "private" semantics.


I meant "RAM memory regions on such a special VM". Does it make sense to 
have !private there? I assume "for now, not".




2. hugepage support.

     KVM gmem can be allocated from hugetlbfs. How does QEMU determine
     when to allocate KVM gmem with KVM_GUEST_MEMFD_ALLOW_HUGEPAGE. The
     easiest solution is create KVM gmem with
KVM_GUEST_MEMFD_ALLOW_HUGEPAGE
     only when memory backend is HostMemoryBackendFile of hugetlbfs.


Good question.

Probably "if the memory backend uses huge pages, also use huge pages for
the private gmem" makes sense.

... but it becomes a mess with preallocation ... which is what people
should actually be using with hugetlb. Andeventual double
memory-consumption ... but maybe that's all been taken care of already?

Probably it's best to leave hugetlb support as future work and start
with something minimal.



As Sean replied, I had some misunderstanding of
KVM_GUEST_MEMFD_ALLOW_HUGEPAGE. If it's for THP, I think we can allow it
for every gmem.


Right, just like we do a MADV_HUGEPAGE rather blindly on all memory.

--
Cheers,

David / dhildenb

Re: [PATCH 4/4] target/i386: add live migration support for FRED

2023-09-21 Thread Yang, Weijiang


On 9/1/2023 1:30 PM, Li, Xin3 wrote:

FRED CPU states are managed in 10 FRED MSRs, in addtion to a few existing
CPU registers and MSRs, e.g., the CR4.FRED bit.

Add the 10 new FRED MSRs to x86 CPUArchState for live migration support.

Tested-by: Shan Kang 
Signed-off-by: Xin Li 
---
  target/i386/cpu.h | 24 +++
  target/i386/kvm/kvm.c | 54 +++
  target/i386/machine.c | 10 
  3 files changed, 88 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 924819a64c..a36a1a58c4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -529,6 +529,20 @@ typedef enum X86Seg {
  #define MSR_IA32_XFD0x01c4
  #define MSR_IA32_XFD_ERR0x01c5
  
+#define MSR_IA32_PL0_SSP0x06a4   /* Stack level 0 shadow stack pointer in ring 0 */

+
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0  0x01cc   /* Stack level 0 
regular stack pointer */
+#define MSR_IA32_FRED_RSP1  0x01cd   /* Stack level 1 
regular stack pointer */
+#define MSR_IA32_FRED_RSP2  0x01ce   /* Stack level 2 
regular stack pointer */
+#define MSR_IA32_FRED_RSP3  0x01cf   /* Stack level 3 
regular stack pointer */
+#define MSR_IA32_FRED_STKLVLS   0x01d0   /* FRED exception 
stack levels */
+#define MSR_IA32_FRED_SSP0  MSR_IA32_PL0_SSP /* Stack level 0 
shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP1  0x01d1   /* Stack level 1 
shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP2  0x01d2   /* Stack level 2 
shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP3  0x01d3   /* Stack level 3 
shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_CONFIG0x01d4   /* FRED Entrypoint 
and interrupt stack level */
+
  #define MSR_IA32_BNDCFGS0x0d90
  #define MSR_IA32_XSS0x0da0
  #define MSR_IA32_UMWAIT_CONTROL 0xe1
@@ -1680,6 +1694,16 @@ typedef struct CPUArchState {
  target_ulong cstar;
  target_ulong fmask;
  target_ulong kernelgsbase;
+target_ulong fred_rsp0;
+target_ulong fred_rsp1;
+target_ulong fred_rsp2;
+target_ulong fred_rsp3;
+target_ulong fred_stklvls;
+target_ulong fred_ssp0;
+target_ulong fred_ssp1;
+target_ulong fred_ssp2;
+target_ulong fred_ssp3;
+target_ulong fred_config;
  #endif
  
  uint64_t tsc_adjust;

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 639a242ad8..4b241c82d8 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3401,6 +3401,18 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
  kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
  kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
  kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
+if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP0, env->fred_ssp0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config);
+}
  }
  #endif
  
@@ -3901,6 +3913,18 @@ static int kvm_get_msrs(X86CPU *cpu)

  kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
  kvm_msr_entry_add(cpu, MSR_FMASK, 0);
  kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
+if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP0, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0);
+kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0);
+}
  }
  #endif
  kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
@@ -4123,6 +4147,36 @@ static int kvm_get_msrs(X86CPU *cpu)
  case MSR_LSTAR:
  env->lstar = msrs[i].data;
  break;
+case MSR_IA32_FRED_RSP0:
+env->fred_rsp0 = msrs[i].da

Re: [PATCH 08/52] migration/rdma: Fix qemu_rdma_accept() to return failure on errors

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> qemu_rdma_accept() returns 0 in some cases even when it didn't
> complete its job due to errors.  Impact is not obvious.  I figure the
> caller will soon fail again with a misleading error message.
> 
> Fix it to return -1 on any failure.
> 
> Signed-off-by: Markus Armbruster 

I noticed that ret initialization is also meaningless in qemu_rdma_accept()

3354 static int qemu_rdma_accept(RDMAContext *rdma)
3355 {
3356 RDMACapabilities cap;
3357 struct rdma_conn_param conn_param = {
3358 .responder_resources = 2,
3359 .private_data = &cap,
3360 .private_data_len = 
sizeof(cap),
3361  };
3362 RDMAContext *rdma_return_path = NULL;
3363 struct rdma_cm_event *cm_event;
3364 struct ibv_context *verbs;
3365 int ret = -EINVAL; < drop it ?
3366 int idx;


Reviewed-by: Li Zhijian

Re: [PATCH 09/52] migration/rdma: Put @errp parameter last

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> include/qapi/error.h demands:
> 
>   * - Functions that use Error to report errors have an Error **errp
>   *   parameter.  It should be the last parameter, except for functions
>   *   taking variable arguments.
> 
> qemu_rdma_connect() does not conform.  Clean it up.
> 
> Signed-off-by: Markus Armbruster 

Reviewed-by: Li Zhijian

Re: [RFC PATCH v2 00/21] QEMU gmem implemention

2023-09-21 Thread David Hildenbrand


2. hugepage support.

 KVM gmem can be allocated from hugetlbfs. How does QEMU determine


Not yet it can't.  gmem only supports THP, hugetlbfs is a future thing, if it's
ever supported.  I wouldn't be at all surprised if we end up going down a 
slightly
different route and don't use hugetlbfs directly.


Agreed. Certainly future work.




 when to allocate KVM gmem with KVM_GUEST_MEMFD_ALLOW_HUGEPAGE. The
 easiest solution is create KVM gmem with KVM_GUEST_MEMFD_ALLOW_HUGEPAGE
 only when memory backend is HostMemoryBackendFile of hugetlbfs.


Good question.

Probably "if the memory backend uses huge pages, also use huge pages for the
private gmem" makes sense.

... but it becomes a mess with preallocation ... which is what people should
actually be using with hugetlb. Andeventual double memory-consumption ...
but maybe that's all been taken care of already?

Probably it's best to leave hugetlb support as future work and start with
something minimal.



3. What is KVM_X86_SW_PROTECTED_VM going to look like? and do we need it?



Why implement it when you have to ask others for a motivation? ;)

Personally, I'm not sure if it is really useful, especially in this state.


Yeah, as of today, KVM_X86_SW_PROTECTED_VM is mainly a development vehicle,
e.g. so that testing gmem doesn't require TDX/SNP hardware, debugging gmem 
guests
isn't brutally painful, etc.

Longer term, I have aspirations of being able to back most VMs with gmem, but
that's going to require quite a bit more work, e.g. gmem needs to be mappable
(when hardware allows it) so that gmem doesn't all but require double mapping,
KVM obviously needs to be able to read/write gmem, etc.

The value proposition is that having a guest-first memory type will allow KVM to
optimize and harden gmem in ways that wouldn't be feasible for a more generic
memory implementation.  E.g. memory isn't mapped into host userspace by default
(makes it harder to accidentally corrupt the guest), the guest can have *larger*
mappings than host userspace, guest memory can be served from a dedicated pool
(similar-ish to hugetlb), the pool can be omitted from the direct map, etc.

Thanks for that information. Personally, I don't believe "to back most 
VMs with gmem", but that's a different discussion.


As a development vehicle to get TDX up and running it might be very 
valuable indeed. But it doesn't necessarily have to be merged in QEMU 
for that case -- especially in a semi-finished form.


--
Cheers,

David / dhildenb

[PATCH v2] plugins/hotblocks: Fix potential deadlock in plugin_exit() function

2023-09-21 Thread Cong Liu

This patch fixes a potential deadlock in the plugin_exit() function of QEMU.
The original code does not release the lock mutex if it is NULL. This patch
adds a check for it being NULL and releases the mutex in that case.

Signed-off-by: Cong Liu 
Suggested-by: Philippe Mathieu-Daudé 
---
 contrib/plugins/hotblocks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c
index 6b74d25fead6..b99b93ad8dc7 100644
--- a/contrib/plugins/hotblocks.c
+++ b/contrib/plugins/hotblocks.c
@@ -69,9 +69,9 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
 }
 
 g_list_free(it);
-g_mutex_unlock(&lock);
 }
 
+g_mutex_unlock(&lock);
 qemu_plugin_outs(report->str);
 }
 
-- 
2.34.1

Re: [PATCH 10/52] migration/rdma: Eliminate error_propagate()

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> When all we do with an Error we receive into a local variable is
> propagating to somewhere else, we can just as well receive it there
> right away.
> 
> Signed-off-by: Markus Armbruster 

Reviewed-by: Li Zhijian 


> ---
>   migration/rdma.c | 19 +++
>   1 file changed, 7 insertions(+), 12 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2b40bbcbb0..960fff5860 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2445,7 +2445,6 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>   static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error 
> **errp)
>   {
>   int ret, idx;
> -Error *local_err = NULL, **temp = &local_err;
>   
>   /*
>* Will be validated against destination's actual capabilities
> @@ -2453,14 +2452,14 @@ static int qemu_rdma_source_init(RDMAContext *rdma, 
> bool pin_all, Error **errp)
>*/
>   rdma->pin_all = pin_all;
>   
> -ret = qemu_rdma_resolve_host(rdma, temp);
> +ret = qemu_rdma_resolve_host(rdma, errp);
>   if (ret) {
>   goto err_rdma_source_init;
>   }
>   
>   ret = qemu_rdma_alloc_pd_cq(rdma);
>   if (ret) {
> -ERROR(temp, "rdma migration: error allocating pd and cq! Your 
> mlock()"
> +ERROR(errp, "rdma migration: error allocating pd and cq! Your 
> mlock()"
>   " limits may be too low. Please check $ ulimit -a # and 
> "
>   "search for 'ulimit -l' in the output");
>   goto err_rdma_source_init;
> @@ -2468,13 +2467,13 @@ static int qemu_rdma_source_init(RDMAContext *rdma, 
> bool pin_all, Error **errp)
>   
>   ret = qemu_rdma_alloc_qp(rdma);
>   if (ret) {
> -ERROR(temp, "rdma migration: error allocating qp!");
> +ERROR(errp, "rdma migration: error allocating qp!");
>   goto err_rdma_source_init;
>   }
>   
>   ret = qemu_rdma_init_ram_blocks(rdma);
>   if (ret) {
> -ERROR(temp, "rdma migration: error initializing ram blocks!");
> +ERROR(errp, "rdma migration: error initializing ram blocks!");
>   goto err_rdma_source_init;
>   }
>   
> @@ -2489,7 +2488,7 @@ static int qemu_rdma_source_init(RDMAContext *rdma, 
> bool pin_all, Error **errp)
>   for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
>   ret = qemu_rdma_reg_control(rdma, idx);
>   if (ret) {
> -ERROR(temp, "rdma migration: error registering %d control!",
> +ERROR(errp, "rdma migration: error registering %d control!",
>   idx);
>   goto err_rdma_source_init;
>   }
> @@ -2498,7 +2497,6 @@ static int qemu_rdma_source_init(RDMAContext *rdma, 
> bool pin_all, Error **errp)
>   return 0;
>   
>   err_rdma_source_init:
> -error_propagate(errp, local_err);
>   qemu_rdma_cleanup(rdma);
>   return -1;
>   }
> @@ -4103,7 +4101,6 @@ void rdma_start_incoming_migration(const char 
> *host_port, Error **errp)
>   {
>   int ret;
>   RDMAContext *rdma;
> -Error *local_err = NULL;
>   
>   trace_rdma_start_incoming_migration();
>   
> @@ -4113,13 +4110,12 @@ void rdma_start_incoming_migration(const char 
> *host_port, Error **errp)
>   return;
>   }
>   
> -rdma = qemu_rdma_data_init(host_port, &local_err);
> +rdma = qemu_rdma_data_init(host_port, errp);
>   if (rdma == NULL) {
>   goto err;
>   }
>   
> -ret = qemu_rdma_dest_init(rdma, &local_err);
> -
> +ret = qemu_rdma_dest_init(rdma, errp);
>   if (ret) {
>   goto err;
>   }
> @@ -4142,7 +4138,6 @@ void rdma_start_incoming_migration(const char 
> *host_port, Error **errp)
>   cleanup_rdma:
>   qemu_rdma_cleanup(rdma);
>   err:
> -error_propagate(errp, local_err);
>   if (rdma) {
>   g_free(rdma->host);
>   g_free(rdma->host_port);

[PATCH] Revert "tap: setting error appropriately when calling net_init_tap_one()"

2023-09-21 Thread Akihiko Odaki

This reverts commit 46d4d36d0bf2b24b205f2f604f0905db80264eef.

The reverted commit changed to emit warnings instead of errors when
vhost is requested but vhost initialization fails if vhostforce option
is not set.

However, vhostforce is not meant to ignore vhost errors. It was once
introduced as an option to commit 5430a28fe4 ("vhost: force vhost off
for non-MSI guests") to force enabling vhost for non-MSI guests, which
will have worse performance with vhost. The option was deprecated with
commit 1e7398a140 ("vhost: enable vhost without without MSI-X") and
changed to behave identical with the vhost option for compatibility.

Worse, commit bf769f742c ("virtio: del net client if net_init_tap_one
failed") changed to delete the client when vhost fails even when the
failure only results in a warning. The leads to an assertion failure
for the -netdev command line option.

The reverted commit was intended to avoid that the vhost initialization
failure won't result in a corrupted netdev. This problem should have
been fixed by deleting netdev when the initialization fails instead of
ignoring the failure with an arbitrary option. Fortunately, commit
bf769f742c ("virtio: del net client if net_init_tap_one failed"),
mentioned earlier, implements this behavior.

Restore the correct semantics and fix the assertion failure for the
-netdev command line option by reverting the problematic commit.

Signed-off-by: Akihiko Odaki 
---
 include/net/vhost_net.h |  3 ---
 net/tap.c   | 22 +-
 2 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index c37aba35e6..c6a5361a2a 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -4,9 +4,6 @@
 #include "net/net.h"
 #include "hw/virtio/vhost-backend.h"
 
-#define VHOST_NET_INIT_FAILED \
-"vhost-net requested but could not be initialized"
-
 struct vhost_net;
 typedef struct vhost_net VHostNetState;
 
diff --git a/net/tap.c b/net/tap.c
index 1bf085d422..c6639d9f20 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -730,11 +730,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, 
NetClientState *peer,
 if (vhostfdname) {
 vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
 if (vhostfd == -1) {
-if (tap->has_vhostforce && tap->vhostforce) {
-error_propagate(errp, err);
-} else {
-warn_report_err(err);
-}
+error_propagate(errp, err);
 goto failed;
 }
 if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
@@ -745,13 +741,8 @@ static void net_init_tap_one(const NetdevTapOptions *tap, 
NetClientState *peer,
 } else {
 vhostfd = open("/dev/vhost-net", O_RDWR);
 if (vhostfd < 0) {
-if (tap->has_vhostforce && tap->vhostforce) {
-error_setg_errno(errp, errno,
- "tap: open vhost char device failed");
-} else {
-warn_report("tap: open vhost char device failed: %s",
-strerror(errno));
-}
+error_setg_errno(errp, errno,
+ "tap: open vhost char device failed");
 goto failed;
 }
 if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
@@ -764,11 +755,8 @@ static void net_init_tap_one(const NetdevTapOptions *tap, 
NetClientState *peer,
 
 s->vhost_net = vhost_net_init(&options);
 if (!s->vhost_net) {
-if (tap->has_vhostforce && tap->vhostforce) {
-error_setg(errp, VHOST_NET_INIT_FAILED);
-} else {
-warn_report(VHOST_NET_INIT_FAILED);
-}
+error_setg(errp,
+   "vhost-net requested but could not be initialized");
 goto failed;
 }
 } else if (vhostfdname) {
-- 
2.42.0

Re: [PATCH 11/52] migration/rdma: Drop rdma_add_block() error handling

2023-09-21 Thread Zhijian Li (Fujitsu)



On 18/09/2023 22:41, Markus Armbruster wrote:
> rdma_add_block() can't fail.  Return void, and drop the unreachable
> error handling.
> 
> Signed-off-by: Markus Armbruster
> ---
>   migration/rdma.c | 30 +-
>   1 file changed, 9 insertions(+), 21 deletions(-)
> 

[...]

>* during dynamic page registration.
>*/
> -static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
> +static void qemu_rdma_init_ram_blocks(RDMAContext *rdma)
>   {
>   RDMALocalBlocks *local = &rdma->local_ram_blocks;
>   int ret;
> @@ -646,14 +645,11 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
>   assert(rdma->blockmap == NULL);
>   memset(local, 0, sizeof *local);
>   ret = foreach_not_ignored_block(qemu_rdma_init_one_block, rdma);
> -if (ret) {
> -return ret;
> -}
> +assert(!ret);

Why we still need a new assert(), can we remove the ret together.

 foreach_not_ignored_block(qemu_rdma_init_one_block, rdma);
 trace_qemu_rdma_init_ram_blocks(local->nb_blocks);


Thanks
Zhijian

>   trace_qemu_rdma_init_ram_blocks(local->nb_blocks);

Re: [PATCH v3] hw/i386/pc: improve physical address space bound check for 32-bit systems

2023-09-21 Thread Ani Sinha




> On 21-Sep-2023, at 12:47 PM, Ani Sinha  wrote:
> 
> 32-bit systems do not have a reserved memory for hole64 and hotplugging memory
> devices are not supported on those systems. Therefore, the maximum limit of 
> the
> guest physical address in the absence of additional memory devices effectively
> coincides with the end of "above 4G memory space" region. When users configure
> additional memory devices, we need to properly account for the additional 
> device
> memory region so as to find the maximum value of the guest physical address
> and enforce that it is within the physical address space of the processor. For
> 32-bit, this maximum PA will be outside the range of the processor's address
> space.
> 
> With this change, for example, previously this was allowed:
> 
> $ ./qemu-system-x86_64 -cpu pentium -m size=10G
> 
> Now it is no longer allowed:
> 
> $ ./qemu-system-x86_64 -cpu pentium -m size=10G
> qemu-system-x86_64: Address space limit 0x < 0x2bfff phys-bits 
> too low (32)
> 
> For 32-bit, hotplugging additional memory is no longer allowed.
> 
> $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
> qemu-system-i386: Address space limit 0x < 0x1 phys-bits too 
> low (32)
> 
> The above is still allowed for older machine types in order to support
> compatibility. Therefore, this still works:
> 
> $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2
> 
> After calling CPUID with EAX=0x8001, all AMD64 compliant processors
> have the longmode-capable-bit turned on in the extended feature flags (bit 29)
> in EDX. The absence of CPUID longmode can be used to differentiate between
> 32-bit and 64-bit processors and is the recommended approach. QEMU takes this
> approach elsewhere (for example, please see x86_cpu_realizefn()) and with
> this change, pc_max_used_gpa() also takes the same approach to detect 32-bit
> processors.
> 
> Unit tests are modified to not run those tests that use memory hotplug
> on 32-bit x86 architecture.
> 
> Finally, a new compatibility flag is introduced to retain the old behavior
> for pc_max_used_gpa() for machines 8.1 and older.

This patch has a bug. I forgot to add the knob for older q35 machines. Will fix 
in next version.

> 
> Suggested-by: David Hildenbrand 
> Signed-off-by: Ani Sinha 
> ---
> hw/i386/pc.c   | 34 +++---
> hw/i386/pc_piix.c  |  4 
> include/hw/i386/pc.h   |  3 +++
> tests/qtest/bios-tables-test.c | 26 ++
> tests/qtest/numa-test.c|  7 ++-
> 5 files changed, 62 insertions(+), 12 deletions(-)
> 
> changelog:
> v3: still accounting for additional memory device region above 4G.
> unit tests fixed (not running for 32-bit where mem hotplug is used).
> 
> v2: removed memory hotplug region from max_gpa. added compat knobs.
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 54838c0c41..0aa2f6b6c0 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -907,12 +907,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState 
> *pcms)
> static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
> {
> X86CPU *cpu = X86_CPU(first_cpu);
> +PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> +MachineState *ms = MACHINE(pcms);
> +uint64_t devmem_start = 0;
> +ram_addr_t devmem_size = 0;
> 
> -/* 32-bit systems don't have hole64 thus return max CPU address */
> -if (cpu->phys_bits <= 32) {
> -return ((hwaddr)1 << cpu->phys_bits) - 1;
> +/*
> + * 32-bit systems don't have hole64 but they might have a region for
> + * memory devices. Even if additional hotplugged memory devices might
> + * not be usable by most guest OSes, we need to still consider them for
> + * calculating the highest possible GPA so that we can properly report
> + * if someone configures them on a CPU that cannot possibly address them.
> + */
> +if (!(cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
> +/* 32-bit systems */
> +if (pcmc->fixed_32bit_mem_addr_check) {
> +if (pcmc->has_reserved_memory &&
> +(ms->ram_size < ms->maxram_size)) {
> +pc_get_device_memory_range(pcms, &devmem_start,
> +   &devmem_size);
> +if (!pcmc->broken_reserved_end) {
> +devmem_start += devmem_size;
> +}
> +return devmem_start - 1;
> +} else {
> +return pc_above_4g_end(pcms) - 1;
> +}
> +} else {
> +/* old value for compatibility reasons */
> +return ((hwaddr)1 << cpu->phys_bits) - 1;
> +}
> }
> 
> +/* 64-bit systems */
> return pc_pci_hole64_start() + pci_hole64_size - 1;
> }
> 
> @@ -1867,6 +1894,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
> *data)
> pcmc->pvh_enabled = true;
> pcmc->kvmclock_c

Re: [PATCH v3] hw/i386/pc: improve physical address space bound check for 32-bit systems

2023-09-21 Thread Ani Sinha




> On 21-Sep-2023, at 1:45 PM, David Hildenbrand  wrote:
> 
> On 21.09.23 09:17, Ani Sinha wrote:
>> 32-bit systems do not have a reserved memory for hole64 and hotplugging 
>> memory
>> devices are not supported on those systems. Therefore, the maximum limit of 
>> the
>> guest physical address in the absence of additional memory devices 
>> effectively
>> coincides with the end of "above 4G memory space" region. When users 
>> configure
>> additional memory devices, we need to properly account for the additional 
>> device
>> memory region so as to find the maximum value of the guest physical address
>> and enforce that it is within the physical address space of the processor. 
>> For
>> 32-bit, this maximum PA will be outside the range of the processor's address
>> space.
>> With this change, for example, previously this was allowed:
>> $ ./qemu-system-x86_64 -cpu pentium -m size=10G
>> Now it is no longer allowed:
>> $ ./qemu-system-x86_64 -cpu pentium -m size=10G
>> qemu-system-x86_64: Address space limit 0x < 0x2bfff phys-bits 
>> too low (32)
>> For 32-bit, hotplugging additional memory is no longer allowed.
> 
> "32-bit without PAE/PSE36"
> 
>> $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
>> qemu-system-i386: Address space limit 0x < 0x1 phys-bits too 
>> low (32)
> 
> We always place the device memory region above 4G. Without PAE/PSE36, you 
> cannot ever possibly make use hotplugged memory, because it would reside > 4g.
> 
> So while the user could have started that QEMU instance, even with an OS that 
> would support memory hotplug, a DIMM above 4G would not have been usable.
> 
> So we're now properly failing for a setup that doesn't make any sense. Good :)
> 
> ... if someone ever cares about making that work, we would have to let the 
> device memory region start below 4g (and obviously, not exceed 4g).
> 
> 
> So while
> 
> ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
> 
> fails (because pentium cannot access that memory), what should work is
> 
> ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 -cpu pentium2
> 
> or
> 
> ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 -cpu pentium,pse36=on
> 
> Because that CPU could actually address that memory somehow (PAE/PSE36).
> 
> 
> So IMHO, we're now forbidding setups that are impossible.
> 
>> The above is still allowed for older machine types in order to support
>> compatibility. Therefore, this still works:
>> $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2
> 
> Makes sense. (probably nobody cares, but better safe than sorry)
> 
>> After calling CPUID with EAX=0x8001, all AMD64 compliant processors
>> have the longmode-capable-bit turned on in the extended feature flags (bit 
>> 29)
>> in EDX. The absence of CPUID longmode can be used to differentiate between
>> 32-bit and 64-bit processors and is the recommended approach. QEMU takes this
>> approach elsewhere (for example, please see x86_cpu_realizefn()) and with
>> this change, pc_max_used_gpa() also takes the same approach to detect 32-bit
>> processors.
>> Unit tests are modified to not run those tests that use memory hotplug
>> on 32-bit x86 architecture.
> 
> We could use a different CPU (pentium2) to still run these tests. "pentium2" 
> should work I assume?

Yes it does.

> [...]
> 
>> @@ -907,12 +907,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState 
>> *pcms)
>>  static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t 
>> pci_hole64_size)
>>  {
>>  X86CPU *cpu = X86_CPU(first_cpu);
>> +PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>> +MachineState *ms = MACHINE(pcms);
>> +uint64_t devmem_start = 0;
>> +ram_addr_t devmem_size = 0;
>>  -/* 32-bit systems don't have hole64 thus return max CPU address */
>> -if (cpu->phys_bits <= 32) {
>> -return ((hwaddr)1 << cpu->phys_bits) - 1;
>> +/*
>> + * 32-bit systems don't have hole64 but they might have a region for
>> + * memory devices. Even if additional hotplugged memory devices might
>> + * not be usable by most guest OSes, we need to still consider them for
>> + * calculating the highest possible GPA so that we can properly report
>> + * if someone configures them on a CPU that cannot possibly address 
>> them.
>> + */
>> +if (!(cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
>> +/* 32-bit systems */
>> +if (pcmc->fixed_32bit_mem_addr_check) {
>> +if (pcmc->has_reserved_memory &&
>> +(ms->ram_size < ms->maxram_size)) {
>> +pc_get_device_memory_range(pcms, &devmem_start,
>> +   &devmem_size);
>> +if (!pcmc->broken_reserved_end) {
> 
> I think you can remove that check. "pcmc->fixed_32bit_mem_addr_check && 
> pcmc->broken_reserved_end" can never hold at the same time.

Yeah my bad, I was being too defensive. Will remove.

> 
> broken_reserved_end is only set for QEMU <=

Re: [PATCH v1 09/22] vfio/container: Introduce vfio_[attach/detach]_device

2023-09-21 Thread Cédric Le Goater


On 8/30/23 12:37, Zhenzhong Duan wrote:

From: Eric Auger 

We want the VFIO devices to be able to use two different
IOMMU callbacks, the legacy VFIO one and the new iommufd one.

Introduce vfio_[attach/detach]_device which aim at hiding the
underlying IOMMU backend (IOCTLs, datatypes, ...).

Once vfio_attach_device completes, the device is attached
to a security context and its fd can be used. Conversely
When vfio_detach_device completes, the device has been
detached to the security context.

In this patch, only the vfio-pci device gets converted to use
the new API. Subsequent patches will handle other devices.

Signed-off-by: Eric Auger 
Signed-off-by: Yi Liu 
Signed-off-by: Zhenzhong Duan 
---
  hw/vfio/container.c   | 66 +++
  hw/vfio/pci.c | 50 --
  hw/vfio/trace-events  |  2 +-
  include/hw/vfio/vfio-common.h |  3 ++
  4 files changed, 76 insertions(+), 45 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 175cdbbdff..74556da0c7 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1083,3 +1083,69 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
  }
  return vfio_eeh_container_op(container, op);
  }
+
+static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
+{
+char *tmp, group_path[PATH_MAX], *group_name;
+int ret, groupid;
+ssize_t len;
+
+tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+len = readlink(tmp, group_path, sizeof(group_path));
+g_free(tmp);
+
+if (len <= 0 || len >= sizeof(group_path)) {
+ret = len < 0 ? -errno : -ENAMETOOLONG;
+error_setg_errno(errp, -ret, "no iommu_group found");
+return ret;
+}
+
+group_path[len] = 0;
+
+group_name = basename(group_path);
+if (sscanf(group_name, "%d", &groupid) != 1) {
+error_setg_errno(errp, errno, "failed to read %s", group_path);
+return -errno;
+}
+return groupid;
+}


VFIO has 4 other  routines reading the iommu_group from sysfs :

  vfio_ccw_get_group()
  vfio_ap_get_group()
  vfio_base_device_init()
  sysfs_find_group_file()

which could use this helper. Thanks for introducing it !




+
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
+   AddressSpace *as, Error **errp)
+{
+int groupid = vfio_device_groupid(vbasedev, errp);
+VFIODevice *vbasedev_iter;
+VFIOGroup *group;
+int ret;
+
+if (groupid < 0) {
+return groupid;
+}
+
+group = vfio_get_group(groupid, as, errp);
+if (!group) {
+return -ENOENT;
+}
+
+QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
+error_setg(errp, "device is already attached");
+vfio_put_group(group);
+return -EBUSY;
+}
+}
+ret = vfio_get_device(group, name, vbasedev, errp);
+if (ret) {
+vfio_put_group(group);
+}
+
+return ret;
+}
+
+void vfio_detach_device(VFIODevice *vbasedev)
+{
+VFIOGroup *group = vbasedev->group;
+
+vfio_put_base_device(vbasedev);
+vfio_put_group(group);
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..34f65ecd17 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, 
Error **errp)
  
  static void vfio_put_device(VFIOPCIDevice *vdev)

  {
+vfio_detach_device(&vdev->vbasedev);
+
  g_free(vdev->vbasedev.name);
  g_free(vdev->msix);
-
-vfio_put_base_device(&vdev->vbasedev);
  }
  
  static void vfio_err_notifier_handler(void *opaque)

@@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
  {
  VFIOPCIDevice *vdev = VFIO_PCI(pdev);
  VFIODevice *vbasedev = &vdev->vbasedev;
-VFIODevice *vbasedev_iter;
-VFIOGroup *group;
-char *tmp, *subsys, group_path[PATH_MAX], *group_name;
+char *tmp, *subsys;
  Error *err = NULL;
-ssize_t len;
  struct stat st;
-int groupid;
  int i, ret;
  bool is_mdev;
  char uuid[UUID_FMT_LEN];
@@ -3015,38 +3011,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
  vbasedev->type = VFIO_DEVICE_TYPE_PCI;
  vbasedev->dev = DEVICE(vdev);
  
-tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);

-len = readlink(tmp, group_path, sizeof(group_path));
-g_free(tmp);
-
-if (len <= 0 || len >= sizeof(group_path)) {
-error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
- "no iommu_group found");
-goto error;
-}
-
-group_path[len] = 0;
-
-group_name = basename(group_path);
-if (sscanf(group_name, "%d", &groupid) != 1) {
-error_setg_errno(errp, errno, "failed to read %s", group_path);
-goto error;
-}
-
-trace_vfio_realize(vbasedev->name, groupid);
-
-group = vfio_get_group(groupid, pci_device_iommu_address_spa

1 2 3 >

1 - 100 of 236 matches

Mail list logo