[PATCH 0/4] ACPI related fixes

2021-02-04 Thread isaku . yamahata
From: Isaku Yamahata 

Miscellaneous bug fixes related to ACPI to play nice with guest BIOSes/OSes
by conforming to ACPI spec better.

Isaku Yamahata (3):
  acpi/core: always set SCI_EN when SMM isn't supported
  acpi: set fadt.smi_cmd to zero when SMM is not supported
  hw/i386: declare ACPI mother board resource for MMCONFIG region

Sean Christopherson (1):
  i386: acpi: Don't build HPET ACPI entry if HPET is disabled

 hw/acpi/core.c |  11 ++-
 hw/acpi/ich9.c |   2 +-
 hw/acpi/piix4.c|   3 +-
 hw/i386/acpi-build.c   | 188 +++--
 hw/isa/vt82c686.c  |   2 +-
 include/hw/acpi/acpi.h |   4 +-
 6 files changed, 200 insertions(+), 10 deletions(-)

-- 
2.17.1




[PATCH 2/4] acpi: set fadt.smi_cmd to zero when SMM is not supported

2021-02-04 Thread isaku . yamahata
From: Isaku Yamahata 

From table 5.9 SMI_CMD of ACPI spec:
> This field is reserved and must be zero on system
> that does not support System Management mode.

So when SMM is not enabled, set it to zero to conform to the spec.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/acpi-build.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index f56d699c7f..005bcc2886 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -139,6 +139,8 @@ const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio = {
 static void init_common_fadt_data(MachineState *ms, Object *o,
   AcpiFadtData *data)
 {
+X86MachineState *x86ms = X86_MACHINE(ms);
+bool smm_enabled = x86_machine_is_smm_enabled(x86ms);
 uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
 AmlAddressSpace as = AML_AS_SYSTEM_IO;
 AcpiFadtData fadt = {
@@ -159,12 +161,12 @@ static void init_common_fadt_data(MachineState *ms, 
Object *o,
 .rtc_century = RTC_CENTURY,
 .plvl2_lat = 0xfff /* C2 state not supported */,
 .plvl3_lat = 0xfff /* C3 state not supported */,
-.smi_cmd = ACPI_PORT_SMI_CMD,
+.smi_cmd = smm_enabled ? ACPI_PORT_SMI_CMD : 0,
 .sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL),
 .acpi_enable_cmd =
-object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL),
+smm_enabled ? object_property_get_uint(o, 
ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL) : 0,
 .acpi_disable_cmd =
-object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL),
+smm_enabled ? object_property_get_uint(o, 
ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL) : 0,
 .pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io },
 .pm1a_cnt = { .space_id = as, .bit_width = 2 * 8,
   .address = io + 0x04 },
-- 
2.17.1




[PATCH 3/4] hw/i386: declare ACPI mother board resource for MMCONFIG region

2021-02-04 Thread isaku . yamahata
From: Isaku Yamahata 

Declare PNP0C01 device to reserve MMCONFIG region to conform to the
spec better and play nice with guest BIOSes/OSes.

According to PCI Firmware Specification, MMCONFIG region must be
reserved by declaring a motherboard resource. It's optional to reserve
the region in memory map by Int 15 E820h or EFIGetMemoryMap.
If the guest BIOS doesn't reserve the region in the memory map and there is
no reservation by a motherboard resource either, guest Linux gives up on
using MMCFG.

TDVF [0] [1] doesn't reserve the MMCONFIG region in the memory map.
On the other hand, OVMF reserves it in the memory map without declaring a
motherboard resource. With the memory map reservation, a Linux guest uses
the MMCONFIG region. However, that doesn't comply with the PCI Firmware
specification.

[0] TDX: Intel Trust Domain Extension

https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
[1] TDX Virtual Firmware
https://github.com/tianocore/edk2-staging/tree/TDVF

Signed-off-by: Isaku Yamahata 
Acked-by: Jiewen Yao 
---
 hw/i386/acpi-build.c | 172 +++
 1 file changed, 172 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 005bcc2886..6e38f67120 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1062,6 +1062,177 @@ static void build_q35_pci0_int(Aml *table)
 aml_append(table, sb_scope);
 }
 
+static Aml *build_q35_dram_controller(void)
+{
+/*
+ * DSDT is created with revision 1 which means 32bit integer.
+ * When the method of _CRS is called to determine MMCONFIG region,
+ * only port io is allowed to access PCI configuration space.
+ * It means qword access isn't allowed.
+ *
+ * Device(DRAC)
+ * {
+ * Name(_HID, EisaId("PNP0C01"))
+ * OperationRegion(DRR0, PCI_Config, 0x0060, 0x8)
+ * Field(DRR0, DWordAcc, Lock, Preserve)
+ * {
+ * PEBL, 4,
+ * PEBH, 4
+ * }
+ * Name(RBUF, ResourceTemplate()
+ * {
+ * QWordMemory(ResourceConsumer,
+ * PosDecode,
+ * MinFixed,
+ * MaxFixed,
+ * NonCacheable,
+ * ReadWrite,
+ * 0x, // Granularity
+ * 0x, // Range Minimum
+ * 0x, // Range Maxium
+ * 0x, // Translation Offset,
+ * 0x, // Length,
+ * ,,
+ * _MCF,
+ * AddressRangeMemory,
+ * TypeStatic
+ * )
+ * })
+ * Method(_CRS, 0x0, NotSerialized)
+ * {
+ * CreateDWordField(RBUF, DRAC._MCF._MIN, MINL)
+ * CreateDWordField(RBUF, DRAC._MCF._MIN + 4, MINH)
+ * CreateDWordField(RBUF, DRAC._MCF._MAX, MAXL)
+ * CreateDWordField(RBUF, DRAC._MCF._MAX + 4, MAXH)
+ * CreateQWordField(RBUF, DRAC._MCF._LEN, _LEN)
+ *
+ * Local0 = PEBL
+ * Local1 = Local0 & 0x1  // PCIEXBAR PCIEBAREN
+ * Local2 = Local0 & 0x6  // PCIEXBAR LENGTH
+ * Local3 = Local0 & ~0x7 // PCIEXBAR base address low 32bit
+ * Local4 = PEBH  // PCIEXBAR base address high 32bit
+ * If (Local1 == 1) {
+ * MINL = Local3
+ * MINH = Local4
+ * MAXL = Local3
+ * MAXH = Local4
+ *
+ * If (Local2 == 0) {
+ * _LEN = 256 * 1024 * 1024
+ * }
+ * If (Local2 == 0x2) {
+ * _LEN = 128 * 1024 * 1024
+ * }
+ * If (Local2 == 0x4) {
+ * _LEN = 64 * 1024 * 1024
+ * }
+ * }
+ * return (RBUF)
+ * }
+ * }
+ */
+
+Aml *dev;
+Aml *field;
+Aml *rbuf;
+Aml *resource_template;
+Aml *crs;
+
+/* DRAM controller */
+dev = aml_device("DRAC");
+
+aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C01")));
+/* 5.1.6 PCIEXBAR: Bus 0:Device 0:Function 0:offset 0x60 */
+aml_append(dev, aml_operation_region("DRR0", AML_PCI_CONFIG,
+ aml_int(0x0060), 0x8));
+field = aml_field("DRR0", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE);
+aml_append(field, aml_named_field("PEBL", 32));
+aml_append(field, aml_named_field("PEBH", 32));
+aml_append(dev, field);
+
+resource_template = aml_resource_template();
+aml_append(resource_template, aml_qword_memory(AML_POS_DECODE,
+   AML_MIN_FIXED,
+   AML_MAX_FIXED,
+ 

[PATCH 1/4] acpi/core: always set SCI_EN when SMM isn't supported

2021-02-04 Thread isaku . yamahata
From: Isaku Yamahata 

If SMM is not supported, ACPI fixed hardware doesn't support
legacy mode, i.e. it is an ACPI-only platform where SCI_EN in the PM1_CNT
register is always set.
The bit tells the OS whether the hardware is in legacy mode (SCI_EN cleared)
or ACPI mode (SCI_EN set).

ACPI spec 4.8.10.1 PM1 Event Grouping
PM1 Enable Registers
> For ACPI-only platforms (where SCI_EN is always set)

Signed-off-by: Isaku Yamahata 
---
 hw/acpi/core.c | 11 ++-
 hw/acpi/ich9.c |  2 +-
 hw/acpi/piix4.c|  3 ++-
 hw/isa/vt82c686.c  |  2 +-
 include/hw/acpi/acpi.h |  4 +++-
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 7170bff657..1e004d0078 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -579,6 +579,10 @@ void acpi_pm1_cnt_update(ACPIREGS *ar,
  bool sci_enable, bool sci_disable)
 {
 /* ACPI specs 3.0, 4.7.2.5 */
+if (ar->pm1.cnt.acpi_only) {
+return;
+}
+
 if (sci_enable) {
 ar->pm1.cnt.cnt |= ACPI_BITMASK_SCI_ENABLE;
 } else if (sci_disable) {
@@ -608,11 +612,13 @@ static const MemoryRegionOps acpi_pm_cnt_ops = {
 };
 
 void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
-   bool disable_s3, bool disable_s4, uint8_t s4_val)
+   bool disable_s3, bool disable_s4, uint8_t s4_val,
+   bool acpi_only)
 {
 FWCfgState *fw_cfg;
 
 ar->pm1.cnt.s4_val = s4_val;
+ar->pm1.cnt.acpi_only = acpi_only;
 ar->wakeup.notify = acpi_notify_wakeup;
 qemu_register_wakeup_notifier(&ar->wakeup);
 
@@ -638,6 +644,9 @@ void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
 void acpi_pm1_cnt_reset(ACPIREGS *ar)
 {
 ar->pm1.cnt.cnt = 0;
+if (ar->pm1.cnt.acpi_only) {
+ar->pm1.cnt.cnt |= ACPI_BITMASK_SCI_ENABLE;
+}
 }
 
 /* ACPI GPE */
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 5ff4e01c36..1a34d7f621 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -282,7 +282,7 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
 acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io);
 acpi_pm1_evt_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io);
 acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->disable_s3, pm->disable_s4,
-  pm->s4_val);
+  pm->s4_val, !smm_enabled);
 
 acpi_gpe_init(&pm->acpi_regs, ICH9_PMIO_GPE0_LEN);
 memory_region_init_io(&pm->io_gpe, OBJECT(lpc_pci), &ich9_gpe_ops, pm,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 669be5bbf6..0cddf91de5 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -496,7 +496,8 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
 
 acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io);
 acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io);
-acpi_pm1_cnt_init(&s->ar, &s->io, s->disable_s3, s->disable_s4, s->s4_val);
+acpi_pm1_cnt_init(&s->ar, &s->io, s->disable_s3, s->disable_s4, s->s4_val,
+  !s->smm_enabled);
 acpi_gpe_init(&s->ar, GPE_LEN);
 
 s->powerdown_notifier.notify = piix4_pm_powerdown_req;
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index a6f5a0843d..071b64b497 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -240,7 +240,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error 
**errp)
 
 acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io);
 acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io);
-acpi_pm1_cnt_init(&s->ar, &s->io, false, false, 2);
+acpi_pm1_cnt_init(&s->ar, &s->io, false, false, 2, false);
 }
 
 static Property via_pm_properties[] = {
diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h
index 22b0b65bb2..9e8a76f2e2 100644
--- a/include/hw/acpi/acpi.h
+++ b/include/hw/acpi/acpi.h
@@ -128,6 +128,7 @@ struct ACPIPM1CNT {
 MemoryRegion io;
 uint16_t cnt;
 uint8_t s4_val;
+bool acpi_only;
 };
 
 struct ACPIGPE {
@@ -163,7 +164,8 @@ void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn 
update_sci,
 
 /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
 void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
-   bool disable_s3, bool disable_s4, uint8_t s4_val);
+   bool disable_s3, bool disable_s4, uint8_t s4_val,
+   bool acpi_only);
 void acpi_pm1_cnt_update(ACPIREGS *ar,
  bool sci_enable, bool sci_disable);
 void acpi_pm1_cnt_reset(ACPIREGS *ar);
-- 
2.17.1




Re: gitlab containers are broken

2021-02-04 Thread Thomas Huth

On 04/02/2021 07.27, Richard Henderson wrote:

On 2/3/21 8:03 PM, Thomas Huth wrote:

On 04/02/2021 00.04, Richard Henderson wrote:

Something has gone wrong with the building of the containers
in gitlab, because *all* of them are installing Alpine Linux.

https://gitlab.com/rth7680/qemu/-/jobs/1006336396#L155


I think that's ok ... the output about alpine that you see there is just the
output from the container that builds the final container. Later you can see
some "yum install" lines in that output, too, that's where the CentOS container
gets built. And the final compilation job runs on CentOS, too:

  https://gitlab.com/rth7680/qemu/-/jobs/1006336699#L35

(look for the string "Red Hat" there)


Hmm.  Is there any way to get the full output of the container build?  At
present it's being truncated:

#7 [4/5] RUN yum install -y bzip2 bzip2-devel ccache csnappy-de...


In particular, I'm trying to add a new test, and I have added libffi-devel.i686
to the fedora-i386-cross.docker file, but then the actual build fails because
the libffi header file is missing.

I know you may need the actual patch to comment, but pointers to how to debug
this sort of failure are welcome.


I don't have a clue, all that container magic has been done by Daniel 
initially - maybe he can help (now on CC:) ...


 Thomas




[PATCH 4/4] i386: acpi: Don't build HPET ACPI entry if HPET is disabled

2021-02-04 Thread isaku . yamahata
From: Sean Christopherson 

Omit HPET AML if the HPET is disabled, QEMU is not emulating it and the
guest may get confused by seeing HPET in the ACPI tables without a
"physical" device present.

Signed-off-by: Sean Christopherson 
---
 hw/i386/acpi-build.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 6e38f67120..a4fcd14a93 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1401,7 +1401,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 aml_append(sb_scope, dev);
 aml_append(dsdt, sb_scope);
 
-build_hpet_aml(dsdt);
+if (misc->has_hpet) {
+build_hpet_aml(dsdt);
+}
 build_piix4_isa_bridge(dsdt);
 build_isa_devices_aml(dsdt);
 if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
@@ -1446,7 +1448,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 
 aml_append(dsdt, sb_scope);
 
-build_hpet_aml(dsdt);
+if (misc->has_hpet) {
+build_hpet_aml(dsdt);
+}
 build_q35_isa_bridge(dsdt);
 build_isa_devices_aml(dsdt);
 build_q35_pci0_int(dsdt);
-- 
2.17.1




Re: [PATCH 0/4] ACPI related fixes

2021-02-04 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/cover.1612424814.git.isaku.yamah...@intel.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: cover.1612424814.git.isaku.yamah...@intel.com
Subject: [PATCH 0/4] ACPI related fixes

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/cover.1612424814.git.isaku.yamah...@intel.com -> 
patchew/cover.1612424814.git.isaku.yamah...@intel.com
Switched to a new branch 'test'
8eea4e1 i386: acpi: Don't build HPET ACPI entry if HPET is disabled
4f0ab1b hw/i386: declare ACPI mother board resource for MMCONFIG region
6947812 acpi: set fadt.smi_cmd to zero when SMM is not supported
4ae92fd acpi/core: always set SCI_EN when SMM isn't supported

=== OUTPUT BEGIN ===
1/4 Checking commit 4ae92fd8e8a5 (acpi/core: always set SCI_EN when SMM isn't 
supported)
2/4 Checking commit 6947812c8e69 (acpi: set fadt.smi_cmd to zero when SMM is 
not supported)
ERROR: line over 90 characters
#41: FILE: hw/i386/acpi-build.c:167:
+smm_enabled ? object_property_get_uint(o, 
ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL) : 0,

ERROR: line over 90 characters
#44: FILE: hw/i386/acpi-build.c:169:
+smm_enabled ? object_property_get_uint(o, 
ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL) : 0,

total: 2 errors, 0 warnings, 23 lines checked

Patch 2/4 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

3/4 Checking commit 4f0ab1b9744c (hw/i386: declare ACPI mother board resource 
for MMCONFIG region)
4/4 Checking commit 8eea4e187ac8 (i386: acpi: Don't build HPET ACPI entry if 
HPET is disabled)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/cover.1612424814.git.isaku.yamah...@intel.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH] arm: xlnx-versal: fix virtio-mmio base address assignment

2021-02-04 Thread Philippe Mathieu-Daudé
Hi,

Please Cc the maintainers when posting your patch:

./scripts/get_maintainer.pl -f hw/arm/xlnx-versal-virt.c
Alistair Francis  (maintainer:Xilinx ZynqMP and...)
"Edgar E. Iglesias"  (maintainer:Xilinx ZynqMP
and...)
Peter Maydell  (maintainer:Xilinx ZynqMP and...)
qemu-...@nongnu.org (open list:Xilinx ZynqMP and...)

On 2/4/21 7:58 AM, schspa wrote:
> 
> At the moment the following QEMU command line triggers an assertion
> failure On xlnx-versal SOC:
>   qemu-system-aarch64 \
>   -machine xlnx-versal-virt -nographic -smp 2 -m 128 \
>   -fsdev local,id=shareid,path=${HOME}/work,security_model=none \
>   -device virtio-9p-device,fsdev=shareid,mount_tag=share \
>   -fsdev local,id=shareid1,path=${HOME}/Music,security_model=none \
>   -device virtio-9p-device,fsdev=shareid1,mount_tag=share1
> 
>   qemu-system-aarch64: ../migration/savevm.c:860:
>   vmstate_register_with_alias_id:
>   Assertion `!se->compat || se->instance_id == 0' failed.
> 
> This problem was fixed on arm virt platform in patch
>  
> https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01119.html

Please use instead "in commit f58b39d2d5b ("virtio-mmio: format
transport base address in BusClass.get_dev_path")".

> It works perfectly on the arm virt platform, but the problem is still there
> on the xlnx-versal SOC.
> 
> The main difference between arm virt and xlnx-versal is they use
> different way to create virtio-mmio qdev. on arm virt, it calls
> sysbus_create_simple("virtio-mmio", base, pic[irq]); which will call
> sysbus_mmio_map internally and assign base address to subsys device
> mmio correctly. but xlnx-versal's implements won't do this.
> 
> However, xlnx-versal can't switch to sysbus_create_simple() to create
> virtio-mmio device. It's because xlnx-versal's cpu use
> VersalVirt.soc.fpd.apu.mr as it's memory. which is subregion of
> system_memory. sysbus_create_simple will add virtio to system_memory,
> which can't be accessed by cpu.
> 
> We can solve this by simply assign mmio[0].addr directly. makes
> virtio_mmio_bus_get_dev_path to produce correct unique device path.
> 
> Signed-off-by: schspa 
> ---
>  hw/arm/xlnx-versal-virt.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
> index 8482cd6196..87b92ec6c3 100644
> --- a/hw/arm/xlnx-versal-virt.c
> +++ b/hw/arm/xlnx-versal-virt.c
> @@ -490,6 +490,7 @@ static void create_virtio_regions(VersalVirt *s)
>  object_property_add_child(OBJECT(&s->soc), name, OBJECT(dev));
>  sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>  sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic_irq);
> +    SYS_BUS_DEVICE(dev)->mmio[0].addr = base;

The proper API call is:

   sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);

>  mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
>  memory_region_add_subregion(&s->soc.mr_ps, base, mr);
>  g_free(name);
> 




Re: [PATCH v4 2/3] virtiofsd: optionally return inode pointer from lo_do_lookup()

2021-02-04 Thread Greg Kurz
On Wed, 3 Feb 2021 17:00:06 +
Stefan Hajnoczi  wrote:

> On Wed, Feb 03, 2021 at 03:20:14PM +0100, Greg Kurz wrote:
> > On Wed,  3 Feb 2021 11:37:18 +
> > Stefan Hajnoczi  wrote:
> > 
> > > lo_do_lookup() finds an existing inode or allocates a new one. It
> > > increments nlookup so that the inode stays alive until the client
> > > releases it.
> > > 
> > > Existing callers don't need the struct lo_inode so the function doesn't
> > > return it. Extend the function to optionally return the inode. The next
> > > commit will need it.
> > > 
> > > Signed-off-by: Stefan Hajnoczi 
> > > ---
> > >  tools/virtiofsd/passthrough_ll.c | 29 +
> > >  1 file changed, 21 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/tools/virtiofsd/passthrough_ll.c 
> > > b/tools/virtiofsd/passthrough_ll.c
> > > index e63cbd3fb7..c87a1f3d72 100644
> > > --- a/tools/virtiofsd/passthrough_ll.c
> > > +++ b/tools/virtiofsd/passthrough_ll.c
> > > @@ -831,11 +831,13 @@ static int do_statx(struct lo_data *lo, int dirfd, 
> > > const char *pathname,
> > >  }
> > >  
> > >  /*
> > > - * Increments nlookup and caller must release refcount using
> > > - * lo_inode_put(&parent).
> > > + * Increments nlookup on the inode on success. unref_inode_lolocked() 
> > > must be
> > > + * called eventually to decrement nlookup again. If inodep is non-NULL, 
> > > the
> > > + * inode pointer is stored and the caller must call lo_inode_put().
> > >   */
> > >  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char 
> > > *name,
> > > -struct fuse_entry_param *e)
> > > +struct fuse_entry_param *e,
> > > +struct lo_inode **inodep)
> > >  {
> > >  int newfd;
> > >  int res;
> > > @@ -845,6 +847,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t 
> > > parent, const char *name,
> > >  struct lo_inode *inode = NULL;
> > >  struct lo_inode *dir = lo_inode(req, parent);
> > >  
> > > +if (inodep) {
> > > +*inodep = NULL;
> > > +}
> > > +
> > 
> > Is this side-effect needed ? If lo_do_lookup() returns an error, it
> > rather seems that the caller shouldn't expect anything to be written
> > here, i.e. the content of *inodep still belongs to the caller and
> > whatever value it previously put in there (as patch 3/3 does) should
> > be preserved IMHO.
> > 
> > Apart from that LGTM.
> 
> I like this approach because it prevents accessing uninitialized memory
> in the caller:
> 
>   struct lo_inode *inode;
> 
>   if (lo_do_lookup(..., &inodep) != 0) {
> goto err;
>   }
>   ...
> 
>   err:
>   lo_inode_put(&inode); <-- uninitialized in the error case!

My point is that it is the caller's business to ensure that inode
doesn't contain garbage if it is to be used irrespective of the
outcome of lo_do_lookup(). This is precisely what patch 3/3 does,
so I don't understand the ultimate purpose of nullifying the
inode pointer _again_ in lo_do_lookup()...


pgpIcABmD1V5b.pgp
Description: OpenPGP digital signature


Re: [PATCH v2 23/36] block: adapt bdrv_append() for inserting filters

2021-02-04 Thread Vladimir Sementsov-Ogievskiy

04.02.2021 00:33, Kevin Wolf wrote:

Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:

bdrv_append is not very good for inserting filters: it does extra
permission update as part of bdrv_set_backing_hd(). During this update
filter may conflict with other parents of top_bs.

Instead, let's first do all graph modifications and after it update
permissions.


This sounds like it fixes a bug. If so, should we have a test like for
the other cases fixed by this series?


Hm. I considered it mostly a shortcoming, not a bug. We just have to work around this shortcoming by using the
"inactive" mode of filters. But adding a test is a good idea anyway. Will do.




Note: bdrv_append() still only works for backing-child based
filters. It's something to improve later.

It simplifies the fact that bdrv_append() used to append new nodes,
without backing child. Let's add an assertion.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block.c | 28 +---
  1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index 02da1a90bc..7094922509 100644
--- a/block.c
+++ b/block.c
@@ -4998,22 +4998,28 @@ int bdrv_replace_node(BlockDriverState *from, 
BlockDriverState *to,
  int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
  Error **errp)
  {
-Error *local_err = NULL;
+int ret;
+GSList *tran = NULL;
  
-bdrv_set_backing_hd(bs_new, bs_top, &local_err);

-if (local_err) {
-error_propagate(errp, local_err);
-return -EPERM;
+assert(!bs_new->backing);
+
+ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
+   &child_of_bds, bdrv_backing_role(bs_new),
+   &bs_new->backing, &tran, errp);
+if (ret < 0) {
+goto out;
  }


I don't think changing bs->backing without bdrv_set_backing_hd() is
correct at the moment. We lose a few things:

1. The bdrv_is_backing_chain_frozen() check
2. Updating backing_hd->inherits_from if necessary
3. bdrv_refresh_limits()

If I'm not missing anything, all of these are needed in the context of
bdrv_append().


I decided that bdrv_append() is only for appending new nodes, so frozen and
inherits_from checks are not needed. And I've added assert(!bs_new->backing)...

Checking this now:

- appending filters is obvious
- bdrv_append_temp_snapshot() creates new qcow2 node based on tmp file, don't 
see any backing initialization (and it would be rather strange)
- external_snapshot_prepare() do check if (bdrv_cow_child(state->new_bs)) {  
error-out }

So everything is OK. I should describe it in commit message and add a comment 
to bdrv_append.




-bdrv_replace_node(bs_top, bs_new, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-bdrv_set_backing_hd(bs_new, NULL, &error_abort);
-return -EPERM;
+ret = bdrv_replace_node_noperm(bs_top, bs_new, true, &tran, errp);
+if (ret < 0) {
+goto out;
  }
  
-return 0;

+ret = bdrv_refresh_perms(bs_new, errp);
+out:
+tran_finalize(tran, ret);
+
+return ret;
  }


Kevin




--
Best regards,
Vladimir



Re: [PATCH 11/20] ui: add an optional get_flags callback to GraphicHwOps

2021-02-04 Thread Gerd Hoffmann
On Thu, Feb 04, 2021 at 02:21:02AM +0400, Marc-André Lureau wrote:
> On Wed, Feb 3, 2021 at 7:48 PM Gerd Hoffmann  wrote:
> 
> > > +static int
> > > +virtio_gpu_get_flags(void *opaque)
> > > +{
> > > +VirtIOGPUBase *g = opaque;
> > > +int flags = GRAPHIC_FLAGS_NONE;
> > > +
> > > +if (virtio_gpu_virgl_enabled(g->conf))
> > > +flags |= GRAPHIC_FLAGS_GL;
> > > +
> > > +if (virtio_gpu_dmabuf_enabled(g->conf))
> > > +flags |= GRAPHIC_FLAGS_DMABUF;
> >
> > fbe6ba76ac01 ui: add an optional get_flags callback to GraphicHwOps
> > ERROR: braces {} are necessary for all arms of this statement
> > #50: FILE: hw/display/virtio-gpu-base.c:123:
> > +if (virtio_gpu_virgl_enabled(g->conf))
> > [...]
> >
> > ERROR: braces {} are necessary for all arms of this statement
> > #53: FILE: hw/display/virtio-gpu-base.c:126:
> > +if (virtio_gpu_dmabuf_enabled(g->conf))
> > [...]
> >
> > total: 2 errors, 0 warnings, 68 lines checked
> >
> 
> If you queued the series, do you mind squashing a style fix? Otherwise I
> can resend.

Whatever is easier for you (note there are more codestyle warnings in
following patches, see patchew report).

take care,
  Gerd




Re: [PATCH 4/4] hw/usb/bus: Remove the "full-path" property

2021-02-04 Thread Gerd Hoffmann
  Hi,

>  enum USBDeviceFlags {
> -USB_DEV_FLAG_FULL_PATH,
> +USB_DEV_FLAG_FULL_PATH, /* unused since QEMU v6.0 */

Why not just drop it?  Any remaining users?

take care,
  Gerd




Re: [PATCH v4 06/14] qapi/introspect.py: replace 'extra' dict with 'comment' argument

2021-02-04 Thread Markus Armbruster
John Snow  writes:

> On 2/3/21 9:23 AM, Markus Armbruster wrote:
>> John Snow  writes:
>> 
>>> This is only used to pass in a dictionary with a comment already set, so
>>> skip the runaround and just accept the comment.
>>>
>>> This works because _tree_to_qlit() treats 'if': None; 'comment': None
>>> exactly like absent 'if'; 'comment'.
>> Confusing, because the two paragraphs talk about two different
>> things:
>> 1. Actual arguments for @extra are either None or {'comment':
>> comment}.
>> Simplify: replace parameter @extra by parameter @comment.
>> 2. Dumb down the return value to always be of the form
>>  (obj {'if': ifcond, 'comment': comment})
>> 
>
> I think you are drawing attention to the fact that 'if' and 'comment'
> are now always present in this dict instead of conditionally present.

Correct.

> (else, I have misread you. (I think you are missing a comma.))

I am!  I meant to write

(obj, {'if': ifcond, 'comment': comment})

>> I suspect splitting the patch is easier than crafting a clear commit
>> message for the combined one.
>> 
>
> I wouldn't have considered to break out such a small change into two
> even smaller changes, but as you are in charge here ... Okey Dokey.
>
> (meta-tangent: [1])
[...]
> [1] As a matter of process, I sometimes find it cumbersome to
> intentionally engineer an intermediary state when I jumped straight
> from A->C in my actual editing.

Yes, the extra work can be cumbersome.  But then writing a neat commit
message for a commit that does two things can also be cumbersome.
"Split and write two straightforward commit messages" has proven easier
for me many times.

> I will usually keep such intermediary forms when they come about
> naturally in the course of development, but rarely seek to add them 
> artificially -- it feels like a major bummer to engineer, test, and
> scrutinize code that's only bound to be deleted immediately after. 
> Sometimes, it feels like a waste of reviewer effort, too.

It depends.  Sometimes "don't split and write a complicated commit
message" is easier.

Which way you get to "commit message(s) don't confuse Markus" in this
particular case is up to you :)

> It's been years and I still don't think I have any real intuitive
> sense for this, which is ...unfortunate.

It's been years, and my intuition still evolves.




Re:Re: [PATCH] blockjob: Fix crash with IOthread when block commit after snapshot

2021-02-04 Thread Michael Qiu
Hi, Kevin


Any comments about this patch? 
The lock release action is added by the commit 132ada80 "block: Adjust 
AioContexts when attaching nodes"


My patch is to avoid some crash cases, and it does indeed touch the code from that
commit.


Thanks,
Michael
At 2021-02-03 15:45:07, "Vladimir Sementsov-Ogievskiy" 
 wrote:
>subject should start with [PATCH v5]
>
>03.02.2021 05:40, 08005...@163.com wrote:
>> From: Michael Qiu 
>> 
>> v5: reformat the commit log with backtrace of main thread
>>  Add a boolean variable to make main thread could re-acquire
>>  aio_context on success path.
>> 
>> v4: rebase to latest code
>> 
>> v3: reformat the commit log, remove duplicate content
>
>patch history shouldn't go into commit message. So you should place it under 
>'---' [*], after calling git format-patch
>
>> 
>> Currently, if guest has workloads, IO thread will acquire aio_context
>> lock before do io_submit, it leads to segmentfault when do block commit
>> after snapshot. Just like below:
>> 
>> Program received signal SIGSEGV, Segmentation fault.
>> 
>> [Switching to Thread 0x7f7c7d91f700 (LWP 99907)]
>> 0x5576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437
>> 1437../block/mirror.c: No such file or directory.
>> (gdb) p s->job
>> $17 = (MirrorBlockJob *) 0x0
>> (gdb) p s->stop
>> $18 = false
>> 
>> Call trace of IO thread:
>> 0  0x5576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437
>> 1  0x5576d0f7f3ab in bdrv_driver_pwritev at ../block/io.c:1174
>> 2  0x5576d0f8139d in bdrv_aligned_pwritev at ../block/io.c:1988
>> 3  0x5576d0f81b65 in bdrv_co_pwritev_part at ../block/io.c:2156
>> 4  0x5576d0f8e6b7 in blk_do_pwritev_part at ../block/block-backend.c:1260
>> 5  0x5576d0f8e84d in blk_aio_write_entry at ../block/block-backend.c:1476
>> ...
>> 
>> Switch to qemu main thread:
>> 0  0x7f903be704ed in __lll_lock_wait at
>> /lib/../lib64/libpthread.so.0
>> 1  0x7f903be6bde6 in _L_lock_941 at /lib/../lib64/libpthread.so.0
>> 2  0x7f903be6bcdf in pthread_mutex_lock at
>> /lib/../lib64/libpthread.so.0
>> 3  0x564b21456889 in qemu_mutex_lock_impl at
>> ../util/qemu-thread-posix.c:79
>> 4  0x564b213af8a5 in block_job_add_bdrv at ../blockjob.c:224
>> 5  0x564b213b00ad in block_job_create at ../blockjob.c:440
>> 6  0x564b21357c0a in mirror_start_job at ../block/mirror.c:1622
>> 7  0x564b2135a9af in commit_active_start at ../block/mirror.c:1867
>> 8  0x564b2133d132 in qmp_block_commit at ../blockdev.c:2768
>> 9  0x564b2141fef3 in qmp_marshal_block_commit at
>> qapi/qapi-commands-block-core.c:346
>> 10 0x564b214503c9 in do_qmp_dispatch_bh at
>> ../qapi/qmp-dispatch.c:110
>> 11 0x564b21451996 in aio_bh_poll at ../util/async.c:164
>> 12 0x564b2146018e in aio_dispatch at ../util/aio-posix.c:381
>> 13 0x564b2145187e in aio_ctx_dispatch at ../util/async.c:306
>> 14 0x7f9040239049 in g_main_context_dispatch at
>> /lib/../lib64/libglib-2.0.so.0
>> 15 0x564b21447368 in main_loop_wait at ../util/main-loop.c:232
>> 16 0x564b21447368 in main_loop_wait at ../util/main-loop.c:255
>> 17 0x564b21447368 in main_loop_wait at ../util/main-loop.c:531
>> 18 0x564b212304e1 in qemu_main_loop at ../softmmu/runstate.c:721
>> 19 0x564b20f7975e in main at ../softmmu/main.c:50
>> 
>> In the IO thread, when bdrv_mirror_top_pwritev() runs, the job is NULL and the
>> stop field is false. This means the MirrorBDSOpaque "s" object has not been
>> initialized yet; this object is initialized by block_job_create(), but the
>> initialization is stuck acquiring the lock.
>> 
>> In this situation, IO thread come to bdrv_mirror_top_pwritev(),which means 
>> that
>> mirror-top node is already inserted into block graph, but its bs->opaque->job
>> is not initialized.
>> 
>> The root cause is that the qemu main thread does a release/acquire while
>> holding the lock; at the same time, the IO thread gets the lock after the
>> release stage, and the crash occurred.
>> 
>> Actually, in this situation, job->job.aio_context will not equal to
>> qemu_get_aio_context(), and will be the same as bs->aio_context,
>> thus, no need to release the lock, because bdrv_root_attach_child()
>> will not change the context.
>> 
>> This patch fix this issue.
>> 
>> Fixes: 132ada80 "block: Adjust AioContexts when attaching nodes"
>> 
>> Signed-off-by: Michael Qiu 
>
>I feel like there may be more problems (like the fact that drained section 
>should be expanded, and
>that expanding doesn't help as Michael said), but I think that temporary 
>releasing locks is unsafe
>thing, and if we can avoid it for some cases it's good, especially if it fixes 
>some bug:
>
>Reviewed-by: Vladimir Sementsov-Ogievskiy 
>
>> ---
>
>[*] patch history and anything that you don't want to put into final commit 
>message goes here.
>
>>   blockjob.c | 10 --
>>   1 file changed, 8 insertions(+), 2 deletions(-)
>> 
>> diff --git a/blockjob.c b/blockjob.c
>> index db3a21699c..d9dca36f65 1

Re: [PULL 11/24] tcg/optimize: Use tcg_constant_internal with constant folding

2021-02-04 Thread David Hildenbrand

On 04.02.21 08:55, David Hildenbrand wrote:

On 04.02.21 07:41, David Hildenbrand wrote:



Am 04.02.2021 um 03:22 schrieb Richard Henderson :

On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

This commit breaks running certain s390x binaries, at least
the "mount" command (or a library it uses) breaks.

More details in this BZ:

https://bugzilla.redhat.com/show_bug.cgi?id=1922248

Could we revert this change since it seems to have caused other
problems as well?


Well, the other problems have been fixed (which were in fact latent, and could
have been produced by other means).  I would not like to sideline this patch
set indefinitely.

Could you give me some help extracting the relevant binaries?  "Begin with an
s390x host" is a non-starter.



Hi,

I‘m planning on reproducing it today or tomorrow. Especially, finding a 
reproducer and trying reproducing on x86-64 host.


FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X
just fine from qcow2 (so "mount" seems to work in that environment as
expected). Maybe it's really s390x-host specific? I'll give it a try.



F33 qcow2 [1] fails booting on an s390x/TCG host.

I tried "-cpu qemu" and "-qemu qemu=vx=off". The same image boots on 
x86-64/TCG host just fine.



With

commit 8f17a975e60b773d7c366a81c0d9bbe304f30859
Author: Richard Henderson 
Date:   Mon Mar 30 19:52:02 2020 -0700

tcg/optimize: Adjust TempOptInfo allocation

The image boots just fine on s390x/TCG as well.


[1] 
https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2


--
Thanks,

David / dhildenb




Re: [PATCH 1/3] tests/acceptance: Move the pseries test to a separate file

2021-02-04 Thread Philippe Mathieu-Daudé
Hi Thomas,

On 1/13/21 6:30 AM, Thomas Huth wrote:
> On 12/01/2021 19.50, Wainer dos Santos Moschetta wrote:
>> Hi,
>>
>> On 1/12/21 1:40 PM, Thomas Huth wrote:
>>> Let's gather the POWER-related tests in a separate file.
>>
>>
>> Did you consider having others ppc/ppc64 boot tests together too?
>>
>> Some candidates:
>>
>> tests/acceptance/boot_linux.py:BootLinuxPPC64.test_pseries_tcg
>> tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_ppc64_e500
>> tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_ppc_g3beige
>> tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_ppc_mac99
> 
> The e500, g3beige and mac99 tests are depending on the
> do_test_advcal_2018() function in that file, so I think they should
> rather stay there.
> 
>> tests/acceptance/ppc_prep_40p.py:IbmPrep40pMachine.test_factory_firmware_and_netbsd
>> tests/acceptance/ppc_prep_40p.py:IbmPrep40pMachine.test_openbios_192m
>> tests/acceptance/ppc_prep_40p.py:IbmPrep40pMachine.test_openbios_and_netbsd
>>
> 
> That's a good point, I did not notice that file when writing my patches.
> Philippe, since you've created this ppc_prep_40p.py file, what do you
> think, should it be merged with the other ppc tests, or shall we rather
> keep this separate?

The choice was deliberate: the PReP machine has a different set of
maintainers. If possible when we have a particular section in
MAINTAINERS I'd like to use it as separation, to let the maintainers
track changes in tests.

In this example, Hervé is interested to look for PReP related files,
but doesn't have bandwidth to look at all PPC patches.

If this doesn't scale, I suggested (was it on the list or directly
to Willian?) to add a Python script to map Avocado test tags to
MAINTAINERS entry, so 1/ maintainers could run all tests linked to
their subsystem by naming the subsystem, and 2/ when a test fails
we know which maintainer to contact.

Regards,

Phil.



Re: [PATCH 4/4] i386: acpi: Don't build HPET ACPI entry if HPET is disabled

2021-02-04 Thread Philippe Mathieu-Daudé
On 2/4/21 9:04 AM, isaku.yamah...@gmail.com wrote:
> From: Sean Christopherson 
> 
> Omit HPET AML if the HPET is disabled, QEMU is not emulating it and the
> guest may get confused by seeing HPET in the ACPI tables without a
> "physical" device present.
> 
> Signed-off-by: Sean Christopherson 
> ---
>  hw/i386/acpi-build.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 




Re: [RFC 08/10] vhost: Add x-vhost-enable-shadow-vq qmp

2021-02-04 Thread Eugenio Perez Martin
On Tue, Feb 2, 2021 at 4:38 PM Eric Blake  wrote:
>
> On 1/29/21 2:54 PM, Eugenio Pérez wrote:
> > Command to enable shadow virtqueue looks like:
> >
> > { "execute": "x-vhost-enable-shadow-vq", "arguments": { "name": "dev0", 
> > "enable": true } }
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  qapi/net.json | 23 +++
> >  hw/virtio/vhost.c |  6 ++
> >  2 files changed, 29 insertions(+)
> >
> > diff --git a/qapi/net.json b/qapi/net.json
> > index c31748c87f..6170d69798 100644
> > --- a/qapi/net.json
> > +++ b/qapi/net.json
> > @@ -77,6 +77,29 @@
> >  ##
> >  { 'command': 'netdev_del', 'data': {'id': 'str'} }
> >
> > +##
> > +# @x-vhost-enable-shadow-vq:
>
> This spelling is the preferred form...[1]
>
> > +#
> > +# Use vhost shadow virtqueue.
> > +#
> > +# @name: the device name of the virtual network adapter
> > +#
> > +# @enable: true to use he alternate shadow VQ notification path
> > +#
> > +# Returns: Error if failure, or 'no error' for success
>
> This line...[2]
>
> > +#
> > +# Since: 5.3
>
> The next release is 6.0, not 5.3.
>
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "x-vhost_enable_shadow_vq", "arguments": {"enable": 
> > true} }
>
> [1]...but doesn't match the example.
>
> > +# <- { "return": { "enabled" : true } }
>
> [2]...doesn't match this comment.  I'd just drop the line, since there
> is no explicit return listed.
>

Hi Eric.

Thanks for your comments, they will be addressed in the next revision.

> > +#
> > +##
> > +{ 'command': 'x-vhost-enable-shadow-vq',
> > +  'data': {'name': 'str', 'enable': 'bool'},
> > +  'if': 'defined(CONFIG_VHOST_KERNEL)' }
> > +
> >  ##
> >  # @NetLegacyNicOptions:
> >  #
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 040f68ff2e..42836e45f3 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -15,6 +15,7 @@
> >
> >  #include "qemu/osdep.h"
> >  #include "qapi/error.h"
> > +#include "qapi/qapi-commands-net.h"
> >  #include "hw/virtio/vhost.h"
> >  #include "qemu/atomic.h"
> >  #include "qemu/range.h"
> > @@ -1841,3 +1842,8 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
> >
> >  return -1;
> >  }
> > +
> > +void qmp_x_vhost_enable_shadow_vq(const char *name, bool enable, Error 
> > **errp)
> > +{
> > +error_setg(errp, "Shadow virtqueue still not implemented.");
>
> error_setg() should not be passed a trailing '.'.
>

Oh, sorry I missed the comment in the error_setg doc.

I copy&pasted from the call to error_setg "Migration disabled: vhost
lacks VHOST_F_LOG_ALL feature.". I'm wondering if it's a good moment
to delete the dot there too, since other tools could depend on parsing
it.

Thanks!

> --
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.   +1-919-301-3226
> Virtualization:  qemu.org | libvirt.org
>




Re: [PULL 11/24] tcg/optimize: Use tcg_constant_internal with constant folding

2021-02-04 Thread Richard W.M. Jones
On Thu, Feb 04, 2021 at 09:38:45AM +0100, David Hildenbrand wrote:
> On 04.02.21 08:55, David Hildenbrand wrote:
> >On 04.02.21 07:41, David Hildenbrand wrote:
> >>
> >>>Am 04.02.2021 um 03:22 schrieb Richard Henderson 
> >>>:
> >>>
> >>>On 2/1/21 10:45 AM, Richard W.M. Jones wrote:
> This commit breaks running certain s390x binaries, at least
> the "mount" command (or a library it uses) breaks.
> 
> More details in this BZ:
> 
> https://bugzilla.redhat.com/show_bug.cgi?id=1922248
> 
> Could we revert this change since it seems to have caused other
> problems as well?
> >>>
> >>>Well, the other problems have been fixed (which were in fact latent, and 
> >>>could
> >>>have been produced by other means).  I would not like to sideline this 
> >>>patch
> >>>set indefinitely.
> >>>
> >>>Could you give me some help extracting the relevant binaries?  "Begin with 
> >>>an
> >>>s390x host" is a non-starter.
> >>>
> >>
> >>Hi,
> >>
> >>I‘m planning on reproducing it today or tomorrow. Especially, finding a 
> >>reproducer and trying reproducing on x86-64 host.
> >
> >FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X
> >just fine from qcow2 (so "mount" seems to work in that environment as
> >expected). Maybe it's really s390x-host specific? I'll give it a try.
> >
> 
> F33 qcow2 [1] fails booting on an s390x/TCG host.

What did the failure look like?

> I tried "-cpu qemu" and "-qemu qemu=vx=off". The same image boots on
> x86-64/TCG host just fine.
> 
> 
> With
> 
> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859
> Author: Richard Henderson 
> Date:   Mon Mar 30 19:52:02 2020 -0700
> 
> tcg/optimize: Adjust TempOptInfo allocation
> 
> The image boots just fine on s390x/TCG as well.

Let me try this in a minute on my original test machine.

Rich.

> 
> [1] 
> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2
> 
> -- 
> Thanks,
> 
> David / dhildenb

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-top is 'top' for virtual machines.  Tiny program with many
powerful monitoring features, net stats, disk stats, logging, etc.
http://people.redhat.com/~rjones/virt-top




Re: [PATCH 01/12] ui: Replace the word 'whitelist'

2021-02-04 Thread Philippe Mathieu-Daudé
On 2/3/21 11:04 AM, Daniel P. Berrangé wrote:
> On Tue, Feb 02, 2021 at 09:58:13PM +0100, Philippe Mathieu-Daudé wrote:
>> Follow the inclusive terminology from the "Conscious Language in your
>> Open Source Projects" guidelines [*] and replace the words "whitelist"
>> appropriately.
>>
>> [*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>  ui/console.c   | 2 +-
>>  ui/vnc-auth-sasl.c | 4 ++--
>>  2 files changed, 3 insertions(+), 3 deletions(-)
>>

>> diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
>> index f67111a3662..dde4b8d4144 100644
>> --- a/ui/vnc-auth-sasl.c
>> +++ b/ui/vnc-auth-sasl.c
>> @@ -288,7 +288,7 @@ static int protocol_client_auth_sasl_step(VncState *vs, 
>> uint8_t *data, size_t le
>>  goto authreject;
>>  }
>>  
>> -/* Check username whitelist ACL */
>> +/* Check username allowlist ACL */
> 
> ACL expands to "access control list" so this original comment
> was already redundant, and so is the replacement. Using
> acronyms is bad practice, so I'd suggest we go for
> 
>   "Check the username access control list"

OK will do, thanks.




Re: [PATCH] arm: xlnx-versal: fix virtio-mmio base address assignment

2021-02-04 Thread schspa
On Thu, 2021-02-04 at 09:19 +0100, Philippe Mathieu-Daudé wrote:
> Hi,
> 
> Please Cc the maintainers when posting your patch:
> 
> ./scripts/get_maintainer.pl -f hw/arm/xlnx-versal-virt.c
> Alistair Francis  (maintainer:Xilinx ZynqMP
> and...)
> "Edgar E. Iglesias"  (maintainer:Xilinx
> ZynqMP
> and...)
> Peter Maydell  (maintainer:Xilinx ZynqMP
> and...)
> qemu-...@nongnu.org (open list:Xilinx ZynqMP and...)
> 

Thanks for reminding, I will pay attention next time

> On 2/4/21 7:58 AM, schspa wrote:
> > 
> > At the moment the following QEMU command line triggers an assertion
> > failure On xlnx-versal SOC:
> >   qemu-system-aarch64 \
> >   -machine xlnx-versal-virt -nographic -smp 2 -m 128 \
> >   -fsdev local,id=shareid,path=${HOME}/work,security_model=none
> > \
> >   -device virtio-9p-device,fsdev=shareid,mount_tag=share \
> >   -fsdev
> > local,id=shareid1,path=${HOME}/Music,security_model=none \
> >   -device virtio-9p-device,fsdev=shareid1,mount_tag=share1
> > 
> >   qemu-system-aarch64: ../migration/savevm.c:860:
> >   vmstate_register_with_alias_id:
> >   Assertion `!se->compat || se->instance_id == 0' failed.
> > 
> > This problem was fixed on arm virt platform in patch
> >  
> >  
> > https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01119.html
> 
> Please use instead "in commit f58b39d2d5b ("virtio-mmio: format
> transport base address in BusClass.get_dev_path")".
> 

Thanks, I will upload a new patch to fix it if there is no need to do
further change for the next question.

> > It works perfectly on arm virt platform. but there is still there
> > on
> > xlnx-versal SOC.
> > 
> > The main difference between arm virt and xlnx-versal is they use
> > different way to create virtio-mmio qdev. on arm virt, it calls
> > sysbus_create_simple("virtio-mmio", base, pic[irq]); which will
> > call
> > sysbus_mmio_map internally and assign base address to subsys device
> > mmio correctly. but xlnx-versal's implements won't do this.
> > 
> > However, xlnx-versal can't switch to sysbus_create_simple() to
> > create
> > virtio-mmio device. It's because xlnx-versal's cpu use
> > VersalVirt.soc.fpd.apu.mr as it's memory. which is subregion of
> > system_memory. sysbus_create_simple will add virtio to
> > system_memory,
> > which can't be accessed by cpu.
> > 
> > We can solve this by simply assign mmio[0].addr directly. makes
> > virtio_mmio_bus_get_dev_path to produce correct unique device path.
> > 
> > Signed-off-by: schspa 
> > ---
> >  hw/arm/xlnx-versal-virt.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
> > index 8482cd6196..87b92ec6c3 100644
> > --- a/hw/arm/xlnx-versal-virt.c
> > +++ b/hw/arm/xlnx-versal-virt.c
> > @@ -490,6 +490,7 @@ static void create_virtio_regions(VersalVirt
> > *s)
> >  object_property_add_child(OBJECT(&s->soc), name,
> > OBJECT(dev));
> >  sysbus_realize_and_unref(SYS_BUS_DEVICE(dev),
> > &error_fatal);
> >  sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic_irq);
> > +    SYS_BUS_DEVICE(dev)->mmio[0].addr = base;
> 
> The proper API call is:
> 
>    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
> 

I can't call this API, because it will map the virtio device memory
region to system_map, and then it can't be added to &s->soc.mr_ps again. I'm
willing to change it to the proper API but can't find a suitable one.

> >  mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
> >  memory_region_add_subregion(&s->soc.mr_ps, base, mr);
> >  g_free(name);
> > 
> 

-- 
schspa 




Re: [PATCH v2 23/36] block: adapt bdrv_append() for inserting filters

2021-02-04 Thread Kevin Wolf
Am 04.02.2021 um 09:30 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 04.02.2021 00:33, Kevin Wolf wrote:
> > Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> > >   int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
> > >   Error **errp)
> > >   {
> > > -Error *local_err = NULL;
> > > +int ret;
> > > +GSList *tran = NULL;
> > > -bdrv_set_backing_hd(bs_new, bs_top, &local_err);
> > > -if (local_err) {
> > > -error_propagate(errp, local_err);
> > > -return -EPERM;
> > > +assert(!bs_new->backing);
> > > +
> > > +ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
> > > +   &child_of_bds, 
> > > bdrv_backing_role(bs_new),
> > > +   &bs_new->backing, &tran, errp);
> > > +if (ret < 0) {
> > > +goto out;
> > >   }
> > 
> > I don't think changing bs->backing without bdrv_set_backing_hd() is
> > correct at the moment. We lose a few things:
> > 
> > 1. The bdrv_is_backing_chain_frozen() check
> > 2. Updating backing_hd->inherits_from if necessary
> > 3. bdrv_refresh_limits()
> > 
> > If I'm not missing anything, all of these are needed in the context of
> > bdrv_append().
> 
> I decided that bdrv_append() is only for appending new nodes, so
> frozen and inherits_from checks are not needed. And I've added
> assert(!bs_new->backing)...
> 
> Checking this now:
> 
> - appending filters is obvious
> - bdrv_append_temp_snapshot() creates new qcow2 node based on tmp
>   file, don't see any backing initialization (and it would be rather
>   strange)

Yes, the internal uses are obviously unproblematic for the frozen check.

> - external_snapshot_prepare() do check if
>   (bdrv_cow_child(state->new_bs)) {  error-out }

Ok, the only thing bdrv_set_backing_hd() can and must check is whether
the link to the old backing file was frozen, and we know that we don't
have an old backing file. Makes sense.

Same thing for inherits_from, we only do this if the new backing
file (i.e. the old active layer for bdrv_append) was already in the
backing chain of the new node.

> So everything is OK. I should describe it in commit message and add a
> comment to bdrv_append.

What about bdrv_refresh_limits()? The node gains a new backing file, so
I think the limits could change.

Ideally, bdrv_child_cb_attach/detach() would take care of this, but at
the moment they don't.

Kevin




Re: [PATCH 09/12] qemu-options: Replace the word 'blacklist'

2021-02-04 Thread Philippe Mathieu-Daudé
On 2/3/21 11:25 AM, Daniel P. Berrangé wrote:
> On Tue, Feb 02, 2021 at 09:58:21PM +0100, Philippe Mathieu-Daudé wrote:
>> Follow the inclusive terminology from the "Conscious Language in your
>> Open Source Projects" guidelines [*] and replace the word "blacklist"
>> appropriately.
>>
>> [*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>  qemu-options.hx | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index d0410f05125..75997ee2ea6 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -4275,11 +4275,11 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>  "by the kernel, but typically no longer used by 
>> modern\n" \
>>  "C library implementations.\n" \
>>  "use 'elevateprivileges' to allow or deny QEMU process 
>> to elevate\n" \
>> -"its privileges by blacklisting all set*uid|gid 
>> system calls.\n" \
>> +"its privileges by denylisting all set*uid|gid 
>> system calls.\n" \
> 
> The original description is a bit weird in how it reads/explains it, so
> I think it needs bigger changes:
> 
> "use 'elevateprivileges' to allow or deny the QEMU 
> process ability
> "to elevate privileges using set*uid|gid system calls.\n" 
> \
> 
>>  "The value 'children' will deny set*uid|gid system 
>> calls for\n" \
>>  "main QEMU process but will allow forks and execves 
>> to run unprivileged\n" \
>>  "use 'spawn' to avoid QEMU to spawn new threads or 
>> processes by\n" \
>> -" blacklisting *fork and execve\n" \
>> +" denylisting *fork and execve\n" \
> 
> denylisting is a very strange term to use - its not really a word IMHO.
> Better as
> 
> " preventing *fork and execve\n" \
> 
> or
> 
> " blocking *fork and execve\n" \

While 'preventing' sounds nicer, 'blocking' is simpler to understand
for a technical English speaker, so I took your 2nd suggestion, thanks.




Re: [PULL 11/24] tcg/optimize: Use tcg_constant_internal with constant folding

2021-02-04 Thread David Hildenbrand

On 04.02.21 10:03, Richard W.M. Jones wrote:

On Thu, Feb 04, 2021 at 09:38:45AM +0100, David Hildenbrand wrote:

On 04.02.21 08:55, David Hildenbrand wrote:

On 04.02.21 07:41, David Hildenbrand wrote:



Am 04.02.2021 um 03:22 schrieb Richard Henderson :

On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

This commit breaks running certain s390x binaries, at least
the "mount" command (or a library it uses) breaks.

More details in this BZ:

https://bugzilla.redhat.com/show_bug.cgi?id=1922248

Could we revert this change since it seems to have caused other
problems as well?


Well, the other problems have been fixed (which were in fact latent, and could
have been produced by other means).  I would not like to sideline this patch
set indefinitely.

Could you give me some help extracting the relevant binaries?  "Begin with an
s390x host" is a non-starter.



Hi,

I‘m planning on reproducing it today or tomorrow. Especially, finding a 
reproducer and trying reproducing on x86-64 host.


FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X
just fine from qcow2 (so "mount" seems to work in that environment as
expected). Maybe it's really s390x-host specific? I'll give it a try.



F33 qcow2 [1] fails booting on an s390x/TCG host.


What did the failure look like?


It starts booting just fine until

[   10.869011] Core dump to |/bin/false pipe failed
[   10.915968] systemd[1]: Finished Create list of static device nodes for the 
current kernel.
[   10.946424] systemd[1]: systemd-journald.service: Main process exited, 
code=killed, status=31/SYS
[   10.966677] systemd[1]: systemd-journald.service: Failed with result 
'signal'.
[   11.017545] systemd[1]: Failed to start Journal Service.
[FAILED] Failed to start Journal Service.
See 'systemctl status systemd-journald.service' for details.

which repeats a couple of times. Then things go nuts

[   32.488899] systemd[1]: Failed to start Rule-based Manager for Device Events 
and Files.
[FAILED] Failed to start Rule-based…r for Device Events and Files.
See 'systemctl status systemd-udevd.service' for details.
[   32.501449] systemd[1]: systemd-udevd.service: Scheduled restart job, 
restart counter is at 1.
[   32.502134] systemd[1]: Stopped Rule-based Manager for Device Events and 
Files.


Looks also related to /dev / udev.




I tried "-cpu qemu" and "-qemu qemu=vx=off". The same image boots on
x86-64/TCG host just fine.


With

commit 8f17a975e60b773d7c366a81c0d9bbe304f30859
Author: Richard Henderson 
Date:   Mon Mar 30 19:52:02 2020 -0700

 tcg/optimize: Adjust TempOptInfo allocation

The image boots just fine on s390x/TCG as well.


Let me try this in a minute on my original test machine.


That's the commit exactly before the problematic one (didn't want to mess with 
reverts for now).

--
Thanks,

David / dhildenb




[Bug 1912224] Re: qemu may freeze during drive-mirroring on fragmented FS

2021-02-04 Thread Alexandre arents
It helps a lot, and it goes fast !

live block migration of 400GB raw disk:
master  1130s (362MB/s) qemu  unstable/frozen
master+fix  445s  (920MB/s) qemu  stable

Thanks Max, It will be nice to have this one merged.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1912224

Title:
  qemu may freeze during drive-mirroring on fragmented FS

Status in QEMU:
  New

Bug description:
  
  We have odd behavior in operation where qemu freezes for many
  seconds. We started a thread about that issue here:
  https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg05623.html

  It happens at least during openstack nova snapshot (qemu blockdev-mirror)
  or live block migration(which include network copy of disk).

  After further troubleshoots, it seems related to FS fragmentation on
  host.

  reproducible at least on:
  Ubuntu 18.04.3/4.18.0-25-generic/qemu-4.0
  Ubuntu 16.04.6/5.10.6/qemu-5.2.0-rc2

  # Lets create a dedicated file system on a SSD/Nvme 60GB disk in my case:
  $sudo mkfs.ext4 /dev/sda3
  $sudo mount /dev/sda3 /mnt
  $df -h /mnt
  Filesystem  Size  Used Avail Use% Mounted on
  /dev/sda3 59G   53M   56G   1% /mnt

  #Create a fragmented disk on it using 2MB Chunks (about 30min):
  $sudo python3 create_fragged_disk.py /mnt 2
  Filling up FS by Creating chunks files in:  /mnt/chunks
  We are probably full as expected!!:  [Errno 28] No space left on device
  Creating fragged disk file:  /mnt/disk

  $ls -lhs 
  59G -rw-r--r-- 1 root root 59G Jan 15 14:08 /mnt/disk

  $ sudo e4defrag -c /mnt/disk
   Total/best extents 41971/30
   Average size per extent1466 KB
   Fragmentation score2
   [0-30 no problem: 31-55 a little bit fragmented: 56- needs defrag]
   This file (/mnt/disk) does not need defragmentation.
   Done.

  # the tool^^^ says it is not enough fragmented to be able to defrag.

  #Inject an image on fragmented disk
  sudo chown ubuntu /mnt/disk
  wget 
https://cloud-images.ubuntu.com/bionic/current/bionic-server-cloudimg-amd64.img
  qemu-img convert -O raw  bionic-server-cloudimg-amd64.img \
   bionic-server-cloudimg-amd64.img.raw
  dd conv=notrunc iflag=fullblock if=bionic-server-cloudimg-amd64.img.raw \
  of=/mnt/disk bs=1M
  virt-customize -a /mnt/disk --root-password password:

  # logon run console activity ex: ping -i 0.3 127.0.0.1
  $qemu-system-x86_64 -m 2G -enable-kvm  -nographic \
  -chardev socket,id=test,path=/tmp/qmp-monitor,server,nowait \
  -mon chardev=test,mode=control \
  -drive 
file=/mnt/disk,format=raw,if=none,id=drive-virtio-disk0,cache=none,discard\
  -device 
virtio-blk-pci,scsi=off,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on

  $sync
  $echo 3 | sudo tee -a /proc/sys/vm/drop_caches

  #start drive-mirror via qmp on another SSD/nvme partition
  nc -U /tmp/qmp-monitor
  {"execute":"qmp_capabilities"}
  
{"execute":"drive-mirror","arguments":{"device":"drive-virtio-disk0","target":"/home/ubuntu/mirror","sync":"full","format":"qcow2"}}
  ^^^ qemu console may start to freeze at this step.

  NOTE:
   - the smaller the chunk size and the bigger the disk size, the worse it is.
 In operation we also have issue on 400GB disk size with average 13MB/extent
   - Reproducible also on xfs

  
  Expected behavior:
  ---
  QEMU should remain steady, eventually only have decrease storage Performance
  or mirroring, because of fragmented fs.

  Observed behavior:
  ---
  Perf of mirroring is still quite good even on fragmented FS,
  but it breaks qemu.

  
  ##  create_fragged_disk.py 
  import sys
  import os
  import tempfile
  import glob
  import errno

  MNT_DIR = sys.argv[1]
  CHUNK_SZ_MB = int(sys.argv[2])
  CHUNKS_DIR = MNT_DIR + '/chunks'
  DISK_FILE = MNT_DIR + '/disk'

  if not os.path.exists(CHUNKS_DIR):
  os.makedirs(CHUNKS_DIR)

  with open("/dev/urandom", "rb") as f_rand:
   mb_rand=f_rand.read(1024 * 1024)

  print("Filling up FS by Creating chunks files in: ",CHUNKS_DIR)
  try:
  while True:
  tp = tempfile.NamedTemporaryFile(dir=CHUNKS_DIR,delete=False)
  for x in range(CHUNK_SZ_MB):
  tp.write(mb_rand)
  os.fsync(tp)
  tp.close()
  except Exception as ex:
  print("We are probably full as expected!!: ",ex)

  chunks = glob.glob(CHUNKS_DIR + '/*')

  print("Creating fragged disk file: ",DISK_FILE)
  with open(DISK_FILE, "w+b") as f_disk:
  for chunk in chunks:
  try:
  os.unlink(chunk)
  for x in range(CHUNK_SZ_MB):
  f_disk.write(mb_rand)
  os.fsync(f_disk)
  except IOError as ex:
  if ex.errno != errno.ENOSPC:
  raise
  ###3

To manage

Re: [RFC 05/10] vhost: Add vhost_dev_from_virtio

2021-02-04 Thread Eugenio Perez Martin
On Thu, Feb 4, 2021 at 4:14 AM Jason Wang  wrote:
>
>
> On 2021/2/2 下午6:17, Eugenio Perez Martin wrote:
> > On Tue, Feb 2, 2021 at 4:31 AM Jason Wang  wrote:
> >>
> >> On 2021/2/1 下午4:28, Eugenio Perez Martin wrote:
> >>> On Mon, Feb 1, 2021 at 7:13 AM Jason Wang  wrote:
>  On 2021/1/30 上午4:54, Eugenio Pérez wrote:
> > Signed-off-by: Eugenio Pérez 
> > ---
> > include/hw/virtio/vhost.h |  1 +
> > hw/virtio/vhost.c | 17 +
> > 2 files changed, 18 insertions(+)
> >
> > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> > index 4a8bc75415..fca076e3f0 100644
> > --- a/include/hw/virtio/vhost.h
> > +++ b/include/hw/virtio/vhost.h
> > @@ -123,6 +123,7 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, 
> > const int *feature_bits,
> > void vhost_ack_features(struct vhost_dev *hdev, const int 
> > *feature_bits,
> > uint64_t features);
> > bool vhost_has_free_slot(void);
> > +struct vhost_dev *vhost_dev_from_virtio(const VirtIODevice *vdev);
> >
> > int vhost_net_set_backend(struct vhost_dev *hdev,
> >   struct vhost_vring_file *file);
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 28c7d78172..8683d507f5 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -61,6 +61,23 @@ bool vhost_has_free_slot(void)
> > return slots_limit > used_memslots;
> > }
> >
> > +/*
> > + * Get the vhost device associated to a VirtIO device.
> > + */
> > +struct vhost_dev *vhost_dev_from_virtio(const VirtIODevice *vdev)
> > +{
> > +struct vhost_dev *hdev;
> > +
> > +QLIST_FOREACH(hdev, &vhost_devices, entry) {
> > +if (hdev->vdev == vdev) {
> > +return hdev;
> > +}
> > +}
> > +
> > +assert(hdev);
> > +return NULL;
> > +}
>  I'm not sure this can work in the case of multiqueue. E.g vhost-net
>  multiqueue is a N:1 mapping between vhost devices and virtio devices.
> 
>  Thanks
> 
> >>> Right. We could add an "vdev vq index" parameter to the function in
> >>> this case, but I guess the most reliable way to do this is to add a
> >>> vhost_opaque value to VirtQueue, as Stefan proposed in previous RFC.
> >>
> >> So the question still, it looks like it's easier to hide the shadow
> >> virtqueue stuffs at vhost layer instead of expose them to virtio layer:
> >>
> >> 1) vhost protocol is stable ABI
> >> 2) no need to deal with virtio stuffs which is more complex than vhost
> >>
> >> Or are there any advantages if we do it at virtio layer?
> >>
> > As far as I can tell, we will need the virtio layer the moment we
> > start copying/translating buffers.
> >
> > In this series, the virtio dependency can be reduced if qemu does not
> > check the used ring _F_NO_NOTIFY flag before writing to irqfd. It
> > would enable packed queues and IOMMU immediately, and I think the cost
> > should not be so high. In the previous RFC this check was deleted
> > later anyway, so I think it was a bad idea to include it from the start.
>
>
> I am not sure I understand here. For vhost, we can still do anything we
> want, e.g accessing guest memory etc. Any blocker that prevent us from
> copying/translating buffers? (Note that qemu will propagate memory
> mappings to vhost).
>

There is nothing that forbids us to access directly, but if we don't
reuse the virtio layer functionality we would have to duplicate every
access function. "Need" was a too strong word maybe :).

In other words: for the shadow vq vring exposed for the device, qemu
treats it as a driver, and this functionality needs to be added to
qemu. But for accessing the guest's one, not reusing virtio.c would be
a bad idea in my opinion.

> Thanks
>
>
> >
> >
> >
> >
> >
> >> Thanks
> >>
> >>
> >>> I need to take this into account in qmp_x_vhost_enable_shadow_vq too.
> >>>
> > +
> > static void vhost_dev_sync_region(struct vhost_dev *dev,
> >   MemoryRegionSection *section,
> >   uint64_t mfirst, uint64_t mlast,
> >
>




Re: [PULL 11/24] tcg/optimize: Use tcg_constant_internal with constant folding

2021-02-04 Thread Richard W.M. Jones
> > commit 8f17a975e60b773d7c366a81c0d9bbe304f30859
> > Author: Richard Henderson 
> > Date:   Mon Mar 30 19:52:02 2020 -0700
> > 
> > tcg/optimize: Adjust TempOptInfo allocation
> > 
> > The image boots just fine on s390x/TCG as well.
> 
> Let me try this in a minute on my original test machine.

I got the wrong end of the stick as David pointed out in the other email.

However I did test things again this morning (all on s390 host), and
current head (1ed9228f63ea4b) fails same as before ("mount" command
fails).

Also I downloaded:

  
https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

and booted it on 1ed9228f63ea4b using this command:

  $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg -m 2048 
-drive file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio -serial 
stdio

Lots of core dumps inside the guest, same as David saw.

I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust
TempOptInfo allocation"), rebuilt qemu, tested the same command and
cloud image, and that booted up much happier with no failures or core
dumps.

Isn't it kind of weird that this would only affect an s390 host?  I
don't understand why the host would make a difference if we're doing
TCG.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-df lists disk usage of guests without needing to install any
software inside the virtual machine.  Supports Linux and Windows.
http://people.redhat.com/~rjones/virt-df/




Re: [PULL 11/24] tcg/optimize: Use tcg_constant_internal with constant folding

2021-02-04 Thread Richard W.M. Jones


I have this s390 machine for another 99 hours now, so if you want me
to test patches then send them my way.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org




Re: [PATCH v2] virtiofsd: vu_dispatch locking should never fail

2021-02-04 Thread Stefan Hajnoczi
On Wed, Feb 03, 2021 at 07:24:34PM +0100, Greg Kurz wrote:
> pthread_rwlock_rdlock() and pthread_rwlock_wrlock() can fail if a
> deadlock condition is detected or the current thread already owns
> the lock. They can also fail, like pthread_rwlock_unlock(), if the
> mutex wasn't properly initialized. None of these are ever expected
> to happen with fv_VuDev::vu_dispatch_rwlock.
> 
> Some users already check the return value and assert, some others
> don't. Introduce rdlock/wrlock/unlock wrappers that just do the
> former and use them everywhere for improved consistency and
> robustness.
> 
> This is just cleanup. It doesn't fix any actual issue.
> 
> Signed-off-by: Greg Kurz 
> ---
> 
> v2: - open-code helpers instead of defining them with a macro (Vivek, Stefan)
> - fixed rd/wr typo in fv_queue_thread() (Stefan)
> - make it clear in the changelog this is just cleanup (Stefan)
> 
>  tools/virtiofsd/fuse_virtio.c | 49 +--
>  1 file changed, 35 insertions(+), 14 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [PULL 11/24] tcg/optimize: Use tcg_constant_internal with constant folding

2021-02-04 Thread David Hildenbrand

On 04.02.21 10:29, Richard W.M. Jones wrote:

commit 8f17a975e60b773d7c366a81c0d9bbe304f30859
Author: Richard Henderson 
Date:   Mon Mar 30 19:52:02 2020 -0700

 tcg/optimize: Adjust TempOptInfo allocation

The image boots just fine on s390x/TCG as well.


Let me try this in a minute on my original test machine.


I got the wrong end of the stick as David pointed out in the other email.

However I did test things again this morning (all on s390 host), and
current head (1ed9228f63ea4b) fails same as before ("mount" command
fails).

Also I downloaded:

   
https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

and booted it on 1ed9228f63ea4b using this command:

   $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg -m 2048 
-drive file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio -serial 
stdio

Lots of core dumps inside the guest, same as David saw.

I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust
TempOptInfo allocation"), rebuilt qemu, tested the same command and
cloud image, and that booted up much happier with no failures or core
dumps.

Isn't it kind of weird that this would only affect an s390 host?  I
don't understand why the host would make a difference if we're doing
TCG.


I assume an existing BUG in the s390x TCG backend ... which makes it 
harder to debug :)


--
Thanks,

David / dhildenb




Re: [PULL 16/21] hw/timer: Refactor NPCM7XX Timer to use CLK clock

2021-02-04 Thread Philippe Mathieu-Daudé
Hi,

On Tue, Jan 12, 2021 at 6:20 PM Peter Maydell 
wrote:
>
> From: Hao Wu 
>
> This patch makes the NPCM7XX Timer use the timer clock generated by the
> CLK module instead of the magic number TIMER_REF_HZ.
>
> Reviewed-by: Havard Skinnemoen 
> Reviewed-by: Tyrone Ting 
> Signed-off-by: Hao Wu 
> Message-id: 20210108190945.949196-3-wuhao...@google.com
> Reviewed-by: Peter Maydell 
> Signed-off-by: Peter Maydell 
> ---
>  include/hw/misc/npcm7xx_clk.h|  6 -
>  include/hw/timer/npcm7xx_timer.h |  1 +
>  hw/arm/npcm7xx.c |  5 
>  hw/timer/npcm7xx_timer.c | 39 +++-
>  4 files changed, 24 insertions(+), 27 deletions(-)

Is that a spurious error (building with Clang)?

Running test qtest-arm/npcm7xx_timer-test
ERROR:../tests/qtest/npcm7xx_timer-test.c:475:test_periodic_interrupt:
assertion failed (tim_read(td, TISR) == tim_timer_bit(td)): (0x
== 0x0004)
ERROR:../tests/qtest/npcm7xx_timer-test.c:476:test_periodic_interrupt:
'qtest_get_irq(global_qtest, tim_timer_irq(td))' should be TRUE
FAIL 155 qtest-arm/npcm7xx_timer-test
/arm/npcm7xx_timer/tim[2]/timer[2]/periodic_interrupt
make: *** [Makefile.mtest:1033: run-test-127] Error 1



Re: [PATCH 02/12] qga: Rename config key 'blacklist' as 'denylist'

2021-02-04 Thread Michal Suchánek
Hello,

On Tue, Feb 02, 2021 at 09:58:14PM +0100, Philippe Mathieu-Daudé wrote:
> Follow the inclusive terminology from the "Conscious Language in your
> Open Source Projects" guidelines [*] and replace the word "blacklist"
> appropriately.
> 
> [*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  docs/interop/qemu-ga.rst   |  2 +-
>  qga/main.c | 15 +++
>  tests/test-qga.c   |  8 
>  tests/data/test-qga-config |  2 +-
>  4 files changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/docs/interop/qemu-ga.rst b/docs/interop/qemu-ga.rst
> index 3063357bb5d..9a590bf95cb 100644
> --- a/docs/interop/qemu-ga.rst
> +++ b/docs/interop/qemu-ga.rst
> @@ -125,7 +125,7 @@ pidfilestring
>  fsfreeze-hook  string
>  statedir   string
>  verboseboolean
> -blacklist  string list
> +denylist   string list
>  =  ===

this changes the config option but not the commandline option, creating
a disconnect between commandline and config:

docs/interop/qemu-ga.rst:.. option:: -b, --blacklist=LIST
docs/interop/qemu-ga.rst:blacklist  string list

When making these 'inclusive' changes at least make sure you don't break
the software in question.

A simple grep would have told you.

Thanks

Michal



Re: [PATCH v4 2/3] virtiofsd: optionally return inode pointer from lo_do_lookup()

2021-02-04 Thread Stefan Hajnoczi
On Thu, Feb 04, 2021 at 09:25:28AM +0100, Greg Kurz wrote:
> On Wed, 3 Feb 2021 17:00:06 +
> Stefan Hajnoczi  wrote:
> 
> > On Wed, Feb 03, 2021 at 03:20:14PM +0100, Greg Kurz wrote:
> > > On Wed,  3 Feb 2021 11:37:18 +
> > > Stefan Hajnoczi  wrote:
> > > 
> > > > lo_do_lookup() finds an existing inode or allocates a new one. It
> > > > increments nlookup so that the inode stays alive until the client
> > > > releases it.
> > > > 
> > > > Existing callers don't need the struct lo_inode so the function doesn't
> > > > return it. Extend the function to optionally return the inode. The next
> > > > commit will need it.
> > > > 
> > > > Signed-off-by: Stefan Hajnoczi 
> > > > ---
> > > >  tools/virtiofsd/passthrough_ll.c | 29 +
> > > >  1 file changed, 21 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/tools/virtiofsd/passthrough_ll.c 
> > > > b/tools/virtiofsd/passthrough_ll.c
> > > > index e63cbd3fb7..c87a1f3d72 100644
> > > > --- a/tools/virtiofsd/passthrough_ll.c
> > > > +++ b/tools/virtiofsd/passthrough_ll.c
> > > > @@ -831,11 +831,13 @@ static int do_statx(struct lo_data *lo, int 
> > > > dirfd, const char *pathname,
> > > >  }
> > > >  
> > > >  /*
> > > > - * Increments nlookup and caller must release refcount using
> > > > - * lo_inode_put(&parent).
> > > > + * Increments nlookup on the inode on success. unref_inode_lolocked() 
> > > > must be
> > > > + * called eventually to decrement nlookup again. If inodep is 
> > > > non-NULL, the
> > > > + * inode pointer is stored and the caller must call lo_inode_put().
> > > >   */
> > > >  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char 
> > > > *name,
> > > > -struct fuse_entry_param *e)
> > > > +struct fuse_entry_param *e,
> > > > +struct lo_inode **inodep)
> > > >  {
> > > >  int newfd;
> > > >  int res;
> > > > @@ -845,6 +847,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t 
> > > > parent, const char *name,
> > > >  struct lo_inode *inode = NULL;
> > > >  struct lo_inode *dir = lo_inode(req, parent);
> > > >  
> > > > +if (inodep) {
> > > > +*inodep = NULL;
> > > > +}
> > > > +
> > > 
> > > Is this side-effect needed ? If lo_do_lookup() returns an error, it
> > > rather seems that the caller shouldn't expect anything to be written
> > > here, i.e. the content of *inodep still belongs to the caller and
> > > whatever value it previously put in there (as patch 3/3 does) should
> > > be preserved IMHO.
> > > 
> > > Apart from that LGTM.
> > 
> > I like this approach because it prevents accessing uninitialized memory
> > in the caller:
> > 
> >   struct lo_inode *inode;
> > 
> >   if (lo_do_lookup(..., &inodep) != 0) {
> > goto err;
> >   }
> >   ...
> > 
> >   err:
> >   lo_inode_put(&inode); <-- uninitialized in the error case!
> 
> My point is that it is the caller's business to ensure that inode
> doesn't contain garbage if it is to be used irrespective of the
> outcome of lo_do_lookup(). This is precisely what patch 3/3 does,
> so I don't understand the ultimate purpose of nullifying the
> inode pointer _again_ in lo_do_lookup()...

APIs should be designed to eliminate classes of errors where possible
IMO. Taking care regarding the uninitialized pointer in the error case
could be the caller's responsibility, but what's the advantage?

(There's a related thing with lo_inode_put(&inode) where it sets *inode
= NULL to eliminate use-after-free bugs in callers. It would have been
possible to use the same approach as free(3) where it's the caller's
responsibility, but that API design decision in free(3) has caused
many bugs in applications.)

Stefan


signature.asc
Description: PGP signature


Re: [PATCH v15 00/23] i386 cleanup PART 2

2021-02-04 Thread Claudio Fontana
On 2/3/21 11:07 PM, Alex Bennée wrote:
> For patch 17 onwards it was just seeing what the actual benefit of
> the derived class was - I think I get it later on but you should
> mention it up front.
> 
> I do think we need to address the ordering constraint in 21 - are we
> introducing one or just formalising what has been created? If we are
> introducing one then can we a) do it a better way with the structuring
> of QOM or b) enforce it so new models don't run into unexpected bugs.


What patch 21 tried to do is to improve on the existing call method of 
"realizefn" for cpus.
To be honest it ended up not really achieving the goal, only removing one open 
call to qemu_init_vcpu in the target code.

The actual problem of the completely freak call order of realizefn, where the 
object model and device model interactions just really get in the way and 
create more problems than they solve,
remains largely untouched.

The problem is everything that has been plugged on top of realizing cpus now, 
which depends on the existing call order, which makes it almost impossible in 
my view to untangle properly.
As an example, the addition of a new cpu (cpu_list_add) should theoretically be 
done in the common cpu code, but it can't, due to the web of dependencies of 
the cpu_index being already updated before the common code is reached (tcg 
plugins are also a blocker there IIRC, but it is by no means the only one).

cpu_exec_realizefn then remains the place where this is done, which is called 
directly inside the target/xxx/cpu.c code.
Add to it the fact that we cannot do all framework operations in hw/core/cpu.c, 
because of the common_ss / specific_ss code split necessity,
and you get a web of constraints that is likely impossible to navigate.

To answer your questions:

a) we are introducing a more strict order in this patch, in the sense that 
implementations in target/xxx/cpu.c are not free to call qemu_init_vcpu where 
they please, instead the call is included in common code, triggered by the 
parent_realize() call.

b) this is basically automatically enforced by the fact that the call is not in 
target/ anymore


--

As can be seen by the patch, for some targets, in particular the ones requiring 
a cpu_reset() after qemu_init_vcpu, this slightly changes the initialization,
as between qemu_init_vcpu and cpu_reset() you now have the common code:

/* qdev_get_machine() can return something that's not TYPE_MACHINE  

 * if this is one of the user-only emulators; in that case there's  

 * no need to check the ignore_memory_transaction_failures board flag.  

 */
if (object_dynamic_cast(machine, TYPE_MACHINE)) {
ObjectClass *oc = object_get_class(machine);
MachineClass *mc = MACHINE_CLASS(oc);

if (mc) {
cpu->ignore_memory_transaction_failures =
mc->ignore_memory_transaction_failures;
}
}

if (dev->hotplugged) {
cpu_synchronize_post_init(cpu);
cpu_resume(cpu);
}

which was executed later before.

--

Only as a result of your comment I now noticed the last part about hotplug, 
which looks a bit scary tbh.
I wonder if there is some automated test that covers cpu device hotplug?

And regardless of the fact that I could not see any issue, I am tempted to drop 
patch 21 entirely now.

Let me know what you think,

Thanks,

Claudio




> 
> On Wed, 3 Feb 2021 at 17:10, Claudio Fontana  wrote:
>>
>> Hi Alex,
>>
>> thanks for your review,
>>
>> On 2/3/21 5:57 PM, Alex Bennée wrote:
>>>
>>> Claudio Fontana  writes:
>>>
>>> 
>>>
>>> Final comments. I think overall this series is looking pretty good
>>> although I got a bit lost at the end when we started expanding on the
>>> AccelClass.
>>> The main yuck was the start-up ordering constraint which
>>
>> To be sure, are you referring to tcg_accel_ops_init(), ie your comments 
>> towards the end of PATCH 17?
>>
>> Ciao,
>>
>> Claudio
>>
>>> would be nice to remove or failing that catch with some asserts so weird
>>> bugs don't get introduced.
>>>
>>> Paolo, is it worth picking up some of the early patches to reduce the
>>> patch delta going forward?
>>>
>>
> 
> 




Re: [PATCH v4 3/3] virtiofsd: prevent opening of special files (CVE-2020-35517)

2021-02-04 Thread Stefan Hajnoczi
On Wed, Feb 03, 2021 at 04:14:41PM -0500, Vivek Goyal wrote:
> On Wed, Feb 03, 2021 at 05:05:14PM +, Stefan Hajnoczi wrote:
> > On Wed, Feb 03, 2021 at 11:08:58AM -0500, Vivek Goyal wrote:
> > > On Wed, Feb 03, 2021 at 05:02:37PM +0100, Greg Kurz wrote:
> > > > On Wed, 3 Feb 2021 10:28:50 -0500
> > > > Vivek Goyal  wrote:
> > > > 
> > > > > On Wed, Feb 03, 2021 at 11:37:19AM +, Stefan Hajnoczi wrote:
> > > > > 
> > > > > [..]
> > > > > > @@ -1727,36 +1764,38 @@ static void lo_create(fuse_req_t req, 
> > > > > > fuse_ino_t parent, const char *name,
> > > > > >  
> > > > > >  update_open_flags(lo->writeback, lo->allow_direct_io, fi);
> > > > > >  
> > > > > > -fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & 
> > > > > > ~O_NOFOLLOW,
> > > > > > -mode);
> > > > > > +/* Try to create a new file but don't open existing files */
> > > > > > +fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | 
> > > > > > O_EXCL, mode);
> > > > > >  err = fd == -1 ? errno : 0;
> > > > > > +
> > > > > >  lo_restore_cred(&old);
> > > > > >  
> > > > > > -if (!err) {
> > > > > > -ssize_t fh;
> > > > > > -
> > > > > > -pthread_mutex_lock(&lo->mutex);
> > > > > > -fh = lo_add_fd_mapping(lo, fd);
> > > > > > -pthread_mutex_unlock(&lo->mutex);
> > > > > > -if (fh == -1) {
> > > > > > -close(fd);
> > > > > > -err = ENOMEM;
> > > > > > -goto out;
> > > > > > -}
> > > > > > +/* Ignore the error if file exists and O_EXCL was not given */
> > > > > > +if (err && !(err == EEXIST && !(fi->flags & O_EXCL))) {
> > > > > 
> > > > > Can this check be simplified to.
> > > > >if (err && (err == EEXIST && (fi->flags & O_EXCL)) {
> > > > 
> > > > I guess you meant :
> > > > 
> > > > if (err && (err != EEXIST || fi->flags & O_EXCL) {
> > > 
> > > This sounds correct. I forgot to take into account that if error is
> > > not -EEXIST, we still want to bail out irrespective of O_EXCL.
> > 
> > I thought about De Morgan's law too but found the OR expression is not
> > easier to read than the AND expression :(. If you prefer it written this
> > way I can change it though.
> 
> I personally find this one easier to read. And not because of AND but because
> of double logical negation (!x) in previous expression.
> 
> But I am not particular about it. If you don't find it easier to
> read, I can live with previous one.

Okay, I'll convert it. Dave also mentioned he finds the AND version
strange.

Stefan


signature.asc
Description: PGP signature


Re: [PATCH v3] target/i386/hvf: add vmware-cpuid-freq cpu feature

2021-02-04 Thread Roman Bolshakov
On Fri, Jan 22, 2021 at 06:05:18PM +0300, yaroshchuk2...@gmail.com wrote:
> From: Vladislav Yaroshchuk 
> 
> For `-accel hvf` cpu_x86_cpuid() is wrapped with hvf_cpu_x86_cpuid() to
> add paravirtualization cpuid leaf 0x40000010
> https://lkml.org/lkml/2008/10/1/246
> 
> Leaf 0x40000010, Timing Information:
> EAX: (Virtual) TSC frequency in kHz.
> EBX: (Virtual) Bus (local apic timer) frequency in kHz.
> ECX, EDX: RESERVED (Per above, reserved fields are set to zero).
> 
> On macOS, the TSC and APIC Bus frequencies can be read by a sysctl call with
> the names `machdep.tsc.frequency` and `hw.busfrequency`
> 
> This option is required for a Darwin-XNU guest to be synchronized with
> the host
> 
> Leaf 0x40000000 does not expose HVF, leaving the hypervisor signature empty
> 
> Signed-off-by: Vladislav Yaroshchuk 
> ---
>  target/i386/hvf/hvf.c | 96 ++-
>  1 file changed, 95 insertions(+), 1 deletion(-)
> 

I'd prefer to have generic expose-accel option for CPU and
vmware-cpuid-freq=on would depend on expose-accel=on.

Regardless of that,

Reviewed-by: Roman Bolshakov 
Tested-by: Roman Bolshakov 

Thanks,
Roman



Re: [PATCH] trace: add meson custom_target() depend_files for tracetool

2021-02-04 Thread Philippe Mathieu-Daudé
On 1/27/21 9:56 PM, Philippe Mathieu-Daudé wrote:
> Hi Stefan,
> 
> On Mon, Jan 25, 2021 at 5:40 PM Philippe Mathieu-Daudé
>  wrote:>
>> On 1/25/21 12:09 PM, Stefan Hajnoczi wrote:
>>> Re-generate tracetool output when the tracetool source code changes. Use
>>> the same approach as qapi_gen_depends and introduce a tracetool_depends
>>> files list so meson is aware of the dependencies.
>>>
>>> Signed-off-by: Stefan Hajnoczi 
>>> ---
>>>  meson.build   | 28 +++-
>>>  trace/meson.build | 21 ++---
>>>  2 files changed, 41 insertions(+), 8 deletions(-)
> 
> Please do not merge "trace: make the 'log' backend timestamp configurable"
> without this patch -- even better, queue this one directly after it -- as this
> gave me troubles with the multiple directories I'm using to build:
> 
> In file included from trace/trace-io.c:5:
> trace/trace-io.h: In function ‘_nocheck__trace_qio_channel_command_wait’:
> trace/trace-io.h:1756:13: error: ‘message_with_timestamp’ undeclared
> (first use in this function); did you mean ‘error_with_timestamp’?
> 1756 | if (message_with_timestamp) {
>  | ^~
>  | error_with_timestamp
> ninja: build stopped: subcommand failed.

I just realized bisecting, this problem is present when jumping from
*after* 0572d6cd29d to *before* it. Range [7fd51e68c34 -> 0572d6cd29d].

I doubt there is much we can do :(




Re: [PATCH v21 00/20] Initial support for multi-process Qemu

2021-02-04 Thread Daniel P . Berrangé
On Wed, Feb 03, 2021 at 12:02:05PM -0800, Elena Ufimtseva wrote:
> On Wed, Feb 03, 2021 at 04:11:50PM +, Stefan Hajnoczi wrote:
> > On Fri, Jan 29, 2021 at 11:46:01AM -0500, Jagannathan Raman wrote:
> > > This is the v21 of the patchset. This version has the following changes:
> > 
> > The docs/multi-process.rst file caused Sphinx to fail with "document
> > isn't included in any toctree". I moved it into the system emulator
> > documentation to resolve this. The new path is
> > docs/system/multi-process.rst.
> >
> 
> Hi Stefan
> 
> Ah, we did not --enable-docs and .travis.yml also disables them.
> Will include into the tests we do for submission.

FWIW if you're relying on travis for testing before submission that's
something you'll want to change real soon. Travis has discontinued its
free service and turned it into a one time free trial. So all QEMU
primary CI is moving to GitLab CI now and we'll be deleting remaining
travis CI jobs any day now.  All you need to do is have a fork of the
main qemu repo on gitlab, and when you push to your fork the CI
pipeline will run in GitLab.


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 00/27] Block patches

2021-02-04 Thread Stefan Hajnoczi
The following changes since commit 77f3804ab7ed94b471a14acb260e5aeacf26193f:

  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
(2021-02-02 16:47:51 +)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 026362226f1ff6a1168524a326bbd6347ad40e85:

  docs: fix Parallels Image "dirty bitmap" section (2021-02-03 16:48:21 +)


Pull request

The pull request includes Multi-Process QEMU, GitLab repo URL updates, and even
a block layer patch to fix the Parallels Image format specification!



Denis V. Lunev (1):
  docs: fix Parallels Image "dirty bitmap" section

Elena Ufimtseva (8):
  multi-process: add configure and usage information
  io: add qio_channel_writev_full_all helper
  io: add qio_channel_readv_full_all_eof & qio_channel_readv_full_all
helpers
  multi-process: define MPQemuMsg format and transmission functions
  multi-process: introduce proxy object
  multi-process: add proxy communication functions
  multi-process: Forward PCI config space acceses to the remote process
  multi-process: perform device reset in the remote process

Jagannathan Raman (11):
  memory: alloc RAM from file at offset
  multi-process: Add config option for multi-process QEMU
  multi-process: setup PCI host bridge for remote device
  multi-process: setup a machine object for remote device process
  multi-process: Initialize message handler in remote device
  multi-process: Associate fd of a PCIDevice with its object
  multi-process: setup memory manager for remote device
  multi-process: PCI BAR read/write handling for proxy & remote
endpoints
  multi-process: Synchronize remote memory
  multi-process: create IOHUB object to handle irq
  multi-process: Retrieve PCI info from remote process

John G Johnson (1):
  multi-process: add the concept description to
docs/devel/qemu-multiprocess

Stefan Hajnoczi (6):
  .github: point Repo Lockdown bot to GitLab repo
  gitmodules: use GitLab repos instead of qemu.org
  gitlab-ci: remove redundant GitLab repo URL command
  docs: update README to use GitLab repo URLs
  pc-bios: update mirror URLs to GitLab
  get_maintainer: update repo URL to GitLab

 MAINTAINERS   |  24 +
 README.rst|   4 +-
 docs/devel/index.rst  |   1 +
 docs/devel/multi-process.rst  | 966 ++
 docs/system/index.rst |   1 +
 docs/system/multi-process.rst |  64 ++
 docs/interop/parallels.txt|   2 +-
 configure |  10 +
 meson.build   |   5 +-
 hw/remote/trace.h |   1 +
 include/exec/memory.h |   2 +
 include/exec/ram_addr.h   |   2 +-
 include/hw/pci-host/remote.h  |  30 +
 include/hw/pci/pci_ids.h  |   3 +
 include/hw/remote/iohub.h |  42 +
 include/hw/remote/machine.h   |  38 +
 include/hw/remote/memory.h|  19 +
 include/hw/remote/mpqemu-link.h   |  99 +++
 include/hw/remote/proxy-memory-listener.h |  28 +
 include/hw/remote/proxy.h |  48 ++
 include/io/channel.h  |  78 ++
 include/qemu/mmap-alloc.h |   4 +-
 include/sysemu/iothread.h |   6 +
 backends/hostmem-memfd.c  |   2 +-
 hw/misc/ivshmem.c |   3 +-
 hw/pci-host/remote.c  |  75 ++
 hw/remote/iohub.c | 119 +++
 hw/remote/machine.c   |  80 ++
 hw/remote/memory.c|  65 ++
 hw/remote/message.c   | 230 ++
 hw/remote/mpqemu-link.c   | 267 ++
 hw/remote/proxy-memory-listener.c | 227 +
 hw/remote/proxy.c | 379 +
 hw/remote/remote-obj.c| 203 +
 io/channel.c  | 116 ++-
 iothread.c|   6 +
 softmmu/memory.c  |   3 +-
 softmmu/physmem.c |  11 +-
 util/mmap-alloc.c |   7 +-
 util/oslib-posix.c|   2 +-
 .github/lockdown.yml  |   8 +-
 .gitlab-ci.yml|   1 -
 .gitmodules   |  44 +-
 Kconfig.host  |   4 +
 hw/Kconfig|   1 +
 hw/meson.build|   1 +
 hw/pci-host/Kconfig   |   3 +
 hw/pci-host/meson.build   |   1 +
 hw/remote/Kconfig |   4 +
 hw/remote/meson.build |  13 +
 hw/remote/t

Re: [PATCH v2 2/2] net: Add -ipv6-hostfwd option, ipv6_hostfwd_add/remove commands

2021-02-04 Thread Daniel P . Berrangé
On Wed, Feb 03, 2021 at 01:37:29PM -0800, dje--- via wrote:
> These are identical to their ipv4 counterparts, but for ipv6.
> 
> Signed-off-by: Doug Evans 
> ---
>  hmp-commands.hx |  28 ++
>  include/net/slirp.h |   2 +
>  net/slirp.c | 129 +++-
>  qapi/net.json   |   4 ++
>  4 files changed, 161 insertions(+), 2 deletions(-)
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index d4001f9c5d..bd51173472 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1392,6 +1392,34 @@ SRST
>Remove host-to-guest TCP or UDP redirection.
>  ERST
>  
> +#ifdef CONFIG_SLIRP
> +{
> +.name   = "ipv6_hostfwd_add",
> +.args_type  = "arg1:s,arg2:s?",
> +.params = "[netdev_id] 
> [tcp|udp]:[hostaddr6]:hostport-[guestaddr6]:guestport",
> +.help   = "redirect TCP6 or UDP6 connections from host to guest 
> (requires -net user)",
> +.cmd= hmp_ipv6_hostfwd_add,
> +},
> +#endif
> +SRST
> +``ipv6_hostfwd_add``
> +  Redirect TCP6 or UDP6 connections from host to guest (requires -net user).
> +ERST
> +
> +#ifdef CONFIG_SLIRP
> +{
> +.name   = "ipv6_hostfwd_remove",
> +.args_type  = "arg1:s,arg2:s?",
> +.params = "[netdev_id] [tcp|udp]:[hostaddr6]:hostport",
> +.help   = "remove host-to-guest TCP6 or UDP6 redirection",
> +.cmd= hmp_ipv6_hostfwd_remove,
> +},
> +#endif
> +SRST
> +``ipv6_hostfwd_remove``
> +  Remove host-to-guest TCP6 or UDP6 redirection.
> +ERST

Do we really need new commands for this? It seems to me that we
can reliably distinguish IPv4 vs v6 from the address format, and
thus existing commands can be adapted to support both.

This is the way other command line options and monitor commands
work for IPv4 vs IPv6 elsewhere in QEMU, so I think consistency
is beneficial.  We already have the helper method inet_parse()
that can do this parsing.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 01/27] .github: point Repo Lockdown bot to GitLab repo

2021-02-04 Thread Stefan Hajnoczi
Use the GitLab repo URL as the main repo location in order to reduce
load on qemu.org.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Message-id: 2021015017.156802-2-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 .github/lockdown.yml | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/lockdown.yml b/.github/lockdown.yml
index 9acc393f1c..07fc2f31ee 100644
--- a/.github/lockdown.yml
+++ b/.github/lockdown.yml
@@ -10,8 +10,8 @@ issues:
   comment: |
 Thank you for your interest in the QEMU project.
 
-This repository is a read-only mirror of the project's master
-repostories hosted on https://git.qemu.org/git/qemu.git.
+This repository is a read-only mirror of the project's repostories hosted
+at https://gitlab.com/qemu-project/qemu.git.
 The project does not process issues filed on GitHub.
 
 The project issues are tracked on Launchpad:
@@ -24,8 +24,8 @@ pulls:
   comment: |
 Thank you for your interest in the QEMU project.
 
-This repository is a read-only mirror of the project's master
-repostories hosted on https://git.qemu.org/git/qemu.git.
+This repository is a read-only mirror of the project's repostories hosted
+on https://gitlab.com/qemu-project/qemu.git.
 The project does not process merge requests filed on GitHub.
 
 QEMU welcomes contributions of code (either fixing bugs or adding new
-- 
2.29.2



[PULL 15/27] multi-process: define MPQemuMsg format and transmission functions

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Defines MPQemuMsg, which is the message that is sent to the remote
process. This message is sent over QIOChannel and is used to
command the remote process to perform various tasks.
Define transmission functions used by proxy and by remote.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
56ca8bcf95195b2b195b08f6b9565b6d7410bce5.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |   2 +
 meson.build |   1 +
 hw/remote/trace.h   |   1 +
 include/hw/remote/mpqemu-link.h |  63 ++
 include/sysemu/iothread.h   |   6 +
 hw/remote/mpqemu-link.c | 205 
 iothread.c  |   6 +
 hw/remote/meson.build   |   1 +
 hw/remote/trace-events  |   4 +
 9 files changed, 289 insertions(+)
 create mode 100644 hw/remote/trace.h
 create mode 100644 include/hw/remote/mpqemu-link.h
 create mode 100644 hw/remote/mpqemu-link.c
 create mode 100644 hw/remote/trace-events

diff --git a/MAINTAINERS b/MAINTAINERS
index 45979452ed..97137f617b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3205,6 +3205,8 @@ F: hw/pci-host/remote.c
 F: include/hw/pci-host/remote.h
 F: hw/remote/machine.c
 F: include/hw/remote/machine.h
+F: hw/remote/mpqemu-link.c
+F: include/hw/remote/mpqemu-link.h
 
 Build and test automation
 -
diff --git a/meson.build b/meson.build
index 7a7283a97d..43215c74e3 100644
--- a/meson.build
+++ b/meson.build
@@ -1800,6 +1800,7 @@ if have_system
 'net',
 'softmmu',
 'ui',
+'hw/remote',
   ]
 endif
 trace_events_subdirs += [
diff --git a/hw/remote/trace.h b/hw/remote/trace.h
new file mode 100644
index 00..5d5e3ac720
--- /dev/null
+++ b/hw/remote/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_remote.h"
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
new file mode 100644
index 00..cac699cb42
--- /dev/null
+++ b/include/hw/remote/mpqemu-link.h
@@ -0,0 +1,63 @@
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MPQEMU_LINK_H
+#define MPQEMU_LINK_H
+
+#include "qom/object.h"
+#include "qemu/thread.h"
+#include "io/channel.h"
+
+#define REMOTE_MAX_FDS 8
+
+#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data.u64)
+
+/**
+ * MPQemuCmd:
+ *
+ * MPQemuCmd enum type to specify the command to be executed on the remote
+ * device.
+ *
+ * This uses a private protocol between QEMU and the remote process. vfio-user
+ * protocol would supersede this in the future.
+ *
+ */
+typedef enum {
+MPQEMU_CMD_MAX,
+} MPQemuCmd;
+
+/**
+ * MPQemuMsg:
+ * @cmd: The remote command
+ * @size: Size of the data to be shared
+ * @data: Structured data
+ * @fds: File descriptors to be shared with remote device
+ *
+ * MPQemuMsg Format of the message sent to the remote device from QEMU.
+ *
+ */
+typedef struct {
+int cmd;
+size_t size;
+
+union {
+uint64_t u64;
+} data;
+
+int fds[REMOTE_MAX_FDS];
+int num_fds;
+} MPQemuMsg;
+
+bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
+bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
+
+bool mpqemu_msg_valid(MPQemuMsg *msg);
+
+#endif
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 0c5284dbbc..f177142f16 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -57,4 +57,10 @@ IOThread *iothread_create(const char *id, Error **errp);
 void iothread_stop(IOThread *iothread);
 void iothread_destroy(IOThread *iothread);
 
+/*
+ * Returns true if executing within IOThread context,
+ * false otherwise.
+ */
+bool qemu_in_iothread(void);
+
 #endif /* IOTHREAD_H */
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
new file mode 100644
index 00..b3d380e42b
--- /dev/null
+++ b/hw/remote/mpqemu-link.c
@@ -0,0 +1,205 @@
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/module.h"
+#include "hw/remote/mpqemu-link.h"
+#include "qapi/error.h"
+#include "qemu/iov.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "io/channel.h"
+#include "sysemu/iothread.h"
+#include "trace.h"
+
+/*
+ * Send message over the ioc QIOChannel.
+ * This function is safe to call from:
+ * - main loop in co-routine context. Will block the main loop if not in
+ *   co-routine context;
+ * - vCPU thread with no co-ro

[PULL 02/27] gitmodules: use GitLab repos instead of qemu.org

2021-02-04 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-3-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 .gitmodules | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 2bdeeacef8..08b1b48a09 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,66 +1,66 @@
 [submodule "roms/seabios"]
path = roms/seabios
-   url = https://git.qemu.org/git/seabios.git/
+   url = https://gitlab.com/qemu-project/seabios.git/
 [submodule "roms/SLOF"]
path = roms/SLOF
-   url = https://git.qemu.org/git/SLOF.git
+   url = https://gitlab.com/qemu-project/SLOF.git
 [submodule "roms/ipxe"]
path = roms/ipxe
-   url = https://git.qemu.org/git/ipxe.git
+   url = https://gitlab.com/qemu-project/ipxe.git
 [submodule "roms/openbios"]
path = roms/openbios
-   url = https://git.qemu.org/git/openbios.git
+   url = https://gitlab.com/qemu-project/openbios.git
 [submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
-   url = https://git.qemu.org/git/qemu-palcode.git
+   url = https://gitlab.com/qemu-project/qemu-palcode.git
 [submodule "roms/sgabios"]
path = roms/sgabios
-   url = https://git.qemu.org/git/sgabios.git
+   url = https://gitlab.com/qemu-project/sgabios.git
 [submodule "dtc"]
path = dtc
-   url = https://git.qemu.org/git/dtc.git
+   url = https://gitlab.com/qemu-project/dtc.git
 [submodule "roms/u-boot"]
path = roms/u-boot
-   url = https://git.qemu.org/git/u-boot.git
+   url = https://gitlab.com/qemu-project/u-boot.git
 [submodule "roms/skiboot"]
path = roms/skiboot
-   url = https://git.qemu.org/git/skiboot.git
+   url = https://gitlab.com/qemu-project/skiboot.git
 [submodule "roms/QemuMacDrivers"]
path = roms/QemuMacDrivers
-   url = https://git.qemu.org/git/QemuMacDrivers.git
+   url = https://gitlab.com/qemu-project/QemuMacDrivers.git
 [submodule "ui/keycodemapdb"]
path = ui/keycodemapdb
-   url = https://git.qemu.org/git/keycodemapdb.git
+   url = https://gitlab.com/qemu-project/keycodemapdb.git
 [submodule "capstone"]
path = capstone
-   url = https://git.qemu.org/git/capstone.git
+   url = https://gitlab.com/qemu-project/capstone.git
 [submodule "roms/seabios-hppa"]
path = roms/seabios-hppa
-   url = https://git.qemu.org/git/seabios-hppa.git
+   url = https://gitlab.com/qemu-project/seabios-hppa.git
 [submodule "roms/u-boot-sam460ex"]
path = roms/u-boot-sam460ex
-   url = https://git.qemu.org/git/u-boot-sam460ex.git
+   url = https://gitlab.com/qemu-project/u-boot-sam460ex.git
 [submodule "tests/fp/berkeley-testfloat-3"]
path = tests/fp/berkeley-testfloat-3
-   url = https://git.qemu.org/git/berkeley-testfloat-3.git
+   url = https://gitlab.com/qemu-project/berkeley-testfloat-3.git
 [submodule "tests/fp/berkeley-softfloat-3"]
path = tests/fp/berkeley-softfloat-3
-   url = https://git.qemu.org/git/berkeley-softfloat-3.git
+   url = https://gitlab.com/qemu-project/berkeley-softfloat-3.git
 [submodule "roms/edk2"]
path = roms/edk2
-   url = https://git.qemu.org/git/edk2.git
+   url = https://gitlab.com/qemu-project/edk2.git
 [submodule "slirp"]
path = slirp
-   url = https://git.qemu.org/git/libslirp.git
+   url = https://gitlab.com/qemu-project/libslirp.git
 [submodule "roms/opensbi"]
path = roms/opensbi
-   url =   https://git.qemu.org/git/opensbi.git
+   url =   https://gitlab.com/qemu-project/opensbi.git
 [submodule "roms/qboot"]
path = roms/qboot
-   url = https://git.qemu.org/git/qboot.git
+   url = https://gitlab.com/qemu-project/qboot.git
 [submodule "meson"]
path = meson
-   url = https://git.qemu.org/git/meson.git
+   url = https://gitlab.com/qemu-project/meson.git
 [submodule "roms/vbootrom"]
path = roms/vbootrom
-   url = https://git.qemu.org/git/vbootrom.git
+   url = https://gitlab.com/qemu-project/vbootrom.git
-- 
2.29.2



Re: [PATCH v2 00/93] TCI fixes and cleanups

2021-02-04 Thread Peter Maydell
On Thu, 4 Feb 2021 at 01:49, Richard Henderson
 wrote:
>
> Almost 7 years ago I detailed 5 major problems in tci[1], of
> which three still remain:
>
>   * Unaligned accesses to the bytecode stream, which means
> that we immediately SIGBUS on any host requiring alignment.
>   * Non-portable calls to helper functions.
>   * Full of useless ifdefs and TODOs.
>
> To my mind, this means the code is unmaintained, despite what it
> says in MAINTAINERS.  Thus tci *should* be simply removed.
> However, every time removal is suggested, someone comes out of the
> woodwork and says we should keep it, because it's useful for $FOO.

Not listed, but also a problem:
 * it's a configure-time choice, not a runtime choice

(Personally I'm on the "we should just remove it" side.)

thanks
-- PMM



[PULL 03/27] gitlab-ci: remove redundant GitLab repo URL command

2021-02-04 Thread Stefan Hajnoczi
It is no longer necessary to point .gitmodules at GitLab repos when
running in GitLab CI since they are now used all the time.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-4-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 .gitlab-ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7c0db64710..28a83afb91 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,7 +18,6 @@ include:
   image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
   before_script:
 - JOBS=$(expr $(nproc) + 1)
-- sed -i s,git.qemu.org/git,gitlab.com/qemu-project, .gitmodules
   script:
 - mkdir build
 - cd build
-- 
2.29.2



[PULL 16/27] multi-process: Initialize message handler in remote device

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Initializes the message handler function in the remote process. It is
called whenever there's an event pending on QIOChannel that registers
this function.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
99d38d8b93753a6409ac2340e858858cda59ab1b.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |  1 +
 include/hw/remote/machine.h |  9 ++
 hw/remote/message.c | 57 +
 hw/remote/meson.build   |  1 +
 4 files changed, 68 insertions(+)
 create mode 100644 hw/remote/message.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 97137f617b..771513bc34 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3207,6 +3207,7 @@ F: hw/remote/machine.c
 F: include/hw/remote/machine.h
 F: hw/remote/mpqemu-link.c
 F: include/hw/remote/mpqemu-link.h
+F: hw/remote/message.c
 
 Build and test automation
 -
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
index bdfbca40b9..b92b2ce705 100644
--- a/include/hw/remote/machine.h
+++ b/include/hw/remote/machine.h
@@ -14,6 +14,7 @@
 #include "qom/object.h"
 #include "hw/boards.h"
 #include "hw/pci-host/remote.h"
+#include "io/channel.h"
 
 struct RemoteMachineState {
 MachineState parent_obj;
@@ -21,7 +22,15 @@ struct RemoteMachineState {
 RemotePCIHost *host;
 };
 
+/* Used to pass to co-routine device and ioc. */
+typedef struct RemoteCommDev {
+PCIDevice *dev;
+QIOChannel *ioc;
+} RemoteCommDev;
+
 #define TYPE_REMOTE_MACHINE "x-remote-machine"
 OBJECT_DECLARE_SIMPLE_TYPE(RemoteMachineState, REMOTE_MACHINE)
 
+void coroutine_fn mpqemu_remote_msg_loop_co(void *data);
+
 #endif
diff --git a/hw/remote/message.c b/hw/remote/message.c
new file mode 100644
index 00..36e2d4fb0c
--- /dev/null
+++ b/hw/remote/message.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2020, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/machine.h"
+#include "io/channel.h"
+#include "hw/remote/mpqemu-link.h"
+#include "qapi/error.h"
+#include "sysemu/runstate.h"
+
+void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
+{
+g_autofree RemoteCommDev *com = (RemoteCommDev *)data;
+PCIDevice *pci_dev = NULL;
+Error *local_err = NULL;
+
+assert(com->ioc);
+
+pci_dev = com->dev;
+for (; !local_err;) {
+MPQemuMsg msg = {0};
+
+if (!mpqemu_msg_recv(&msg, com->ioc, &local_err)) {
+break;
+}
+
+if (!mpqemu_msg_valid(&msg)) {
+error_setg(&local_err, "Received invalid message from proxy"
+   "in remote process pid="FMT_pid"",
+   getpid());
+break;
+}
+
+switch (msg.cmd) {
+default:
+error_setg(&local_err,
+   "Unknown command (%d) received for device %s"
+   " (pid="FMT_pid")",
+   msg.cmd, DEVICE(pci_dev)->id, getpid());
+}
+}
+
+if (local_err) {
+error_report_err(local_err);
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
+} else {
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+}
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index a2b2fc0e59..9f5c57f35a 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -2,5 +2,6 @@ remote_ss = ss.source_set()
 
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('machine.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('mpqemu-link.c'))
+remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
 
 softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
-- 
2.29.2



[PULL 05/27] pc-bios: update mirror URLs to GitLab

2021-02-04 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-6-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 pc-bios/README | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pc-bios/README b/pc-bios/README
index 33f9754ad3..db7129ef64 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -20,7 +20,7 @@
   legacy x86 software to communicate with an attached serial console as
   if a video card were attached.  The master sources reside in a subversion
   repository at http://sgabios.googlecode.com/svn/trunk.  A git mirror is
-  available at https://git.qemu.org/git/sgabios.git.
+  available at https://gitlab.com/qemu-project/sgabios.git.
 
 - The PXE roms come from the iPXE project. Built with BANNER_TIME 0.
   Sources available at http://ipxe.org.  Vendor:Device ID -> ROM mapping:
@@ -37,7 +37,7 @@
 
 - The u-boot binary for e500 comes from the upstream denx u-boot project where
   it was compiled using the qemu-ppce500 target.
-  A git mirror is available at: https://git.qemu.org/git/u-boot.git
+  A git mirror is available at: https://gitlab.com/qemu-project/u-boot.git
   The hash used to compile the current version is: 2072e72
 
 - Skiboot (https://github.com/open-power/skiboot/) is an OPAL
-- 
2.29.2



[PULL 11/27] multi-process: setup PCI host bridge for remote device

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

PCI host bridge is set up for the remote device process. It is
implemented using remote-pcihost object. It is an extension of the PCI
host bridge set up by QEMU.
Remote-pcihost configures a PCI bus which could be used by the remote
PCI device to latch on to.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
0871ba857abb2eafacde07e7fe66a3f12415bfb2.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |  2 +
 include/hw/pci-host/remote.h | 29 ++
 hw/pci-host/remote.c | 75 
 hw/pci-host/Kconfig  |  3 ++
 hw/pci-host/meson.build  |  1 +
 hw/remote/Kconfig|  1 +
 6 files changed, 111 insertions(+)
 create mode 100644 include/hw/pci-host/remote.h
 create mode 100644 hw/pci-host/remote.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d2fe7f8db..45e777bc55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3201,6 +3201,8 @@ M: John G Johnson 
 S: Maintained
 F: docs/devel/multi-process.rst
 F: docs/system/multi-process.rst
+F: hw/pci-host/remote.c
+F: include/hw/pci-host/remote.h
 
 Build and test automation
 -
diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h
new file mode 100644
index 00..06b8a83a4b
--- /dev/null
+++ b/include/hw/pci-host/remote.h
@@ -0,0 +1,29 @@
+/*
+ * PCI Host for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_PCIHOST_H
+#define REMOTE_PCIHOST_H
+
+#include "exec/memory.h"
+#include "hw/pci/pcie_host.h"
+
+#define TYPE_REMOTE_PCIHOST "remote-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(RemotePCIHost, REMOTE_PCIHOST)
+
+struct RemotePCIHost {
+/*< private >*/
+PCIExpressHost parent_obj;
+/*< public >*/
+
+MemoryRegion *mr_pci_mem;
+MemoryRegion *mr_sys_io;
+};
+
+#endif
diff --git a/hw/pci-host/remote.c b/hw/pci-host/remote.c
new file mode 100644
index 00..eee45444ef
--- /dev/null
+++ b/hw/pci-host/remote.c
@@ -0,0 +1,75 @@
+/*
+ * Remote PCI host device
+ *
+ * Unlike PCI host devices that model physical hardware, the purpose
+ * of this PCI host is to host multi-process QEMU devices.
+ *
+ * Multi-process QEMU extends the PCI host of a QEMU machine into a
+ * remote process. Any PCI device attached to the remote process is
+ * visible in the QEMU guest. This allows existing QEMU device models
+ * to be reused in the remote process.
+ *
+ * This PCI host is purely a container for PCI devices. It's fake in the
+ * sense that the guest never sees this PCI host and has no way of
+ * accessing it. Its job is just to provide the environment that QEMU
+ * PCI device models need when running in a remote process.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/qdev-properties.h"
+#include "hw/pci-host/remote.h"
+#include "exec/memory.h"
+
+static const char *remote_pcihost_root_bus_path(PCIHostState *host_bridge,
+PCIBus *rootbus)
+{
+return ":00";
+}
+
+static void remote_pcihost_realize(DeviceState *dev, Error **errp)
+{
+PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+RemotePCIHost *s = REMOTE_PCIHOST(dev);
+
+pci->bus = pci_root_bus_new(DEVICE(s), "remote-pci",
+s->mr_pci_mem, s->mr_sys_io,
+0, TYPE_PCIE_BUS);
+}
+
+static void remote_pcihost_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+
+hc->root_bus_path = remote_pcihost_root_bus_path;
+dc->realize = remote_pcihost_realize;
+
+dc->user_creatable = false;
+set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+dc->fw_name = "pci";
+}
+
+static const TypeInfo remote_pcihost_info = {
+.name = TYPE_REMOTE_PCIHOST,
+.parent = TYPE_PCIE_HOST_BRIDGE,
+.instance_size = sizeof(RemotePCIHost),
+.class_init = remote_pcihost_class_init,
+};
+
+static void remote_pcihost_register(void)
+{
+type_register_static(&remote_pcihost_info);
+}
+
+type_init(remote_pcihost_register)
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index eb03f0489d..8b8c763c28 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -65,3 +65,6 @@ config PCI_POWERNV
 select PCI_EXPRESS
 select MSI_NONBROKEN
 select PCIE_PORT
+
+config REMOTE_PCIHOST
+bool
diff --git a/hw/pci-host/meson.build b

[PULL 04/27] docs: update README to use GitLab repo URLs

2021-02-04 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-5-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 58b9f2dc15..ce39d89077 100644
--- a/README.rst
+++ b/README.rst
@@ -60,7 +60,7 @@ The QEMU source code is maintained under the GIT version 
control system.
 
 .. code-block:: shell
 
-   git clone https://git.qemu.org/git/qemu.git
+   git clone https://gitlab.com/qemu-project/qemu.git
 
 When submitting patches, one common approach is to use 'git
 format-patch' and/or 'git send-email' to format & send the mail to the
@@ -78,7 +78,7 @@ The QEMU website is also maintained under source control.
 
 .. code-block:: shell
 
-  git clone https://git.qemu.org/git/qemu-web.git
+  git clone https://gitlab.com/qemu-project/qemu-web.git
 
 * ``_
 
-- 
2.29.2



[PULL 26/27] multi-process: perform device reset in the remote process

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Perform device reset in the remote process when QEMU performs
device reset. This is required to reset the internal state
(like registers, etc.) of emulated devices.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
7cb220a51f565dc0817bd76e2f540e89c2d2b850.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h |  1 +
 hw/remote/message.c | 22 ++
 hw/remote/proxy.c   | 19 +++
 3 files changed, 42 insertions(+)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 71d206f00e..4ec0915885 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -40,6 +40,7 @@ typedef enum {
 MPQEMU_CMD_BAR_WRITE,
 MPQEMU_CMD_BAR_READ,
 MPQEMU_CMD_SET_IRQFD,
+MPQEMU_CMD_DEVICE_RESET,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
diff --git a/hw/remote/message.c b/hw/remote/message.c
index adab040ca1..11d729845c 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -19,6 +19,7 @@
 #include "exec/memattrs.h"
 #include "hw/remote/memory.h"
 #include "hw/remote/iohub.h"
+#include "sysemu/reset.h"
 
 static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
  MPQemuMsg *msg, Error **errp);
@@ -26,6 +27,8 @@ static void process_config_read(QIOChannel *ioc, PCIDevice 
*dev,
 MPQemuMsg *msg, Error **errp);
 static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 static void process_bar_read(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
+static void process_device_reset_msg(QIOChannel *ioc, PCIDevice *dev,
+ Error **errp);
 
 void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 {
@@ -69,6 +72,9 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 case MPQEMU_CMD_SET_IRQFD:
 process_set_irqfd_msg(pci_dev, &msg);
 break;
+case MPQEMU_CMD_DEVICE_RESET:
+process_device_reset_msg(com->ioc, pci_dev, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
@@ -206,3 +212,19 @@ fail:
   getpid());
 }
 }
+
+static void process_device_reset_msg(QIOChannel *ioc, PCIDevice *dev,
+ Error **errp)
+{
+DeviceClass *dc = DEVICE_GET_CLASS(dev);
+DeviceState *s = DEVICE(dev);
+MPQemuMsg ret = { 0 };
+
+if (dc->reset) {
+dc->reset(s);
+}
+
+ret.cmd = MPQEMU_CMD_RET;
+
+mpqemu_msg_send(&ret, ioc, errp);
+}
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index a082709881..4fa4be079d 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -26,6 +26,7 @@
 #include "util/event_notifier-posix.c"
 
 static void probe_pci_info(PCIDevice *dev, Error **errp);
+static void proxy_device_reset(DeviceState *dev);
 
 static void proxy_intx_update(PCIDevice *pci_dev)
 {
@@ -202,6 +203,8 @@ static void pci_proxy_dev_class_init(ObjectClass *klass, 
void *data)
 k->config_read = pci_proxy_read_config;
 k->config_write = pci_proxy_write_config;
 
+dc->reset = proxy_device_reset;
+
 device_class_set_props(dc, proxy_properties);
 }
 
@@ -358,3 +361,19 @@ static void probe_pci_info(PCIDevice *dev, Error **errp)
 }
 }
 }
+
+static void proxy_device_reset(DeviceState *dev)
+{
+PCIProxyDev *pdev = PCI_PROXY_DEV(dev);
+MPQemuMsg msg = { 0 };
+Error *local_err = NULL;
+
+msg.cmd = MPQEMU_CMD_DEVICE_RESET;
+msg.size = 0;
+
+mpqemu_msg_send_and_await_reply(&msg, pdev, &local_err);
+if (local_err) {
+error_report_err(local_err);
+}
+
+}
-- 
2.29.2



[PULL 08/27] multi-process: add configure and usage information

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Adds documentation explaining the command-line arguments needed
to use multi-process.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
49f757a84e5dd6fae14b22544897d1124c5fdbad.1611938319.git.jag.ra...@oracle.com

[Move orphan docs/multi-process.rst document into docs/system/ and add
it to index.rst to prevent Sphinx "document isn't included in any
toctree" error.
--Stefan]

Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |  1 +
 docs/system/index.rst |  1 +
 docs/system/multi-process.rst | 64 +++
 3 files changed, 66 insertions(+)
 create mode 100644 docs/system/multi-process.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 2ff1ead4ab..9d2fe7f8db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3200,6 +3200,7 @@ M: Jagannathan Raman 
 M: John G Johnson 
 S: Maintained
 F: docs/devel/multi-process.rst
+F: docs/system/multi-process.rst
 
 Build and test automation
 -
diff --git a/docs/system/index.rst b/docs/system/index.rst
index d40f72c92b..625b494372 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -34,6 +34,7 @@ Contents:
pr-manager
targets
security
+   multi-process
deprecated
removed-features
build-platforms
diff --git a/docs/system/multi-process.rst b/docs/system/multi-process.rst
new file mode 100644
index 00..46bb0cafc2
--- /dev/null
+++ b/docs/system/multi-process.rst
@@ -0,0 +1,64 @@
+Multi-process QEMU
+==================
+
+This document describes how to configure and use multi-process qemu.
+For the design document refer to docs/devel/multi-process.rst.
+
+1) Configuration
+----------------
+
+Multi-process is enabled by default for targets that enable KVM.
+
+
+2) Usage
+--------
+
+Multi-process QEMU requires an orchestrator to launch.
+
+Following is a description of command-line used to launch mpqemu.
+
+* Orchestrator:
+
+  - The Orchestrator creates a unix socketpair
+
+  - It launches the remote process and passes one of the
+sockets to it via command-line.
+
+  - It then launches QEMU and specifies the other socket as an option
+to the Proxy device object
+
+* Remote Process:
+
+  - QEMU can enter remote process mode by using the "remote" machine
+option.
+
+  - The orchestrator creates a "remote-object" with details about
+the device and the file descriptor for the device
+
+  - The remaining options are no different from how one launches QEMU with
+devices.
+
+  - Example command-line for the remote process is as follows:
+
+  /usr/bin/qemu-system-x86_64\
+  -machine x-remote  \
+  -device lsi53c895a,id=lsi0 \
+  -drive id=drive_image2,file=/build/ol7-nvme-test-1.qcow2   \
+  -device scsi-hd,id=drive2,drive=drive_image2,bus=lsi0.0,scsi-id=0  \
+  -object x-remote-object,id=robj1,devid=lsi0,fd=4,
+
+* QEMU:
+
+  - Since parts of the RAM are shared between QEMU & remote process, a
+memory-backend-memfd is required to facilitate this, as follows:
+
+-object memory-backend-memfd,id=mem,size=2G
+
+  - A "x-pci-proxy-dev" device is created for each of the PCI devices emulated
+in the remote process. A "socket" sub-option specifies the other end of
+unix channel created by orchestrator. The "id" sub-option must be specified
+and should be the same as the "id" specified for the remote PCI device
+
+  - Example commandline for QEMU is as follows:
+
+  -device x-pci-proxy-dev,id=lsi0,socket=3
-- 
2.29.2



[PULL 10/27] multi-process: Add config option for multi-process QEMU

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Add configuration options to enable or disable multiprocess QEMU code

Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
6cc37253e35418ebd7b675a31a3df6e3c7a12dc1.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 configure | 10 ++
 meson.build   |  4 +++-
 Kconfig.host  |  4 
 hw/Kconfig|  1 +
 hw/remote/Kconfig |  3 +++
 5 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 hw/remote/Kconfig

diff --git a/configure b/configure
index e85d6baf8f..a73869860b 100755
--- a/configure
+++ b/configure
@@ -463,6 +463,7 @@ skip_meson=no
 gettext="auto"
 fuse="auto"
 fuse_lseek="auto"
+multiprocess="no"
 
 malloc_trim="auto"
 
@@ -797,6 +798,7 @@ Linux)
   linux="yes"
   linux_user="yes"
   vhost_user=${default_feature:-yes}
+  multiprocess=${default_feature:-yes}
 ;;
 esac
 
@@ -1556,6 +1558,10 @@ for opt do
   ;;
   --disable-fuse-lseek) fuse_lseek="disabled"
   ;;
+  --enable-multiprocess) multiprocess="yes"
+  ;;
+  --disable-multiprocess) multiprocess="no"
+  ;;
   *)
   echo "ERROR: unknown option $opt"
   echo "Try '$0 --help' for more information"
@@ -1908,6 +1914,7 @@ disabled with --disable-FEATURE, default is enabled if 
available
   libdaxctl   libdaxctl support
   fuseFUSE block device export
   fuse-lseek  SEEK_HOLE/SEEK_DATA support for FUSE exports
+  multiprocessMultiprocess QEMU support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -6082,6 +6089,9 @@ fi
 if test "$have_mlockall" = "yes" ; then
   echo "HAVE_MLOCKALL=y" >> $config_host_mak
 fi
+if test "$multiprocess" = "yes" ; then
+  echo "CONFIG_MULTIPROCESS_ALLOWED=y" >> $config_host_mak
+fi
 if test "$fuzzing" = "yes" ; then
   # If LIB_FUZZING_ENGINE is set, assume we are running on OSS-Fuzz, and the
   # needed CFLAGS have already been provided
diff --git a/meson.build b/meson.build
index 2d8b433ff0..7a7283a97d 100644
--- a/meson.build
+++ b/meson.build
@@ -1210,7 +1210,8 @@ host_kconfig = \
   ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
   (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
   ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \
-  ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : [])
+  ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \
+  ('CONFIG_MULTIPROCESS_ALLOWED' in config_host ? 
['CONFIG_MULTIPROCESS_ALLOWED=y'] : [])
 
 ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ]
 
@@ -2626,6 +2627,7 @@ summary_info += {'libpmem support':   
config_host.has_key('CONFIG_LIBPMEM')}
 summary_info += {'libdaxctl support': config_host.has_key('CONFIG_LIBDAXCTL')}
 summary_info += {'libudev':   libudev.found()}
 summary_info += {'FUSE lseek':fuse_lseek.found()}
+summary_info += {'Multiprocess QEMU': 
config_host.has_key('CONFIG_MULTIPROCESS_ALLOWED')}
 summary(summary_info, bool_yn: true, section: 'Dependencies')
 
 if not supported_cpus.contains(cpu)
diff --git a/Kconfig.host b/Kconfig.host
index a9a55a9c31..24255ef441 100644
--- a/Kconfig.host
+++ b/Kconfig.host
@@ -37,3 +37,7 @@ config VIRTFS
 
 config PVRDMA
 bool
+
+config MULTIPROCESS_ALLOWED
+bool
+imply MULTIPROCESS
diff --git a/hw/Kconfig b/hw/Kconfig
index 5ad3c6b5a4..525fb52506 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -27,6 +27,7 @@ source pci-host/Kconfig
 source pcmcia/Kconfig
 source pci/Kconfig
 source rdma/Kconfig
+source remote/Kconfig
 source rtc/Kconfig
 source scsi/Kconfig
 source sd/Kconfig
diff --git a/hw/remote/Kconfig b/hw/remote/Kconfig
new file mode 100644
index 00..54844467a0
--- /dev/null
+++ b/hw/remote/Kconfig
@@ -0,0 +1,3 @@
+config MULTIPROCESS
+bool
+depends on PCI && KVM
-- 
2.29.2



[PULL 06/27] get_maintainer: update repo URL to GitLab

2021-02-04 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-7-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 scripts/get_maintainer.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 271f5ff42a..e5499b94b4 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1377,7 +1377,7 @@ sub vcs_exists {
warn("$P: No supported VCS found.  Add --nogit to options?\n");
warn("Using a git repository produces better results.\n");
warn("Try latest git repository using:\n");
-   warn("git clone https://git.qemu.org/git/qemu.git\n";);
+   warn("git clone https://gitlab.com/qemu-project/qemu.git\n";);
$printed_novcs = 1;
 }
 return 0;
-- 
2.29.2



Re: gitlab containers are broken

2021-02-04 Thread Daniel P . Berrangé
On Thu, Feb 04, 2021 at 09:08:13AM +0100, Thomas Huth wrote:
> On 04/02/2021 07.27, Richard Henderson wrote:
> > On 2/3/21 8:03 PM, Thomas Huth wrote:
> > > On 04/02/2021 00.04, Richard Henderson wrote:
> > > > Something has gone wrong with the building of the containers
> > > > in gitlab, because *all* off them are installing Alpine Linux.
> > > > 
> > > > https://gitlab.com/rth7680/qemu/-/jobs/1006336396#L155
> > > 
> > > I think that's ok ... the output about alpine that you see there is just 
> > > the
> > > output from the container that builds the final container. Later you can 
> > > see
> > > some "yum install" lines in that output, too, that's where the CentOS 
> > > container
> > > gets build. And the final compilation job runs on CentOS, too:
> > > 
> > >   https://gitlab.com/rth7680/qemu/-/jobs/1006336699#L35
> > > 
> > > (look for the string "Red Hat" there)
> > 
> > Hmm.  Is there any way to get the full output of the container build?  At
> > present it's being truncated:
> > 
> > #7 [4/5] RUN yum install -y bzip2 bzip2-devel ccache 
> > csnappy-de...
> > 
> > 
> > In particular, I'm trying to add a new test, and I have added 
> > libffi-devel.i686
> > to the fedora-i386-cross.docker file, but then the actual build fails 
> > because
> > the libffi header file is missing.
> > 
> > I know you may need the actual patch to comment, but pointers to how to 
> > debug
> > this sort of failure are welcome.
> 
> I don't have a clue, all that container magic has been done by Daniel
> initially - maybe he can help (now on CC:) ...

I honestly never bother with the local QEMU container build scripts as
they're over-engineered IMHO.  If I need to debug something locally
I'd just do  "podman build -f path/to/docker-file /path/to/docker-dir".

Alternatively just make your changes to the dockerfiles and then push
the branch to gitlab. Gitlab will run the build and you can pull down
the docker image from your fork's docker registry.


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 18/27] multi-process: setup memory manager for remote device

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

The SyncSysmemMsg message format is defined. It is used to send the
file descriptors of the RAM regions to the remote device.
RAM on the remote device is configured with a set of file descriptors.
Old RAM regions are deleted and new regions, each with an fd, are
added to the RAM.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
7d2d1831d812e85f681e7a8ab99e032cf4704689.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |  2 +
 include/hw/remote/memory.h  | 19 ++
 include/hw/remote/mpqemu-link.h | 10 +
 hw/remote/memory.c  | 65 +
 hw/remote/mpqemu-link.c | 11 ++
 hw/remote/meson.build   |  2 +
 6 files changed, 109 insertions(+)
 create mode 100644 include/hw/remote/memory.h
 create mode 100644 hw/remote/memory.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e37fc4b226..88732e51a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3209,6 +3209,8 @@ F: hw/remote/mpqemu-link.c
 F: include/hw/remote/mpqemu-link.h
 F: hw/remote/message.c
 F: hw/remote/remote-obj.c
+F: include/hw/remote/memory.h
+F: hw/remote/memory.c
 
 Build and test automation
 -
diff --git a/include/hw/remote/memory.h b/include/hw/remote/memory.h
new file mode 100644
index 00..bc2e30945f
--- /dev/null
+++ b/include/hw/remote/memory.h
@@ -0,0 +1,19 @@
+/*
+ * Memory manager for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_MEMORY_H
+#define REMOTE_MEMORY_H
+
+#include "exec/hwaddr.h"
+#include "hw/remote/mpqemu-link.h"
+
+void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp);
+
+#endif
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index cac699cb42..6ee5bc5751 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -14,6 +14,7 @@
 #include "qom/object.h"
 #include "qemu/thread.h"
 #include "io/channel.h"
+#include "exec/hwaddr.h"
 
 #define REMOTE_MAX_FDS 8
 
@@ -30,9 +31,16 @@
  *
  */
 typedef enum {
+MPQEMU_CMD_SYNC_SYSMEM,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
+typedef struct {
+hwaddr gpas[REMOTE_MAX_FDS];
+uint64_t sizes[REMOTE_MAX_FDS];
+off_t offsets[REMOTE_MAX_FDS];
+} SyncSysmemMsg;
+
 /**
  * MPQemuMsg:
  * @cmd: The remote command
@@ -43,12 +51,14 @@ typedef enum {
  * MPQemuMsg Format of the message sent to the remote device from QEMU.
  *
  */
+
 typedef struct {
 int cmd;
 size_t size;
 
 union {
 uint64_t u64;
+SyncSysmemMsg sync_sysmem;
 } data;
 
 int fds[REMOTE_MAX_FDS];
diff --git a/hw/remote/memory.c b/hw/remote/memory.c
new file mode 100644
index 00..32085b1e05
--- /dev/null
+++ b/hw/remote/memory.c
@@ -0,0 +1,65 @@
+/*
+ * Memory manager for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/memory.h"
+#include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
+#include "qapi/error.h"
+
+static void remote_sysmem_reset(void)
+{
+MemoryRegion *sysmem, *subregion, *next;
+
+sysmem = get_system_memory();
+
+QTAILQ_FOREACH_SAFE(subregion, &sysmem->subregions, subregions_link, next) 
{
+if (subregion->ram) {
+memory_region_del_subregion(sysmem, subregion);
+object_unparent(OBJECT(subregion));
+}
+}
+}
+
+void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+SyncSysmemMsg *sysmem_info = &msg->data.sync_sysmem;
+MemoryRegion *sysmem, *subregion;
+static unsigned int suffix;
+int region;
+
+sysmem = get_system_memory();
+
+remote_sysmem_reset();
+
+for (region = 0; region < msg->num_fds; region++) {
+g_autofree char *name;
+subregion = g_new(MemoryRegion, 1);
+name = g_strdup_printf("remote-mem-%u", suffix++);
+memory_region_init_ram_from_fd(subregion, NULL,
+   name, sysmem_info->sizes[region],
+   true, msg->fds[region],
+   sysmem_info->offsets[region],
+   errp);
+
+if (*errp) {
+g_free(subregion);
+remote_sysmem_reset();
+return;
+}
+
+memory_region_add_subregion(sysmem, sysmem_info->gpas[region],
+subregion);
+
+}
+}
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index b3d380e42b..4b25649b39 100644
--- a/hw/remote/

[PULL 07/27] multi-process: add the concept description to docs/devel/qemu-multiprocess

2021-02-04 Thread Stefan Hajnoczi
From: John G Johnson 

Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
02a68adef99f5df6a380bf8fd7b90948777e411c.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |   7 +
 docs/devel/index.rst |   1 +
 docs/devel/multi-process.rst | 966 +++
 3 files changed, 974 insertions(+)
 create mode 100644 docs/devel/multi-process.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 00626941f1..2ff1ead4ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3194,6 +3194,13 @@ S: Maintained
 F: hw/semihosting/
 F: include/hw/semihosting/
 
+Multi-process QEMU
+M: Elena Ufimtseva 
+M: Jagannathan Raman 
+M: John G Johnson 
+S: Maintained
+F: docs/devel/multi-process.rst
+
 Build and test automation
 -
 Build and test automation
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 98a7016a9b..22854e334d 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -37,3 +37,4 @@ Contents:
clocks
qom
block-coroutine-wrapper
+   multi-process
diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst
new file mode 100644
index 00..69699329d6
--- /dev/null
+++ b/docs/devel/multi-process.rst
@@ -0,0 +1,966 @@
+This is the design document for multi-process QEMU. It does not
+necessarily reflect the status of the current implementation, which
+may lack features or be considerably different from what is described
+in this document. This document is still useful as a description of
+the goals and general direction of this feature.
+
+Please refer to the following wiki for latest details:
+https://wiki.qemu.org/Features/MultiProcessQEMU
+
+Multi-process QEMU
+==================
+
+QEMU is often used as the hypervisor for virtual machines running in the
+Oracle cloud. Since one of the advantages of cloud computing is the
+ability to run many VMs from different tenants in the same cloud
+infrastructure, a guest that compromised its hypervisor could
+potentially use the hypervisor's access privileges to access data it is
+not authorized for.
+
+QEMU can be susceptible to security attacks because it is a large,
+monolithic program that provides many features to the VMs it services.
+Many of these features can be configured out of QEMU, but even a reduced
+configuration QEMU has a large amount of code a guest can potentially
+attack. Separating QEMU reduces the attack surface by helping to
+limit each component in the system to only access the resources that
+it needs to perform its job.
+
+QEMU services
+-------------
+
+QEMU can be broadly described as providing three main services. One is a
+VM control point, where VMs can be created, migrated, re-configured, and
+destroyed. A second is to emulate the CPU instructions within the VM,
+often accelerated by HW virtualization features such as Intel's VT
+extensions. Finally, it provides IO services to the VM by emulating HW
+IO devices, such as disk and network devices.
+
+A multi-process QEMU
+--------------------
+
+A multi-process QEMU involves separating QEMU services into separate
+host processes. Each of these processes can be given only the privileges
+it needs to provide its service, e.g., a disk service could be given
+access only to the disk images it provides, and not be allowed to
+access other files, or any network devices. An attacker who compromised
+this service would not be able to use this exploit to access files or
+devices beyond what the disk service was given access to.
+
+A QEMU control process would remain, but in multi-process mode, would
+have no direct interfaces to the VM. During VM execution, it would still
+provide the user interface to hot-plug devices or live migrate the VM.
+
+A first step in creating a multi-process QEMU is to separate IO services
+from the main QEMU program, which would continue to provide CPU
+emulation, i.e., the control process would also be the CPU emulation
+process. In a later phase, CPU emulation could be separated from the
+control process.
+
+Separating IO services
+----------------------
+
+Separating IO services into individual host processes is a good place to
+begin for a couple of reasons. One is that the sheer number of IO devices
+QEMU can emulate provides a large surface of interfaces which could
+potentially be exploited, and, indeed, have been a source of exploits in
+the past. Another is that the modular nature of QEMU device emulation code
+provides interface points where the QEMU functions that perform device
+emulation can be separated from the QEMU functions that manage the
+emulation of guest CPU instructions. The devices emulated in the separate
+process are referred to as remote devices.
+
+QEMU device emulation
+~~~~~~~~~~~~~~~~~~~~~
+
+QEMU uses an object oriented SW architecture for device emulation code.
+Configured objects are all compiled into the QEMU binary, then objects

[PULL 12/27] multi-process: setup a machine object for remote device process

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

x-remote-machine object sets up various subsystems of the remote
device process. Instantiate PCI host bridge object and initialize RAM, IO &
PCI memory regions.

Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
c537f38d17f90453ca610c6b70cf3480274e0ba1.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |  2 ++
 include/hw/pci-host/remote.h |  1 +
 include/hw/remote/machine.h  | 27 ++
 hw/remote/machine.c  | 70 
 hw/meson.build   |  1 +
 hw/remote/meson.build|  5 +++
 6 files changed, 106 insertions(+)
 create mode 100644 include/hw/remote/machine.h
 create mode 100644 hw/remote/machine.c
 create mode 100644 hw/remote/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 45e777bc55..45979452ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3203,6 +3203,8 @@ F: docs/devel/multi-process.rst
 F: docs/system/multi-process.rst
 F: hw/pci-host/remote.c
 F: include/hw/pci-host/remote.h
+F: hw/remote/machine.c
+F: include/hw/remote/machine.h
 
 Build and test automation
 -
diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h
index 06b8a83a4b..3dcf6aa51d 100644
--- a/include/hw/pci-host/remote.h
+++ b/include/hw/pci-host/remote.h
@@ -24,6 +24,7 @@ struct RemotePCIHost {
 
 MemoryRegion *mr_pci_mem;
 MemoryRegion *mr_sys_io;
+MemoryRegion *mr_sys_mem;
 };
 
 #endif
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
new file mode 100644
index 00..bdfbca40b9
--- /dev/null
+++ b/include/hw/remote/machine.h
@@ -0,0 +1,27 @@
+/*
+ * Remote machine configuration
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_MACHINE_H
+#define REMOTE_MACHINE_H
+
+#include "qom/object.h"
+#include "hw/boards.h"
+#include "hw/pci-host/remote.h"
+
+struct RemoteMachineState {
+MachineState parent_obj;
+
+RemotePCIHost *host;
+};
+
+#define TYPE_REMOTE_MACHINE "x-remote-machine"
+OBJECT_DECLARE_SIMPLE_TYPE(RemoteMachineState, REMOTE_MACHINE)
+
+#endif
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
new file mode 100644
index 00..9519a6c0a4
--- /dev/null
+++ b/hw/remote/machine.c
@@ -0,0 +1,70 @@
+/*
+ * Machine for remote device
+ *
+ *  This machine type is used by the remote device process in multi-process
+ *  QEMU. QEMU device models depend on parent busses, interrupt controllers,
+ *  memory regions, etc. The remote machine type offers this environment so
+ *  that QEMU device models can be used as remote devices.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/machine.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "qapi/error.h"
+
+static void remote_machine_init(MachineState *machine)
+{
+MemoryRegion *system_memory, *system_io, *pci_memory;
+RemoteMachineState *s = REMOTE_MACHINE(machine);
+RemotePCIHost *rem_host;
+
+system_memory = get_system_memory();
+system_io = get_system_io();
+
+pci_memory = g_new(MemoryRegion, 1);
+memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
+
+rem_host = REMOTE_PCIHOST(qdev_new(TYPE_REMOTE_PCIHOST));
+
+rem_host->mr_pci_mem = pci_memory;
+rem_host->mr_sys_mem = system_memory;
+rem_host->mr_sys_io = system_io;
+
+s->host = rem_host;
+
+object_property_add_child(OBJECT(s), "remote-pcihost", OBJECT(rem_host));
+memory_region_add_subregion_overlap(system_memory, 0x0, pci_memory, -1);
+
+qdev_realize(DEVICE(rem_host), sysbus_get_default(), &error_fatal);
+}
+
+static void remote_machine_class_init(ObjectClass *oc, void *data)
+{
+MachineClass *mc = MACHINE_CLASS(oc);
+
+mc->init = remote_machine_init;
+mc->desc = "Experimental remote machine";
+}
+
+static const TypeInfo remote_machine = {
+.name = TYPE_REMOTE_MACHINE,
+.parent = TYPE_MACHINE,
+.instance_size = sizeof(RemoteMachineState),
+.class_init = remote_machine_class_init,
+};
+
+static void remote_machine_register_types(void)
+{
+type_register_static(&remote_machine);
+}
+
+type_init(remote_machine_register_types);
diff --git a/hw/meson.build b/hw/meson.build
index 010de7219c..e615d72d4d 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -56,6 +56,7 @@ subdir('moxie')
 subdir('nios2')
 subdir('openrisc')
 subdir('ppc')
+subdir('remote')
 subdir('riscv')
 subdir('rx')
 subdir('s390x')
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
new file mode 100644
index 00

[PULL 19/27] multi-process: introduce proxy object

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Defines a PCI Device proxy object as a child of TYPE_PCI_DEVICE.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
b5186ebfedf8e557044d09a768846c59230ad3a7.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |  2 +
 include/hw/remote/proxy.h | 33 +
 hw/remote/proxy.c | 99 +++
 hw/remote/meson.build |  1 +
 4 files changed, 135 insertions(+)
 create mode 100644 include/hw/remote/proxy.h
 create mode 100644 hw/remote/proxy.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 88732e51a2..51a8859357 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3211,6 +3211,8 @@ F: hw/remote/message.c
 F: hw/remote/remote-obj.c
 F: include/hw/remote/memory.h
 F: hw/remote/memory.c
+F: hw/remote/proxy.c
+F: include/hw/remote/proxy.h
 
 Build and test automation
 -
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
new file mode 100644
index 00..faa9c4d580
--- /dev/null
+++ b/include/hw/remote/proxy.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PROXY_H
+#define PROXY_H
+
+#include "hw/pci/pci.h"
+#include "io/channel.h"
+
+#define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
+OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
+
+struct PCIProxyDev {
+PCIDevice parent_dev;
+char *fd;
+
+/*
+ * Mutex used to protect the QIOChannel fd from
+ * concurrent access by the VCPUs, since the proxy
+ * blocks while waiting for replies from the
+ * remote process.
+ */
+QemuMutex io_mutex;
+QIOChannel *ioc;
+Error *migration_blocker;
+};
+
+#endif /* PROXY_H */
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
new file mode 100644
index 00..cd5b071ab4
--- /dev/null
+++ b/hw/remote/proxy.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/proxy.h"
+#include "hw/pci/pci.h"
+#include "qapi/error.h"
+#include "io/channel-util.h"
+#include "hw/qdev-properties.h"
+#include "monitor/monitor.h"
+#include "migration/blocker.h"
+#include "qemu/sockets.h"
+
+static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
+{
+ERRP_GUARD();
+PCIProxyDev *dev = PCI_PROXY_DEV(device);
+int fd;
+
+if (!dev->fd) {
+error_setg(errp, "fd parameter not specified for %s",
+   DEVICE(device)->id);
+return;
+}
+
+fd = monitor_fd_param(monitor_cur(), dev->fd, errp);
+if (fd == -1) {
+error_prepend(errp, "proxy: unable to parse fd %s: ", dev->fd);
+return;
+}
+
+if (!fd_is_socket(fd)) {
+error_setg(errp, "proxy: fd %d is not a socket", fd);
+close(fd);
+return;
+}
+
+dev->ioc = qio_channel_new_fd(fd, errp);
+
+error_setg(&dev->migration_blocker, "%s does not support migration",
+   TYPE_PCI_PROXY_DEV);
+migrate_add_blocker(dev->migration_blocker, errp);
+
+qemu_mutex_init(&dev->io_mutex);
+qio_channel_set_blocking(dev->ioc, true, NULL);
+}
+
+static void pci_proxy_dev_exit(PCIDevice *pdev)
+{
+PCIProxyDev *dev = PCI_PROXY_DEV(pdev);
+
+if (dev->ioc) {
+qio_channel_close(dev->ioc, NULL);
+}
+
+migrate_del_blocker(dev->migration_blocker);
+
+error_free(dev->migration_blocker);
+}
+
+static Property proxy_properties[] = {
+DEFINE_PROP_STRING("fd", PCIProxyDev, fd),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+k->realize = pci_proxy_dev_realize;
+k->exit = pci_proxy_dev_exit;
+device_class_set_props(dc, proxy_properties);
+}
+
+static const TypeInfo pci_proxy_dev_type_info = {
+.name  = TYPE_PCI_PROXY_DEV,
+.parent= TYPE_PCI_DEVICE,
+.instance_size = sizeof(PCIProxyDev),
+.class_init= pci_proxy_dev_class_init,
+.interfaces = (InterfaceInfo[]) {
+{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
+{ },
+},
+};
+
+static void pci_proxy_dev_register_types(void)
+{
+type_register_static(&pci_proxy_dev_type_info);
+}
+
+type_init(pci_proxy_dev_register_types)
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index 64da16c1de..569cd20edf 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -4,6 +4,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: 
files('machine.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCES

[PULL 09/27] memory: alloc RAM from file at offset

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Allow RAM MemoryRegion to be created from an offset in a file, instead
of allocating at offset of 0 by default. This is needed to synchronize
RAM between QEMU & remote process.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
609996697ad8617e3b01df38accc5c208c24d74e.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/exec/memory.h |  2 ++
 include/exec/ram_addr.h   |  2 +-
 include/qemu/mmap-alloc.h |  4 +++-
 backends/hostmem-memfd.c  |  2 +-
 hw/misc/ivshmem.c |  3 ++-
 softmmu/memory.c  |  3 ++-
 softmmu/physmem.c | 11 +++
 util/mmap-alloc.c |  7 ---
 util/oslib-posix.c|  2 +-
 9 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 521d9901d7..a9d2b669e8 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -990,6 +990,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @size: size of the region.
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @fd: the fd to mmap.
+ * @offset: offset within the file referenced by fd
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
@@ -1001,6 +1002,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
 uint64_t size,
 bool share,
 int fd,
+ram_addr_t offset,
 Error **errp);
 #endif
 
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index c6d2ef1d07..d465a483c6 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -121,7 +121,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, 
MemoryRegion *mr,
Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
  uint32_t ram_flags, int fd,
- Error **errp);
+ off_t offset, Error **errp);
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
   MemoryRegion *mr, Error **errp);
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index e786266b92..b096ffb7e9 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -16,6 +16,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
  *  otherwise, the alignment in use will be determined by QEMU.
  *  @shared: map has RAM_SHARED flag.
  *  @is_pmem: map has RAM_PMEM flag.
+ *  @map_offset: map starts at offset of map_offset from the start of fd
  *
  * Return:
  *  On success, return a pointer to the mapped area.
@@ -25,7 +26,8 @@ void *qemu_ram_mmap(int fd,
 size_t size,
 size_t align,
 bool shared,
-bool is_pmem);
+bool is_pmem,
+off_t map_offset);
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size);
 
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index e5626d4330..69b0ae30bb 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -55,7 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error 
**errp)
 name = host_memory_backend_get_name(backend);
 memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
name, backend->size,
-   backend->share, fd, errp);
+   backend->share, fd, 0, errp);
 g_free(name);
 }
 
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 0505b52c98..603e992a7f 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -495,7 +495,8 @@ static void process_msg_shmem(IVShmemState *s, int fd, 
Error **errp)
 
 /* mmap the region and map into the BAR2 */
 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
-   "ivshmem.bar2", size, true, fd, &local_err);
+   "ivshmem.bar2", size, true, fd, 0,
+   &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 333e1ed7b0..fa65f45532 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1609,6 +1609,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
 uint64_t size,
 bool share,
 int fd,
+ram_addr_t offset,
 Error **errp)
 {
 Error *err = NULL;
@@ -1618,7 +1619,7 @@ void memory_region_init_ram_f

[PULL 17/27] multi-process: Associate fd of a PCIDevice with its object

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Associate the file descriptor for a PCIDevice in the remote process with
its DeviceState object.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
f405a2ed5d7518b87bea7c59cfdf334d67e5ee51.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS|   1 +
 hw/remote/remote-obj.c | 203 +
 hw/remote/meson.build  |   1 +
 3 files changed, 205 insertions(+)
 create mode 100644 hw/remote/remote-obj.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 771513bc34..e37fc4b226 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3208,6 +3208,7 @@ F: include/hw/remote/machine.h
 F: hw/remote/mpqemu-link.c
 F: include/hw/remote/mpqemu-link.h
 F: hw/remote/message.c
+F: hw/remote/remote-obj.c
 
 Build and test automation
 -
diff --git a/hw/remote/remote-obj.c b/hw/remote/remote-obj.c
new file mode 100644
index 00..4f21254219
--- /dev/null
+++ b/hw/remote/remote-obj.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright © 2020, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/error-report.h"
+#include "qemu/notify.h"
+#include "qom/object_interfaces.h"
+#include "hw/qdev-core.h"
+#include "io/channel.h"
+#include "hw/qdev-core.h"
+#include "hw/remote/machine.h"
+#include "io/channel-util.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "qemu/sockets.h"
+#include "monitor/monitor.h"
+
+#define TYPE_REMOTE_OBJECT "x-remote-object"
+OBJECT_DECLARE_TYPE(RemoteObject, RemoteObjectClass, REMOTE_OBJECT)
+
+struct RemoteObjectClass {
+ObjectClass parent_class;
+
+unsigned int nr_devs;
+unsigned int max_devs;
+};
+
+struct RemoteObject {
+/* private */
+Object parent;
+
+Notifier machine_done;
+
+int32_t fd;
+char *devid;
+
+QIOChannel *ioc;
+
+DeviceState *dev;
+DeviceListener listener;
+};
+
+static void remote_object_set_fd(Object *obj, const char *str, Error **errp)
+{
+RemoteObject *o = REMOTE_OBJECT(obj);
+int fd = -1;
+
+fd = monitor_fd_param(monitor_cur(), str, errp);
+if (fd == -1) {
+error_prepend(errp, "Could not parse remote object fd %s:", str);
+return;
+}
+
+if (!fd_is_socket(fd)) {
+error_setg(errp, "File descriptor '%s' is not a socket", str);
+close(fd);
+return;
+}
+
+o->fd = fd;
+}
+
+static void remote_object_set_devid(Object *obj, const char *str, Error **errp)
+{
+RemoteObject *o = REMOTE_OBJECT(obj);
+
+g_free(o->devid);
+
+o->devid = g_strdup(str);
+}
+
+static void remote_object_unrealize_listener(DeviceListener *listener,
+ DeviceState *dev)
+{
+RemoteObject *o = container_of(listener, RemoteObject, listener);
+
+if (o->dev == dev) {
+object_unref(OBJECT(o));
+}
+}
+
+static void remote_object_machine_done(Notifier *notifier, void *data)
+{
+RemoteObject *o = container_of(notifier, RemoteObject, machine_done);
+DeviceState *dev = NULL;
+QIOChannel *ioc = NULL;
+Coroutine *co = NULL;
+RemoteCommDev *comdev = NULL;
+Error *err = NULL;
+
+dev = qdev_find_recursive(sysbus_get_default(), o->devid);
+if (!dev || !object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+error_report("%s is not a PCI device", o->devid);
+return;
+}
+
+ioc = qio_channel_new_fd(o->fd, &err);
+if (!ioc) {
+error_report_err(err);
+return;
+}
+qio_channel_set_blocking(ioc, false, NULL);
+
+o->dev = dev;
+
+o->listener.unrealize = remote_object_unrealize_listener;
+device_listener_register(&o->listener);
+
+/* co-routine should free this. */
+comdev = g_new0(RemoteCommDev, 1);
+*comdev = (RemoteCommDev) {
+.ioc = ioc,
+.dev = PCI_DEVICE(dev),
+};
+
+co = qemu_coroutine_create(mpqemu_remote_msg_loop_co, comdev);
+qemu_coroutine_enter(co);
+}
+
+static void remote_object_init(Object *obj)
+{
+RemoteObjectClass *k = REMOTE_OBJECT_GET_CLASS(obj);
+RemoteObject *o = REMOTE_OBJECT(obj);
+
+if (k->nr_devs >= k->max_devs) {
+error_report("Reached maximum number of devices: %u", k->max_devs);
+return;
+}
+
+o->ioc = NULL;
+o->fd = -1;
+o->devid = NULL;
+
+k->nr_devs++;
+
+o->machine_done.notify = remote_object_machine_done;
+qemu_add_machine_init_done_notifier(&o->machine_done);
+}
+
+static void remote_object_finalize(Object *obj)
+{
+RemoteObjectClass *k = REMOTE_OBJECT_GET_CLASS(obj);
+RemoteObject *o = REMOTE_OBJECT(obj);
+
+device_listener_unregister(&o->listener);
+
+if (o->ioc) {
+qio_channel_shutdown(o->i

[PULL 21/27] multi-process: Forward PCI config space accesses to the remote process

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

The Proxy Object sends the PCI config space accesses as messages
to the remote process over the communication channel.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
d3c94f4618813234655356c60e6f0d0362ff42d6.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h | 10 ++
 hw/remote/message.c | 60 +
 hw/remote/mpqemu-link.c |  8 -
 hw/remote/proxy.c   | 55 ++
 4 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 1b35d408f8..7bc0bddb5a 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -34,6 +34,9 @@
  */
 typedef enum {
 MPQEMU_CMD_SYNC_SYSMEM,
+MPQEMU_CMD_RET,
+MPQEMU_CMD_PCI_CFGWRITE,
+MPQEMU_CMD_PCI_CFGREAD,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
@@ -43,6 +46,12 @@ typedef struct {
 off_t offsets[REMOTE_MAX_FDS];
 } SyncSysmemMsg;
 
+typedef struct {
+uint32_t addr;
+uint32_t val;
+int len;
+} PciConfDataMsg;
+
 /**
  * MPQemuMsg:
  * @cmd: The remote command
@@ -60,6 +69,7 @@ typedef struct {
 
 union {
 uint64_t u64;
+PciConfDataMsg pci_conf_data;
 SyncSysmemMsg sync_sysmem;
 } data;
 
diff --git a/hw/remote/message.c b/hw/remote/message.c
index 36e2d4fb0c..636bd161bd 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -15,6 +15,12 @@
 #include "hw/remote/mpqemu-link.h"
 #include "qapi/error.h"
 #include "sysemu/runstate.h"
+#include "hw/pci/pci.h"
+
+static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
+ MPQemuMsg *msg, Error **errp);
+static void process_config_read(QIOChannel *ioc, PCIDevice *dev,
+MPQemuMsg *msg, Error **errp);
 
 void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 {
@@ -40,6 +46,12 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 }
 
 switch (msg.cmd) {
+case MPQEMU_CMD_PCI_CFGWRITE:
+process_config_write(com->ioc, pci_dev, &msg, &local_err);
+break;
+case MPQEMU_CMD_PCI_CFGREAD:
+process_config_read(com->ioc, pci_dev, &msg, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
@@ -55,3 +67,51 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 }
 }
+
+static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
+ MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+PciConfDataMsg *conf = (PciConfDataMsg *)&msg->data.pci_conf_data;
+MPQemuMsg ret = { 0 };
+
+if ((conf->addr + sizeof(conf->val)) > pci_config_size(dev)) {
+error_setg(errp, "Bad address for PCI config write, pid "FMT_pid".",
+   getpid());
+ret.data.u64 = UINT64_MAX;
+} else {
+pci_default_write_config(dev, conf->addr, conf->val, conf->len);
+}
+
+ret.cmd = MPQEMU_CMD_RET;
+ret.size = sizeof(ret.data.u64);
+
+if (!mpqemu_msg_send(&ret, ioc, NULL)) {
+error_prepend(errp, "Error returning code to proxy, pid "FMT_pid": ",
+  getpid());
+}
+}
+
+static void process_config_read(QIOChannel *ioc, PCIDevice *dev,
+MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+PciConfDataMsg *conf = (PciConfDataMsg *)&msg->data.pci_conf_data;
+MPQemuMsg ret = { 0 };
+
+if ((conf->addr + sizeof(conf->val)) > pci_config_size(dev)) {
+error_setg(errp, "Bad address for PCI config read, pid "FMT_pid".",
+   getpid());
+ret.data.u64 = UINT64_MAX;
+} else {
+ret.data.u64 = pci_default_read_config(dev, conf->addr, conf->len);
+}
+
+ret.cmd = MPQEMU_CMD_RET;
+ret.size = sizeof(ret.data.u64);
+
+if (!mpqemu_msg_send(&ret, ioc, NULL)) {
+error_prepend(errp, "Error returning code to proxy, pid "FMT_pid": ",
+  getpid());
+}
+}
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index 88d1f9bd5c..5bd6a9dcbf 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -207,7 +207,7 @@ uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, 
PCIProxyDev *pdev,
 return ret;
 }
 
-if (!mpqemu_msg_valid(&msg_reply)) {
+if (!mpqemu_msg_valid(&msg_reply) || msg_reply.cmd != MPQEMU_CMD_RET) {
 error_setg(errp, "ERROR: Invalid reply received for command %d",
  msg->cmd);
 return ret;
@@ -242,6 +242,12 @@ bool mpqemu_msg_valid(MPQemuMsg *msg)
 return false;
 }
 break;
+case MPQEMU_CMD_PCI_C

Re: [PATCH v4 17/18] migration/rdma: send data for both rdma-pin-all and NOT rdma-pin-all mode

2021-02-04 Thread Dr. David Alan Gilbert
* Chuan Zheng (zhengch...@huawei.com) wrote:
> Signed-off-by: Zhimin Feng 
> Signed-off-by: Chuan Zheng 
> ---
>  migration/rdma.c | 65 
> 
>  1 file changed, 61 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2097839..c19a91f 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2002,6 +2002,20 @@ static int qemu_rdma_write_one(QEMUFile *f, 
> RDMAContext *rdma,
> .repeat = 1,
>   };
>  
> +/* use multifd to send data */
> +if (migrate_use_multifd()) {
> +int channel = get_multifd_RDMA_channel();
> +int ret = 0;
> +MultiFDSendParams *multifd_send_param = NULL;
> +ret = get_multifd_send_param(channel, &multifd_send_param);
> +if (ret) {
> +error_report("rdma: error getting multifd_send_param(%d)", 
> channel);
> +return -EINVAL;
> +}
> +rdma = (RDMAContext *)multifd_send_param->rdma;
> +block = &(rdma->local_ram_blocks.block[current_index]);
> +}
> +
>  retry:
>  sge.addr = (uintptr_t)(block->local_host_addr +
>  (current_addr - block->offset));
> @@ -2197,6 +2211,27 @@ retry:
>  return 0;
>  }
>  
> +static int multifd_rdma_write_flush(void)
> +{
> +/* The multifd RDMA threads send data */
> +MultiFDSendParams *multifd_send_param = NULL;
> +RDMAContext *rdma = NULL;
> +MigrationState *s = migrate_get_current();
> +int ret = 0;
> +
> +ret = get_multifd_send_param(s->rdma_channel,
> + &multifd_send_param);
> +if (ret) {
> +error_report("rdma: error getting multifd_send_param(%d)",
> + s->rdma_channel);

Do we need these error_report's for get_multifd_send_param calls - how
can they fail in practice?

> +return ret;
> +}
> +rdma = (RDMAContext *)(multifd_send_param->rdma);
> +rdma->nb_sent++;
> +
> +return ret;

But this doesn't actually 'flush' anything?

> +}
> +
>  /*
>   * Push out any unwritten RDMA operations.
>   *
> @@ -2219,8 +2254,15 @@ static int qemu_rdma_write_flush(QEMUFile *f, 
> RDMAContext *rdma)
>  }
>  
>  if (ret == 0) {
> -rdma->nb_sent++;
> -trace_qemu_rdma_write_flush(rdma->nb_sent);
> +if (migrate_use_multifd()) {
> +ret = multifd_rdma_write_flush();
> +if (ret) {
> +return ret;
> +}
> +} else {
> +rdma->nb_sent++;
> +trace_qemu_rdma_write_flush(rdma->nb_sent);
> +}
>  }
>  
>  rdma->current_length = 0;
> @@ -4062,6 +4104,7 @@ wait_reg_complete:
>  }
>  
>  qemu_sem_post(&multifd_send_param->sem_sync);
> +qemu_sem_wait(&multifd_send_param->sem);

why?

>  }
>  }
>  
> @@ -4443,6 +4486,7 @@ static void *multifd_rdma_send_thread(void *opaque)
>  Error *local_err = NULL;
>  int ret = 0;
>  RDMAControlHeader head = { .len = 0, .repeat = 1 };
> +RDMAContext *rdma = p->rdma;
>  
>  trace_multifd_send_thread_start(p->id);
>  if (multifd_send_initial_packet(p, &local_err) < 0) {
> @@ -4451,7 +4495,7 @@ static void *multifd_rdma_send_thread(void *opaque)
>  
>  /* wait for semaphore notification to register memory */
>  qemu_sem_wait(&p->sem_sync);
> -if (qemu_rdma_registration(p->rdma) < 0) {
> +if (qemu_rdma_registration(rdma) < 0) {
>  goto out;
>  }
>  /*
> @@ -4466,12 +4510,25 @@ static void *multifd_rdma_send_thread(void *opaque)
>  break;
>  }
>  }
> +/* To complete polling(CQE) */
> +while (rdma->nb_sent) {

Where is nb_sent decremented?

> +ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL);
> +if (ret < 0) {
> +error_report("multifd RDMA migration: "
> + "complete polling error!");
> +return NULL;
> +}
> +}
>  /* Send FINISHED to the destination */
>  head.type = RDMA_CONTROL_REGISTER_FINISHED;
> -ret = qemu_rdma_exchange_send(p->rdma, &head, NULL, NULL, NULL, 
> NULL);
> +ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL);
>  if (ret < 0) {
> +error_report("multifd RDMA migration: "
> + "sending remote error!");
>  return NULL;
>  }
> +/* sync main thread */
> +qemu_sem_post(&p->sem);
>  }
>  
>  out:
> -- 
> 1.8.3.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PULL 00/24] tcg patch queue

2021-02-04 Thread Peter Maydell
On Wed, 3 Feb 2021 at 02:15, Richard Henderson
 wrote:
>
> The following changes since commit 77f3804ab7ed94b471a14acb260e5aeacf26193f:
>
>   Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
> (2021-02-02 16:47:51 +0000)
>
> are available in the Git repository at:
>
>   https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210202
>
> for you to fetch changes up to 0c823e596877a30fd6c17a1ae9f98218a53055ea:
>
>   tcg: Remove TCG_TARGET_CON_SET_H (2021-02-02 12:12:43 -1000)
>
> 
> TCG backend constraints cleanup
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/6.0
for any user-visible changes.

-- PMM



[PULL 20/27] multi-process: add proxy communication functions

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
d54edb4176361eed86b903e8f27058363b6c83b3.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h |  4 
 hw/remote/mpqemu-link.c | 34 +
 2 files changed, 38 insertions(+)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 6ee5bc5751..1b35d408f8 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -15,6 +15,8 @@
 #include "qemu/thread.h"
 #include "io/channel.h"
 #include "exec/hwaddr.h"
+#include "io/channel-socket.h"
+#include "hw/remote/proxy.h"
 
 #define REMOTE_MAX_FDS 8
 
@@ -68,6 +70,8 @@ typedef struct {
 bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
 bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
 
+uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
+ Error **errp);
 bool mpqemu_msg_valid(MPQemuMsg *msg);
 
 #endif
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index 4b25649b39..88d1f9bd5c 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -182,6 +182,40 @@ fail:
 return ret;
 }
 
+/*
+ * Send msg and wait for a reply with command code RET_MSG.
+ * Returns the message received of size u64 or UINT64_MAX
+ * on error.
+ * Called from VCPU thread in non-coroutine context.
+ * Used by the Proxy object to communicate to remote processes.
+ */
+uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
+ Error **errp)
+{
+ERRP_GUARD();
+MPQemuMsg msg_reply = {0};
+uint64_t ret = UINT64_MAX;
+
+assert(!qemu_in_coroutine());
+
+QEMU_LOCK_GUARD(&pdev->io_mutex);
+if (!mpqemu_msg_send(msg, pdev->ioc, errp)) {
+return ret;
+}
+
+if (!mpqemu_msg_recv(&msg_reply, pdev->ioc, errp)) {
+return ret;
+}
+
+if (!mpqemu_msg_valid(&msg_reply)) {
+error_setg(errp, "ERROR: Invalid reply received for command %d",
+ msg->cmd);
+return ret;
+}
+
+return msg_reply.data.u64;
+}
+
 bool mpqemu_msg_valid(MPQemuMsg *msg)
 {
 if (msg->cmd >= MPQEMU_CMD_MAX && msg->cmd < 0) {
-- 
2.29.2



[PULL 23/27] multi-process: Synchronize remote memory

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Add ProxyMemoryListener object which is used to keep the view of the RAM
in sync between QEMU and remote process.
A MemoryListener is registered for the system-memory AddressSpace. The
listener sends a SYNC_SYSMEM message to the remote process when the memory
listener commits changes to memory; the remote process receives
the message and processes it in its SYNC_SYSMEM handler.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
04fe4e6a9ca90d4f11ab6f59be7652f5b086a071.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |   2 +
 include/hw/remote/proxy-memory-listener.h |  28 +++
 include/hw/remote/proxy.h |   2 +
 hw/remote/message.c   |   4 +
 hw/remote/proxy-memory-listener.c | 227 ++
 hw/remote/proxy.c |   6 +
 hw/remote/meson.build |   1 +
 7 files changed, 270 insertions(+)
 create mode 100644 include/hw/remote/proxy-memory-listener.h
 create mode 100644 hw/remote/proxy-memory-listener.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 51a8859357..3b0ea950fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3213,6 +3213,8 @@ F: include/hw/remote/memory.h
 F: hw/remote/memory.c
 F: hw/remote/proxy.c
 F: include/hw/remote/proxy.h
+F: hw/remote/proxy-memory-listener.c
+F: include/hw/remote/proxy-memory-listener.h
 
 Build and test automation
 -
diff --git a/include/hw/remote/proxy-memory-listener.h 
b/include/hw/remote/proxy-memory-listener.h
new file mode 100644
index 00..c4f3efb928
--- /dev/null
+++ b/include/hw/remote/proxy-memory-listener.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PROXY_MEMORY_LISTENER_H
+#define PROXY_MEMORY_LISTENER_H
+
+#include "exec/memory.h"
+#include "io/channel.h"
+
+typedef struct ProxyMemoryListener {
+MemoryListener listener;
+
+int n_mr_sections;
+MemoryRegionSection *mr_sections;
+
+QIOChannel *ioc;
+} ProxyMemoryListener;
+
+void proxy_memory_listener_configure(ProxyMemoryListener *proxy_listener,
+ QIOChannel *ioc);
+void proxy_memory_listener_deconfigure(ProxyMemoryListener *proxy_listener);
+
+#endif
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index ea7fa4fb3c..12888b4f90 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -11,6 +11,7 @@
 
 #include "hw/pci/pci.h"
 #include "io/channel.h"
+#include "hw/remote/proxy-memory-listener.h"
 
 #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
@@ -36,6 +37,7 @@ struct PCIProxyDev {
 QemuMutex io_mutex;
 QIOChannel *ioc;
 Error *migration_blocker;
+ProxyMemoryListener proxy_listener;
 ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
diff --git a/hw/remote/message.c b/hw/remote/message.c
index f2e84457e0..25341d8ad2 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -17,6 +17,7 @@
 #include "sysemu/runstate.h"
 #include "hw/pci/pci.h"
 #include "exec/memattrs.h"
+#include "hw/remote/memory.h"
 
 static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
  MPQemuMsg *msg, Error **errp);
@@ -61,6 +62,9 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 case MPQEMU_CMD_BAR_READ:
 process_bar_read(com->ioc, &msg, &local_err);
 break;
+case MPQEMU_CMD_SYNC_SYSMEM:
+remote_sysmem_reconfig(&msg, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
diff --git a/hw/remote/proxy-memory-listener.c 
b/hw/remote/proxy-memory-listener.c
new file mode 100644
index 00..af1fa6f5aa
--- /dev/null
+++ b/hw/remote/proxy-memory-listener.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/compiler.h"
+#include "qemu/int128.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h"
+#include "cpu.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "qapi/error.h"
+#include "hw/remote/mpqemu-link.h"
+#include "hw/remote/proxy-memory-listener.h"
+
+/*
+ * TODO: get_fd_from_hostaddr(), proxy_mrs_can_merge() and
+ * proxy_memory_listener_commit() defined below perform tasks similar to the
+ * functions defined in vhost-user.c. These functions are good candidates
+ * for refactoring.
+

[PULL 13/27] io: add qio_channel_writev_full_all helper

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Adds qio_channel_writev_full_all() to transmit both data and FDs.
Refactors existing code to use this helper.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Acked-by: Daniel P. Berrangé 
Message-id: 
480fbf1fe4152495d60596c9b665124549b426a5.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/io/channel.h | 25 +
 io/channel.c | 15 ++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index ab9ea77959..19e76fc32f 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -777,4 +777,29 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
 IOHandler *io_write,
 void *opaque);
 
+/**
+ * qio_channel_writev_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to send
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Behaves like qio_channel_writev_full but will attempt
+ * to send all data passed (file handles and memory regions).
+ * The function will wait for all requested data
+ * to be written, yielding from the current coroutine
+ * if required.
+ *
+ * Returns: 0 if all bytes were written, or -1 on error
+ */
+
+int qio_channel_writev_full_all(QIOChannel *ioc,
+const struct iovec *iov,
+size_t niov,
+int *fds, size_t nfds,
+Error **errp);
+
 #endif /* QIO_CHANNEL_H */
diff --git a/io/channel.c b/io/channel.c
index 93d449dee2..0d4b8b5160 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -156,6 +156,15 @@ int qio_channel_writev_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
Error **errp)
+{
+return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
+}
+
+int qio_channel_writev_full_all(QIOChannel *ioc,
+const struct iovec *iov,
+size_t niov,
+int *fds, size_t nfds,
+Error **errp)
 {
 int ret = -1;
 struct iovec *local_iov = g_new(struct iovec, niov);
@@ -168,7 +177,8 @@ int qio_channel_writev_all(QIOChannel *ioc,
 
 while (nlocal_iov > 0) {
 ssize_t len;
-len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp);
+len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
+  errp);
 if (len == QIO_CHANNEL_ERR_BLOCK) {
 if (qemu_in_coroutine()) {
 qio_channel_yield(ioc, G_IO_OUT);
@@ -182,6 +192,9 @@ int qio_channel_writev_all(QIOChannel *ioc,
 }
 
 iov_discard_front(&local_iov, &nlocal_iov, len);
+
+fds = NULL;
+nfds = 0;
 }
 
 ret = 0;
-- 
2.29.2



[PULL 22/27] multi-process: PCI BAR read/write handling for proxy & remote endpoints

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Proxy device object implements handler for PCI BAR writes and reads.
The handler uses BAR_WRITE/BAR_READ message to communicate to the
remote process with the BAR address and value to be written/read.
The remote process implements handler for BAR_WRITE/BAR_READ
message.

Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
a8b76714a9688be5552c4c92d089bc9e8a4707ff.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h | 10 
 include/hw/remote/proxy.h   |  9 
 hw/remote/message.c | 83 +
 hw/remote/mpqemu-link.c |  6 +++
 hw/remote/proxy.c   | 60 
 5 files changed, 168 insertions(+)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 7bc0bddb5a..6303e62b17 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -37,6 +37,8 @@ typedef enum {
 MPQEMU_CMD_RET,
 MPQEMU_CMD_PCI_CFGWRITE,
 MPQEMU_CMD_PCI_CFGREAD,
+MPQEMU_CMD_BAR_WRITE,
+MPQEMU_CMD_BAR_READ,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
@@ -52,6 +54,13 @@ typedef struct {
 int len;
 } PciConfDataMsg;
 
+typedef struct {
+hwaddr addr;
+uint64_t val;
+unsigned size;
+bool memory;
+} BarAccessMsg;
+
 /**
  * MPQemuMsg:
  * @cmd: The remote command
@@ -71,6 +80,7 @@ typedef struct {
 uint64_t u64;
 PciConfDataMsg pci_conf_data;
 SyncSysmemMsg sync_sysmem;
+BarAccessMsg bar_access;
 } data;
 
 int fds[REMOTE_MAX_FDS];
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index faa9c4d580..ea7fa4fb3c 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -15,6 +15,14 @@
 #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
 
+typedef struct ProxyMemoryRegion {
+PCIProxyDev *dev;
+MemoryRegion mr;
+bool memory;
+bool present;
+uint8_t type;
+} ProxyMemoryRegion;
+
 struct PCIProxyDev {
 PCIDevice parent_dev;
 char *fd;
@@ -28,6 +36,7 @@ struct PCIProxyDev {
 QemuMutex io_mutex;
 QIOChannel *ioc;
 Error *migration_blocker;
+ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
 #endif /* PROXY_H */
diff --git a/hw/remote/message.c b/hw/remote/message.c
index 636bd161bd..f2e84457e0 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -16,11 +16,14 @@
 #include "qapi/error.h"
 #include "sysemu/runstate.h"
 #include "hw/pci/pci.h"
+#include "exec/memattrs.h"
 
 static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
  MPQemuMsg *msg, Error **errp);
 static void process_config_read(QIOChannel *ioc, PCIDevice *dev,
 MPQemuMsg *msg, Error **errp);
+static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
+static void process_bar_read(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 
 void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 {
@@ -52,6 +55,12 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 case MPQEMU_CMD_PCI_CFGREAD:
 process_config_read(com->ioc, pci_dev, &msg, &local_err);
 break;
+case MPQEMU_CMD_BAR_WRITE:
+process_bar_write(com->ioc, &msg, &local_err);
+break;
+case MPQEMU_CMD_BAR_READ:
+process_bar_read(com->ioc, &msg, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
@@ -115,3 +124,77 @@ static void process_config_read(QIOChannel *ioc, PCIDevice 
*dev,
   getpid());
 }
 }
+
+static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+BarAccessMsg *bar_access = &msg->data.bar_access;
+AddressSpace *as =
+bar_access->memory ? &address_space_memory : &address_space_io;
+MPQemuMsg ret = { 0 };
+MemTxResult res;
+uint64_t val;
+
+if (!is_power_of_2(bar_access->size) ||
+   (bar_access->size > sizeof(uint64_t))) {
+ret.data.u64 = UINT64_MAX;
+goto fail;
+}
+
+val = cpu_to_le64(bar_access->val);
+
+res = address_space_rw(as, bar_access->addr, MEMTXATTRS_UNSPECIFIED,
+   (void *)&val, bar_access->size, true);
+
+if (res != MEMTX_OK) {
+error_setg(errp, "Bad address %"PRIx64" for mem write, pid "FMT_pid".",
+   bar_access->addr, getpid());
+ret.data.u64 = -1;
+}
+
+fail:
+ret.cmd = MPQEMU_CMD_RET;
+ret.size = sizeof(ret.data.u64);
+
+if (!mpqemu_msg_send(&ret, ioc, NULL)) {
+error_prepend(errp, "Error returning code to proxy, pid "FMT_pid": ",
+  getpid());
+}
+}
+
+static void process_bar_read(QIOChannel *ioc

Re: [PATCH] qemu-storage-daemon: Enable object-add

2021-02-04 Thread Daniel P . Berrangé
On Thu, Feb 04, 2021 at 08:21:37AM +0100, Kevin Wolf wrote:
> As we don't have a fully QAPIfied version of object-add yet and it still
> has 'gen': false in the schema, it needs to be registered explicitly in
> init_qmp_commands() to be available for users.
> 
> Fixes: 2af282ec51a27116d0402cab237b8970800f870c
> Signed-off-by: Kevin Wolf 
> ---
>  storage-daemon/qemu-storage-daemon.c | 2 ++
>  1 file changed, 2 insertions(+)

Reviewed-by: Daniel P. Berrangé 


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 14/27] io: add qio_channel_readv_full_all_eof & qio_channel_readv_full_all helpers

2021-02-04 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Adds qio_channel_readv_full_all_eof() and qio_channel_readv_full_all()
to read both data and FDs. Refactors existing code to use these helpers.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Acked-by: Daniel P. Berrangé 
Message-id: 
b059c4cc0fb741e794d644c144cc21372cad877d.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/io/channel.h |  53 +++
 io/channel.c | 101 ++-
 2 files changed, 134 insertions(+), 20 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index 19e76fc32f..88988979f8 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -777,6 +777,59 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
 IOHandler *io_write,
 void *opaque);
 
+/**
+ * qio_channel_readv_full_all_eof:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to read
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Performs same function as qio_channel_readv_all_eof.
+ * Additionally, attempts to read file descriptors shared
+ * over the channel. The function will wait for all
+ * requested data to be read, yielding from the current
+ * coroutine if required. data refers to both file
+ * descriptors and the iovs.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *  occurs without data, or -1 on error
+ */
+
+int qio_channel_readv_full_all_eof(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int **fds, size_t *nfds,
+   Error **errp);
+
+/**
+ * qio_channel_readv_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to read
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Performs same function as qio_channel_readv_all.
+ * Additionally, attempts to read file descriptors shared
+ * over the channel. The function will wait for all
+ * requested data to be read, yielding from the current
+ * coroutine if required. data refers to both file
+ * descriptors and the iovs.
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+
+int qio_channel_readv_full_all(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int **fds, size_t *nfds,
+   Error **errp);
+
 /**
  * qio_channel_writev_full_all:
  * @ioc: the channel object
diff --git a/io/channel.c b/io/channel.c
index 0d4b8b5160..4555021b62 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -91,20 +91,48 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
   const struct iovec *iov,
   size_t niov,
   Error **errp)
+{
+return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
+}
+
+int qio_channel_readv_all(QIOChannel *ioc,
+  const struct iovec *iov,
+  size_t niov,
+  Error **errp)
+{
+return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
+}
+
+int qio_channel_readv_full_all_eof(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int **fds, size_t *nfds,
+   Error **errp)
 {
 int ret = -1;
 struct iovec *local_iov = g_new(struct iovec, niov);
 struct iovec *local_iov_head = local_iov;
 unsigned int nlocal_iov = niov;
+int **local_fds = fds;
+size_t *local_nfds = nfds;
 bool partial = false;
 
+if (nfds) {
+*nfds = 0;
+}
+
+if (fds) {
+*fds = NULL;
+}
+
 nlocal_iov = iov_copy(local_iov, nlocal_iov,
   iov, niov,
   0, iov_size(iov, niov));
 
-while (nlocal_iov > 0) {
+while ((nlocal_iov > 0) || local_fds) {
 ssize_t len;
-len = qio_channel_readv(ioc, local_iov, nlocal_iov, errp);
+len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
+ local_nfds, errp);
 if (len == QIO_CHANNEL_ERR_BLOCK) {
 if (qemu_in_coroutine()) {
 qio_channel_yield(ioc, G_IO_IN);
@@ -112,20 +140,50 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
 qio_channel_wait(ioc, G_IO_IN);
 }
 continue;
-} else if (len < 

Re: [PULL 00/27] Block patches

2021-02-04 Thread Peter Maydell
On Thu, 4 Feb 2021 at 09:58, Stefan Hajnoczi  wrote:
>
> The following changes since commit 77f3804ab7ed94b471a14acb260e5aeacf26193f:
>
>   Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
> (2021-02-02 16:47:51 +0000)
>
> are available in the Git repository at:
>
>   https://gitlab.com/stefanha/qemu.git tags/block-pull-request
>
> for you to fetch changes up to 026362226f1ff6a1168524a326bbd6347ad40e85:
>
>   docs: fix Parallels Image "dirty bitmap" section (2021-02-03 16:48:21 +0000)
>
> 
> Pull request
>
> The pull request includes Multi-Process QEMU, GitLab repo URL updates, and 
> even
> a block layer patch to fix the Parallels Image format specification!
>
> 

This has merge conflicts:

Auto-merging util/oslib-posix.c
CONFLICT (content): Merge conflict in util/oslib-posix.c
Auto-merging util/mmap-alloc.c
CONFLICT (content): Merge conflict in util/mmap-alloc.c
Auto-merging softmmu/physmem.c
CONFLICT (content): Merge conflict in softmmu/physmem.c
Auto-merging softmmu/memory.c
CONFLICT (content): Merge conflict in softmmu/memory.c
Auto-merging include/qemu/mmap-alloc.h
Auto-merging include/exec/ram_addr.h
CONFLICT (content): Merge conflict in include/exec/ram_addr.h
Auto-merging include/exec/memory.h
Auto-merging hw/Kconfig
Automatic merge failed; fix conflicts and then commit the result.

Can you rebase and resend, please?

thanks
-- PMM



Re: [PATCH v4 15/18] migration/rdma: only register the memory for multifd channels

2021-02-04 Thread Dr. David Alan Gilbert
* Chuan Zheng (zhengch...@huawei.com) wrote:
> All data is sent by the multifd channels, so we only register memory for
> the multifd channels; the main channel does not register it.
> 
> Signed-off-by: Zhimin Feng 
> Signed-off-by: Chuan Zheng 
> ---
>  migration/rdma.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index c906cc7..f5eb563 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -3939,6 +3939,12 @@ static int qemu_rdma_registration_stop(QEMUFile *f, 
> void *opaque,
>  
>  qemu_sem_post(&multifd_send_param->sem_sync);
>  }
> +
> +/*
> + * Use multifd to migrate, we only register memory for
> + * multifd RDMA channel and main channel don't register it.
> + */
> +goto wait_reg_complete;

No! No goto's for control flow except for error exits.

>  }
>  
>  /*
> @@ -3999,6 +4005,8 @@ static int qemu_rdma_registration_stop(QEMUFile *f, 
> void *opaque,
>  rdma->dest_blocks[i].remote_host_addr;
>  local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
>  }
> +
> +wait_reg_complete:
>  /* Wait for all multifd channels to complete registration */
>  if (migrate_use_multifd()) {
>  int i;
> -- 
> 1.8.3.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[PULL 25/27] multi-process: Retrieve PCI info from remote process

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Retrieve PCI configuration info about the remote device and
configure the Proxy PCI object based on the returned information.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
85ee367bbb993aa23699b44cfedd83b4ea6d5221.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/remote/proxy.c | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index 555b3103f4..a082709881 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -25,6 +25,8 @@
 #include "sysemu/kvm.h"
 #include "util/event_notifier-posix.c"
 
+static void probe_pci_info(PCIDevice *dev, Error **errp);
+
 static void proxy_intx_update(PCIDevice *pci_dev)
 {
 PCIProxyDev *dev = PCI_PROXY_DEV(pci_dev);
@@ -77,6 +79,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error 
**errp)
 {
 ERRP_GUARD();
 PCIProxyDev *dev = PCI_PROXY_DEV(device);
+uint8_t *pci_conf = device->config;
 int fd;
 
 if (!dev->fd) {
@@ -106,9 +109,14 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error 
**errp)
 qemu_mutex_init(&dev->io_mutex);
 qio_channel_set_blocking(dev->ioc, true, NULL);
 
+pci_conf[PCI_LATENCY_TIMER] = 0xff;
+pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
 proxy_memory_listener_configure(&dev->proxy_listener, dev->ioc);
 
 setup_irqfd(dev);
+
+probe_pci_info(PCI_DEVICE(dev), errp);
 }
 
 static void pci_proxy_dev_exit(PCIDevice *pdev)
@@ -274,3 +282,79 @@ const MemoryRegionOps proxy_mr_ops = {
 .max_access_size = 8,
 },
 };
+
+static void probe_pci_info(PCIDevice *dev, Error **errp)
+{
+PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
+uint32_t orig_val, new_val, base_class, val;
+PCIProxyDev *pdev = PCI_PROXY_DEV(dev);
+DeviceClass *dc = DEVICE_CLASS(pc);
+uint8_t type;
+int i, size;
+
+config_op_send(pdev, PCI_VENDOR_ID, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->vendor_id = (uint16_t)val;
+
+config_op_send(pdev, PCI_DEVICE_ID, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->device_id = (uint16_t)val;
+
+config_op_send(pdev, PCI_CLASS_DEVICE, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->class_id = (uint16_t)val;
+
+config_op_send(pdev, PCI_SUBSYSTEM_ID, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->subsystem_id = (uint16_t)val;
+
+base_class = pc->class_id >> 4;
+switch (base_class) {
+case PCI_BASE_CLASS_BRIDGE:
+set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+break;
+case PCI_BASE_CLASS_STORAGE:
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+break;
+case PCI_BASE_CLASS_NETWORK:
+set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+break;
+case PCI_BASE_CLASS_INPUT:
+set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+break;
+case PCI_BASE_CLASS_DISPLAY:
+set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+break;
+case PCI_BASE_CLASS_PROCESSOR:
+set_bit(DEVICE_CATEGORY_CPU, dc->categories);
+break;
+default:
+set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+break;
+}
+
+for (i = 0; i < PCI_NUM_REGIONS; i++) {
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &orig_val, 4,
+   MPQEMU_CMD_PCI_CFGREAD);
+new_val = 0xffffffff;
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &new_val, 4,
+   MPQEMU_CMD_PCI_CFGWRITE);
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &new_val, 4,
+   MPQEMU_CMD_PCI_CFGREAD);
+size = (~(new_val & 0xFFFFFFF0)) + 1;
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &orig_val, 4,
+   MPQEMU_CMD_PCI_CFGWRITE);
+type = (new_val & 0x1) ?
+   PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY;
+
+if (size) {
+g_autofree char *name;
+pdev->region[i].dev = pdev;
+pdev->region[i].present = true;
+if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) {
+pdev->region[i].memory = true;
+}
+name = g_strdup_printf("bar-region-%d", i);
+memory_region_init_io(&pdev->region[i].mr, OBJECT(pdev),
+  &proxy_mr_ops, &pdev->region[i],
+  name, size);
+pci_register_bar(dev, i, type, &pdev->region[i].mr);
+}
+}
+}
-- 
2.29.2



[PULL 24/27] multi-process: create IOHUB object to handle irq

2021-02-04 Thread Stefan Hajnoczi
From: Jagannathan Raman 

IOHUB object is added to manage PCI IRQs. It uses KVM_IRQFD
ioctl to create an irqfd for injecting PCI interrupts into the guest.
IOHUB object forwards the irqfd to the remote process. Remote process
uses this fd to directly send interrupts to the guest, bypassing QEMU.

Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
51d5c3d54e28a68b002e3875c59599c9f5a424a1.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |   2 +
 include/hw/pci/pci_ids.h|   3 +
 include/hw/remote/iohub.h   |  42 +++
 include/hw/remote/machine.h |   2 +
 include/hw/remote/mpqemu-link.h |   1 +
 include/hw/remote/proxy.h   |   4 ++
 hw/remote/iohub.c   | 119 
 hw/remote/machine.c |  10 +++
 hw/remote/message.c |   4 ++
 hw/remote/mpqemu-link.c |   5 ++
 hw/remote/proxy.c   |  56 +++
 hw/remote/meson.build   |   1 +
 12 files changed, 249 insertions(+)
 create mode 100644 include/hw/remote/iohub.h
 create mode 100644 hw/remote/iohub.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 3b0ea950fc..58da5d6e66 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3215,6 +3215,8 @@ F: hw/remote/proxy.c
 F: include/hw/remote/proxy.h
 F: hw/remote/proxy-memory-listener.c
 F: include/hw/remote/proxy-memory-listener.h
+F: hw/remote/iohub.c
+F: include/hw/remote/iohub.h
 
 Build and test automation
 -
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 11f8ab7149..bd0c17dc78 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -192,6 +192,9 @@
 #define PCI_DEVICE_ID_SUN_SIMBA  0x5000
 #define PCI_DEVICE_ID_SUN_SABRE  0xa000
 
+#define PCI_VENDOR_ID_ORACLE 0x108e
+#define PCI_DEVICE_ID_REMOTE_IOHUB   0xb000
+
 #define PCI_VENDOR_ID_CMD 0x1095
 #define PCI_DEVICE_ID_CMD_646 0x0646
 
diff --git a/include/hw/remote/iohub.h b/include/hw/remote/iohub.h
new file mode 100644
index 00..0bf98e0d78
--- /dev/null
+++ b/include/hw/remote/iohub.h
@@ -0,0 +1,42 @@
+/*
+ * IO Hub for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_IOHUB_H
+#define REMOTE_IOHUB_H
+
+#include "hw/pci/pci.h"
+#include "qemu/event_notifier.h"
+#include "qemu/thread-posix.h"
+#include "hw/remote/mpqemu-link.h"
+
+#define REMOTE_IOHUB_NB_PIRQS PCI_DEVFN_MAX
+
+typedef struct ResampleToken {
+void *iohub;
+int pirq;
+} ResampleToken;
+
+typedef struct RemoteIOHubState {
+PCIDevice d;
+EventNotifier irqfds[REMOTE_IOHUB_NB_PIRQS];
+EventNotifier resamplefds[REMOTE_IOHUB_NB_PIRQS];
+unsigned int irq_level[REMOTE_IOHUB_NB_PIRQS];
+ResampleToken token[REMOTE_IOHUB_NB_PIRQS];
+QemuMutex irq_level_lock[REMOTE_IOHUB_NB_PIRQS];
+} RemoteIOHubState;
+
+int remote_iohub_map_irq(PCIDevice *pci_dev, int intx);
+void remote_iohub_set_irq(void *opaque, int pirq, int level);
+void process_set_irqfd_msg(PCIDevice *pci_dev, MPQemuMsg *msg);
+
+void remote_iohub_init(RemoteIOHubState *iohub);
+void remote_iohub_finalize(RemoteIOHubState *iohub);
+
+#endif
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
index b92b2ce705..2a2a33c4b2 100644
--- a/include/hw/remote/machine.h
+++ b/include/hw/remote/machine.h
@@ -15,11 +15,13 @@
 #include "hw/boards.h"
 #include "hw/pci-host/remote.h"
 #include "io/channel.h"
+#include "hw/remote/iohub.h"
 
 struct RemoteMachineState {
 MachineState parent_obj;
 
 RemotePCIHost *host;
+RemoteIOHubState iohub;
 };
 
 /* Used to pass to co-routine device and ioc. */
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 6303e62b17..71d206f00e 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -39,6 +39,7 @@ typedef enum {
 MPQEMU_CMD_PCI_CFGREAD,
 MPQEMU_CMD_BAR_WRITE,
 MPQEMU_CMD_BAR_READ,
+MPQEMU_CMD_SET_IRQFD,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index 12888b4f90..741def71f1 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -12,6 +12,7 @@
 #include "hw/pci/pci.h"
 #include "io/channel.h"
 #include "hw/remote/proxy-memory-listener.h"
+#include "qemu/event_notifier.h"
 
 #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
@@ -38,6 +39,9 @@ struct PCIProxyDev {
 QIOChannel *ioc;
 Error *migration_blocker;
 ProxyMemoryListener proxy_listener;
+int virq;
+EventNotifier intr;
+EventNotifier resample;
 ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
diff --git a/hw/remote/i

[PULL 27/27] docs: fix Parallels Image "dirty bitmap" section

2021-02-04 Thread Stefan Hajnoczi
From: "Denis V. Lunev" 

The original specification says that the l1 table size is 64 * l1_size, which
is obviously wrong. The size of an l1 entry is 64 _bits_, not bytes.
Thus 64 is to be replaced with 8, as the specification is expressed in bytes.

There is also minor tweak, field name is renamed from l1 to l1_table,
which matches with the later text.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-id: 20210128171313.2210947-1-...@openvz.org
CC: Stefan Hajnoczi 
CC: Vladimir Sementsov-Ogievskiy 

[Replace the original commit message "docs: fix mistake in dirty bitmap
feature description" as suggested by Eric Blake.
--Stefan]

Signed-off-by: Stefan Hajnoczi 
---
 docs/interop/parallels.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/interop/parallels.txt b/docs/interop/parallels.txt
index e9271eba5d..f15bf35bd1 100644
--- a/docs/interop/parallels.txt
+++ b/docs/interop/parallels.txt
@@ -208,7 +208,7 @@ of its data area are:
   28 - 31:l1_size
   The number of entries in the L1 table of the bitmap.
 
-  variable:   l1 (64 * l1_size bytes)
+  variable:   l1_table (8 * l1_size bytes)
   L1 offset table (in bytes)
 
 A dirty bitmap is stored using a one-level structure for the mapping to host
-- 
2.29.2



Re: [PATCH 1/6] travis.yml: Move gprof/gcov test across to gitlab

2021-02-04 Thread Alex Bennée


Thomas Huth  writes:

> On 03/02/2021 20.32, Wainer dos Santos Moschetta wrote:
>> Hi,
>> 
>> On 2/3/21 8:32 AM, Thomas Huth wrote:
>>> From: Philippe Mathieu-Daudé 
>>>
>>> Similarly to commit 8cdb2cef3f1, move the gprof/gcov test to GitLab.
>>>
>>> The coverage-summary.sh script is not Travis-CI specific, make it
>>> generic.
>>>
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> Message-Id: <20201108204535.2319870-10-phi...@redhat.com>
>>> [thuth: Add gcovr and bsdmainutils which are required for the
>>>  overage-summary.sh script to the ubuntu docker file]
>> s/overage/coverage/
>>> Signed-off-by: Thomas Huth 
>>> ---
>>>   .gitlab-ci.yml | 12 
>>>   .travis.yml| 14 --
>>>   MAINTAINERS|  2 +-
>>>   scripts/{travis => ci}/coverage-summary.sh |  2 +-
>>>   tests/docker/dockerfiles/ubuntu2004.docker |  2 ++
>>>   5 files changed, 16 insertions(+), 16 deletions(-)
>>>   rename scripts/{travis => ci}/coverage-summary.sh (92%)
>>>
>>> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
>>> index 7c0db64710..8b97b512bb 100644
>>> --- a/.gitlab-ci.yml
>>> +++ b/.gitlab-ci.yml
>>> @@ -468,6 +468,18 @@ check-deprecated:
>>>   MAKE_CHECK_ARGS: check-tcg
>>> allow_failure: true
>>> +# gprof/gcov are GCC features
>>> +build-gprof-gcov:
>>> +  <<: *native_build_job_definition
>>> +  variables:
>>> +IMAGE: ubuntu2004
>>> +CONFIGURE_ARGS: --enable-gprof --enable-gcov
>>> +MAKE_CHECK_ARGS: build-tcg
>> 
>> With build-tcg it generates an empty report, e.g., 
>> https://gitlab.com/wainersm/qemu/-/jobs/1005923421
>> 
>> Shouldn't it run `make check`?
>
> D'oh, you're right. I think we need to run at least a "make check-unit" 
> here. I'll rework my patch accordingly...
>
> By the way, it's broken on Travis since a long time, e.g. with version 5.0 
> there is already only a stack trace:
>
> https://travis-ci.org/github/qemu/qemu/jobs/680661167#L8411
>
> Seems like nobody noticed this for almost a year now...

doh - the check_coverage was an after_success step so never influenced
the result. It was only a band aid really though - it would be better if
we published the html coverage report on gitlab's pages (like we now do
for annotated gtags source: https://qemu-project.gitlab.io/qemu/src/).

>
>   Thomas


-- 
Alex Bennée



Re: [PATCH v3 0/3]

2021-02-04 Thread Daniel P . Berrangé
On Wed, Feb 03, 2021 at 03:35:36PM -0800, dje--- via wrote:
> Add support for ipv6 host forwarding
> 
> This patchset takes the original patch from Maxim,
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg569573.html
> and updates it.
> 
> New option: -ipv6-hostfwd
> 
> New commands: ipv6_hostfwd_add, ipv6_hostfwd_remove
> 
> These are the ipv6 equivalents of their ipv4 counterparts.

Before I noticed this v3, I sent a reply to your v2 suggesting
that we don't need to add any new commands/options. We can
use existing inet_parse() helper function to parse the address
info and transparently support IPv4/6 in the existing commands
and options. This matches normal practice elsewhere in QEMU
for IP dual stack.


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v15 21/23] hw/core/cpu: call qemu_init_vcpu in cpu_common_realizefn

2021-02-04 Thread Claudio Fontana
On 2/3/21 5:51 PM, Alex Bennée wrote:
> 
> Claudio Fontana  writes:
> 
>> move the call to qemu_init_vcpu inside cpu_common_realizefn,
>> so it does not need to be done explicitly in each target cpu.
>>
>> Despite this, the way cpu realize is done continues to be not ideal;
>>
>> ideally the cpu_list_add would be done in common_cpu,
>> and in this case we could avoid even more redundant open coded
>> additional calls in target/xxx/cpu.c,
>>
>> but this cannot happen because target cpu code, plugins, etc
>> now all came to rely on cpu->index
>> (which is updated in cpu_list_add), since no particular order
>> was defined previously, so we are stuck with the freak call
>> order for the target cpu realizefn.
>>
>> After this patch the target/xxx/cpu.c realizefn body becomes:
>>
>> void mycpu_realizefn(DeviceState *dev, Error **errp)
>> {
>> /* ... */
>> cpu_exec_realizefn(CPU_STATE(dev), errp);
>>
>> /* ... anything that needs done pre-qemu_vcpu_init */
>>
>> xcc->parent_realize(dev, errp); /* does qemu_vcpu_init */
>>
>> /* ... anything that needs to be done after qemu_vcpu_init */
>> }
> 
> Uggh, introducing a magic order seems like inviting trouble for later
> on. Is there any way we can improve things? Paolo?
> 


The magic order is there already. I call it "freak order" instead of "magic", 
because this is more the result of uncontrolled code growth rather than the 
work of magic :-)

This patch attempts to remove one degree of freedom, but the current situation 
of cpu realizing is basically fubar. This patch attempts to improve things 
slightly,
but as mentioned elsewhere it basically fails to achieve the goal,

so I am tempted to just retire it. Maybe someone interested could look at the 
situation with new eyes (possibly even me later on).

Ciao,

Claudio







Re: [PATCH v4 18/18] migration/rdma: RDMA cleanup for multifd migration

2021-02-04 Thread Dr. David Alan Gilbert
* Chuan Zheng (zhengch...@huawei.com) wrote:
> Signed-off-by: Chuan Zheng 
> ---
>  migration/multifd.c |  6 ++
>  migration/multifd.h |  1 +
>  migration/rdma.c| 16 +++-
>  3 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 1186246..4031648 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -577,6 +577,9 @@ void multifd_save_cleanup(void)
>  p->packet_len = 0;
>  g_free(p->packet);
>  p->packet = NULL;
> +#ifdef CONFIG_RDMA
> +multifd_rdma_cleanup(p->rdma);
> +#endif

You may find it easier to add an entry into stubs/ for
multifd_rdma_cleanup; it then avoids the need for the ifdef.

>  multifd_send_state->ops->send_cleanup(p, &local_err);
>  if (local_err) {
>  migrate_set_error(migrate_get_current(), local_err);
> @@ -1039,6 +1042,9 @@ int multifd_load_cleanup(Error **errp)
>  p->packet_len = 0;
>  g_free(p->packet);
>  p->packet = NULL;
> +#ifdef CONFIG_RDMA
> +multifd_rdma_cleanup(p->rdma);
> +#endif
>  multifd_recv_state->ops->recv_cleanup(p);
>  }
>  qemu_sem_destroy(&multifd_recv_state->sem_sync);
> diff --git a/migration/multifd.h b/migration/multifd.h
> index 26d4489..0ecec5e 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -183,6 +183,7 @@ typedef struct {
>  
>  #ifdef CONFIG_RDMA
>  extern MultiFDSetup multifd_rdma_ops;
> +void multifd_rdma_cleanup(void *opaque);
>  #endif
>  void multifd_send_terminate_threads(Error *err);
>  int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp);
> diff --git a/migration/rdma.c b/migration/rdma.c
> index c19a91f..f14357f 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2369,7 +2369,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>  {
>  int idx;
>  
> -if (rdma->cm_id && rdma->connected) {
> +if (rdma->channel && rdma->cm_id && rdma->connected) {
>  if ((rdma->error_state ||
>   migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) &&
>  !rdma->received_error) {
> @@ -4599,6 +4599,20 @@ static void multifd_rdma_recv_channel_setup(QIOChannel 
> *ioc,
>  return;
>  }
>  
> +void multifd_rdma_cleanup(void *opaque)

I think you need to make it clear that this is only to cleanup one
channel, rather than the whole multifd-rdma connection;
multifd_load_cleanup for example cleans up all the channels, whereas I
think this is only doing one?

Don't use a 'void *opaque' except for something that's called via
a registration/callback scheme that's designed to be generic
(e.g. multifd_send_thread does it because it's called from
qemu_thread_create that doesn't know the type).  Where you know
the type, use it!

> +{
> +RDMAContext *rdma = (RDMAContext *)opaque;
> +
> +if (!migrate_use_rdma()) {
> +return;
> +}
> +
> +rdma->listen_id = NULL;
> +rdma->channel = NULL;
> +qemu_rdma_cleanup(rdma);
> +g_free(rdma);
> +}
> +
>  MultiFDSetup multifd_rdma_ops = {
>  .send_thread = multifd_rdma_send_thread,
>  .recv_thread = multifd_rdma_recv_thread,
> -- 
> 1.8.3.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH 01/22] tests/acceptance/boot_linux.py: fix typo on cloudinit error message

2021-02-04 Thread Alex Bennée


Cleber Rosa  writes:

> Signed-off-by: Cleber Rosa 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: [PATCH 01/22] tests/acceptance/boot_linux.py: fix typo on cloudinit error message

2021-02-04 Thread Beraldo Leal
On Wed, Feb 03, 2021 at 12:23:36PM -0500, Cleber Rosa wrote:
> Signed-off-by: Cleber Rosa 
> ---
>  tests/acceptance/boot_linux.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Beraldo Leal 




Re: [PATCH 02/22] tests/acceptance/boot_linux.py: rename misleading cloudinit method

2021-02-04 Thread Alex Bennée


Cleber Rosa  writes:

> There's no downloading happening on that method, so let's call it
> "prepare" instead.  While at it, and because of it, the current
> "prepare_boot" and "prepare_cloudinit" are also renamed.
>
> The reasoning here is that "prepare_" methods will just work on the
> images, while "set_up_" will make them effective to the VM that will
> be launched.  Inspiration comes from the "virtiofs_submounts.py"
> tests, which this expects to converge more into.
>
> Signed-off-by: Cleber Rosa 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: [PATCH 03/22] Acceptance Tests: remove unnecessary tag from documentation example

2021-02-04 Thread Alex Bennée


Cleber Rosa  writes:

> The ":avocado: enable" is not necessary and was removed in 9531d26c,
> so let's remove from the docs.
>
> Signed-off-by: Cleber Rosa 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: [PATCH 02/22] tests/acceptance/boot_linux.py: rename misleading cloudinit method

2021-02-04 Thread Beraldo Leal
On Wed, Feb 03, 2021 at 12:23:37PM -0500, Cleber Rosa wrote:
> There's no downloading happening on that method, so let's call it
> "prepare" instead.  While at it, and because of it, the current
> "prepare_boot" and "prepare_cloudinit" are also renamed.
> 
> The reasoning here is that "prepare_" methods will just work on the
> images, while "set_up_" will make them effective to the VM that will
> be launched.  Inspiration comes from the "virtiofs_submounts.py"
> tests, which this expects to converge more into.
> 
> Signed-off-by: Cleber Rosa 
> ---
>  tests/acceptance/boot_linux.py | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py
> index 2ac3e57587..bcd923bb4a 100644
> --- a/tests/acceptance/boot_linux.py
> +++ b/tests/acceptance/boot_linux.py
> @@ -57,7 +57,7 @@ class BootLinuxBase(Test):
>  self.cancel('Failed to download/prepare boot image')
>  return boot.path
>  
> -def download_cloudinit(self, ssh_pubkey=None):
> +def prepare_cloudinit(self, ssh_pubkey=None):
>  self.log.info('Preparing cloudinit image')
>  try:
>  cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso')
> @@ -85,15 +85,15 @@ class BootLinux(BootLinuxBase):
>  super(BootLinux, self).setUp()
>  self.vm.add_args('-smp', '2')
>  self.vm.add_args('-m', '1024')
> -self.prepare_boot()
> -self.prepare_cloudinit(ssh_pubkey)
> +self.set_up_boot()
> +self.set_up_cloudinit(ssh_pubkey)
>  
> -def prepare_boot(self):
> +def set_up_boot(self):
>  path = self.download_boot()
>  self.vm.add_args('-drive', 'file=%s' % path)
>  
> -def prepare_cloudinit(self, ssh_pubkey=None):
> -cloudinit_iso = self.download_cloudinit(ssh_pubkey)
> +def set_up_cloudinit(self, ssh_pubkey=None):
> +cloudinit_iso = self.prepare_cloudinit(ssh_pubkey)
>  self.vm.add_args('-drive', 'file=%s,format=raw' % cloudinit_iso)
>  
>  def launch_and_wait(self):
> -- 
> 2.25.4
>

Reviewed-by: Beraldo Leal 




Re: [PATCH 04/22] tests/acceptance/virtiofs_submounts.py: use workdir property

2021-02-04 Thread Alex Bennée


Cleber Rosa  writes:

> For Avocado Instrumented based tests, it's a better idea to just use
> the property.  The environment variable is a fall back for tests not
> written using that Python API.
>
> Reference: 
> https://avocado-framework.readthedocs.io/en/84.0/api/test/avocado.html#avocado.Test.workdir
> Signed-off-by: Cleber Rosa 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



[PATCH v2 01/20] vhost-user-gpu: check backend for EDID support

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

EDID has been enabled by default, but the backend may not implement
it (such as the contrib backend). This results in extra warnings and
potentially other issues in the guest.

The option probably shouldn't have been added to VIRTIO_GPU_BASE, but
it's a bit too late now, report an error and disable EDID when it's
not available.

Fixes: 0a7196625 ("edid: flip the default to enabled")

Signed-off-by: Marc-André Lureau 
---
 hw/display/vhost-user-gpu.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 51f1747c4a..55b0ed15f2 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -555,6 +555,12 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error 
**errp)
 if (virtio_has_feature(g->vhost->dev.features, VIRTIO_GPU_F_VIRGL)) {
 g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED;
 }
+if (virtio_has_feature(g->vhost->dev.features, VIRTIO_GPU_F_EDID)) {
+g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_EDID_ENABLED;
+} else {
+error_report("EDID requested but the backend doesn't support it.");
+g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_EDID_ENABLED);
+}
 
 if (!virtio_gpu_base_device_realize(qdev, NULL, NULL, errp)) {
 return;
-- 
2.29.0




[PATCH v2 02/20] vhost-user-gpu: handle vhost-user-gpu features in a callback

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Fixes a deadlock where the backend calls QEMU, while QEMU also calls the
backend simultaneously, both ends waiting for each other.

Signed-off-by: Marc-André Lureau 
---
 contrib/vhost-user-gpu/vugpu.h  |  2 +-
 contrib/vhost-user-gpu/vhost-user-gpu.c | 37 ++---
 2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h
index ad664c4df8..86f3ac86aa 100644
--- a/contrib/vhost-user-gpu/vugpu.h
+++ b/contrib/vhost-user-gpu/vugpu.h
@@ -118,7 +118,7 @@ typedef struct VuGpu {
 int sock_fd;
 int drm_rnode_fd;
 GSource *renderer_source;
-guint wait_ok;
+guint wait_in;
 
 bool virgl;
 bool virgl_inited;
diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c 
b/contrib/vhost-user-gpu/vhost-user-gpu.c
index f445ef28ec..85c16404fb 100644
--- a/contrib/vhost-user-gpu/vhost-user-gpu.c
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
@@ -124,7 +124,7 @@ source_wait_cb(gint fd, GIOCondition condition, gpointer 
user_data)
 }
 
 /* resume */
-g->wait_ok = 0;
+g->wait_in = 0;
 vg_handle_ctrl(&g->dev.parent, 0);
 
 return G_SOURCE_REMOVE;
@@ -133,8 +133,8 @@ source_wait_cb(gint fd, GIOCondition condition, gpointer 
user_data)
 void
 vg_wait_ok(VuGpu *g)
 {
-assert(g->wait_ok == 0);
-g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
+assert(g->wait_in == 0);
+g->wait_in = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
source_wait_cb, g);
 }
 
@@ -270,7 +270,7 @@ vg_get_display_info(VuGpu *vg, struct 
virtio_gpu_ctrl_command *cmd)
 .size = 0,
 };
 
-assert(vg->wait_ok == 0);
+assert(vg->wait_in == 0);
 
 vg_send_msg(vg, &msg, -1);
 if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) {
@@ -815,7 +815,7 @@ vg_handle_ctrl(VuDev *dev, int qidx)
 size_t len;
 
 for (;;) {
-if (vg->wait_ok != 0) {
+if (vg->wait_in != 0) {
 return;
 }
 
@@ -969,18 +969,17 @@ vg_queue_set_started(VuDev *dev, int qidx, bool started)
 }
 }
 
-static void
-set_gpu_protocol_features(VuGpu *g)
+static gboolean
+protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data)
 {
+VuGpu *g = user_data;
 uint64_t u64;
 VhostUserGpuMsg msg = {
 .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES
 };
 
-assert(g->wait_ok == 0);
-vg_send_msg(g, &msg, -1);
 if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) {
-return;
+return G_SOURCE_CONTINUE;
 }
 
 msg = (VhostUserGpuMsg) {
@@ -989,6 +988,24 @@ set_gpu_protocol_features(VuGpu *g)
 .payload.u64 = 0
 };
 vg_send_msg(g, &msg, -1);
+
+g->wait_in = 0;
+vg_handle_ctrl(&g->dev.parent, 0);
+
+return G_SOURCE_REMOVE;
+}
+
+static void
+set_gpu_protocol_features(VuGpu *g)
+{
+VhostUserGpuMsg msg = {
+.request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES
+};
+
+vg_send_msg(g, &msg, -1);
+assert(g->wait_in == 0);
+g->wait_in = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
+   protocol_features_cb, g);
 }
 
 static int
-- 
2.29.0




[PATCH v2 06/20] ui: remove gl_ctx_get_current

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

There are no users left.

Signed-off-by: Marc-André Lureau 
---
 include/ui/console.h | 2 --
 include/ui/egl-context.h | 1 -
 include/ui/sdl2.h| 1 -
 ui/console.c | 6 --
 ui/egl-context.c | 5 -
 ui/egl-headless.c| 1 -
 ui/gtk.c | 2 --
 ui/sdl2-gl.c | 8 
 ui/sdl2.c| 1 -
 ui/spice-display.c   | 1 -
 10 files changed, 28 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index 7a3fc11abf..ce6c72e37c 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -201,7 +201,6 @@ typedef struct DisplayChangeListenerOps {
QEMUGLContext ctx);
 int (*dpy_gl_ctx_make_current)(DisplayChangeListener *dcl,
QEMUGLContext ctx);
-QEMUGLContext (*dpy_gl_ctx_get_current)(DisplayChangeListener *dcl);
 
 void (*dpy_gl_scanout_disable)(DisplayChangeListener *dcl);
 void (*dpy_gl_scanout_texture)(DisplayChangeListener *dcl,
@@ -303,7 +302,6 @@ QEMUGLContext dpy_gl_ctx_create(QemuConsole *con,
 QEMUGLParams *params);
 void dpy_gl_ctx_destroy(QemuConsole *con, QEMUGLContext ctx);
 int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx);
-QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con);
 
 bool console_has_gl(QemuConsole *con);
 bool console_has_gl_dmabuf(QemuConsole *con);
diff --git a/include/ui/egl-context.h b/include/ui/egl-context.h
index f004ce11a7..9374fe41e3 100644
--- a/include/ui/egl-context.h
+++ b/include/ui/egl-context.h
@@ -9,6 +9,5 @@ QEMUGLContext qemu_egl_create_context(DisplayChangeListener 
*dcl,
 void qemu_egl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx);
 int qemu_egl_make_context_current(DisplayChangeListener *dcl,
   QEMUGLContext ctx);
-QEMUGLContext qemu_egl_get_current_context(DisplayChangeListener *dcl);
 
 #endif /* EGL_CONTEXT_H */
diff --git a/include/ui/sdl2.h b/include/ui/sdl2.h
index 0875b8d56b..f85c117a78 100644
--- a/include/ui/sdl2.h
+++ b/include/ui/sdl2.h
@@ -70,7 +70,6 @@ QEMUGLContext sdl2_gl_create_context(DisplayChangeListener 
*dcl,
 void sdl2_gl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx);
 int sdl2_gl_make_context_current(DisplayChangeListener *dcl,
  QEMUGLContext ctx);
-QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl);
 
 void sdl2_gl_scanout_disable(DisplayChangeListener *dcl);
 void sdl2_gl_scanout_texture(DisplayChangeListener *dcl,
diff --git a/ui/console.c b/ui/console.c
index d80ce7037c..c0b1a3689c 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1803,12 +1803,6 @@ int dpy_gl_ctx_make_current(QemuConsole *con, 
QEMUGLContext ctx)
 return con->gl->ops->dpy_gl_ctx_make_current(con->gl, ctx);
 }
 
-QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con)
-{
-assert(con->gl);
-return con->gl->ops->dpy_gl_ctx_get_current(con->gl);
-}
-
 void dpy_gl_scanout_disable(QemuConsole *con)
 {
 assert(con->gl);
diff --git a/ui/egl-context.c b/ui/egl-context.c
index 4aa1cbb50c..368ffa49d8 100644
--- a/ui/egl-context.c
+++ b/ui/egl-context.c
@@ -35,8 +35,3 @@ int qemu_egl_make_context_current(DisplayChangeListener *dcl,
return eglMakeCurrent(qemu_egl_display,
  EGL_NO_SURFACE, EGL_NO_SURFACE, ctx);
 }
-
-QEMUGLContext qemu_egl_get_current_context(DisplayChangeListener *dcl)
-{
-return eglGetCurrentContext();
-}
diff --git a/ui/egl-headless.c b/ui/egl-headless.c
index fe2a0d1eab..da377a74af 100644
--- a/ui/egl-headless.c
+++ b/ui/egl-headless.c
@@ -160,7 +160,6 @@ static const DisplayChangeListenerOps egl_ops = {
 .dpy_gl_ctx_create   = egl_create_context,
 .dpy_gl_ctx_destroy  = qemu_egl_destroy_context,
 .dpy_gl_ctx_make_current = qemu_egl_make_context_current,
-.dpy_gl_ctx_get_current  = qemu_egl_get_current_context,
 
 .dpy_gl_scanout_disable  = egl_scanout_disable,
 .dpy_gl_scanout_texture  = egl_scanout_texture,
diff --git a/ui/gtk.c b/ui/gtk.c
index e1ee0840b3..a0e6b60ac4 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -637,7 +637,6 @@ static const DisplayChangeListenerOps dcl_gl_area_ops = {
 .dpy_gl_ctx_create   = gd_gl_area_create_context,
 .dpy_gl_ctx_destroy  = gd_gl_area_destroy_context,
 .dpy_gl_ctx_make_current = gd_gl_area_make_current,
-.dpy_gl_ctx_get_current  = gd_gl_area_get_current_context,
 .dpy_gl_scanout_texture  = gd_gl_area_scanout_texture,
 .dpy_gl_update   = gd_gl_area_scanout_flush,
 };
@@ -654,7 +653,6 @@ static const DisplayChangeListenerOps dcl_egl_ops = {
 .dpy_gl_ctx_create   = gd_egl_create_context,
 .dpy_gl_ctx_destroy  = qemu_egl_destroy_context,
 .dpy_gl_ctx_make_current = gd_egl_make_current,
-.dpy_gl_ctx_get_current  = qemu_egl_get_current_context,
 .dpy_gl_scanout_disable  = gd_egl_scanout_disable,
 .dpy_gl_scanout_te

Re: [PATCH 04/22] tests/acceptance/virtiofs_submounts.py: use workdir property

2021-02-04 Thread Beraldo Leal
On Wed, Feb 03, 2021 at 12:23:39PM -0500, Cleber Rosa wrote:
> For Avocado Instrumented based tests, it's a better idea to just use
> the property.  The environment variable is a fall back for tests not
> written using that Python API.
> 
> Reference: 
> https://avocado-framework.readthedocs.io/en/84.0/api/test/avocado.html#avocado.Test.workdir
> Signed-off-by: Cleber Rosa 
> ---
>  tests/acceptance/virtiofs_submounts.py | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/acceptance/virtiofs_submounts.py 
> b/tests/acceptance/virtiofs_submounts.py
> index 361e5990b6..68d3cd6869 100644
> --- a/tests/acceptance/virtiofs_submounts.py
> +++ b/tests/acceptance/virtiofs_submounts.py
> @@ -136,8 +136,7 @@ class VirtiofsSubmountsTest(BootLinux):
>  return (stdout, stderr, ret)
>  
>  def set_up_shared_dir(self):
> -atwd = os.getenv('AVOCADO_TEST_WORKDIR')
> -self.shared_dir = os.path.join(atwd, 'virtiofs-shared')
> +self.shared_dir = os.path.join(self.workdir, 'virtiofs-shared')
>  
>  os.mkdir(self.shared_dir)
>  
> @@ -234,8 +233,7 @@ class VirtiofsSubmountsTest(BootLinux):
>  
>  self.seed = self.params.get('seed')
>  
> -atwd = os.getenv('AVOCADO_TEST_WORKDIR')
> -self.ssh_key = os.path.join(atwd, 'id_ed25519')
> +self.ssh_key = os.path.join(self.workdir, 'id_ed25519')
>  
>  self.run(('ssh-keygen', '-t', 'ed25519', '-f', self.ssh_key))
>  
> -- 
> 2.25.4
>

Reviewed-by: Beraldo Leal 




[PATCH v2 04/20] vhost-user-gpu: handle display-info in a callback

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Fixes a deadlock where the backend calls QEMU, while QEMU also calls the
backend simultaneously, both ends waiting for each other.

Signed-off-by: Marc-André Lureau 
---
 contrib/vhost-user-gpu/vhost-user-gpu.c | 33 -
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c 
b/contrib/vhost-user-gpu/vhost-user-gpu.c
index 7dcc02966c..b27990ffdb 100644
--- a/contrib/vhost-user-gpu/vhost-user-gpu.c
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
@@ -261,10 +261,33 @@ vg_ctrl_response_nodata(VuGpu *g,
 vg_ctrl_response(g, cmd, &resp, sizeof(resp));
 }
 
+
+static gboolean
+get_display_info_cb(gint fd, GIOCondition condition, gpointer user_data)
+{
+struct virtio_gpu_resp_display_info dpy_info = { {} };
+VuGpu *vg = user_data;
+struct virtio_gpu_ctrl_command *cmd = QTAILQ_LAST(&vg->fenceq);
+
+g_debug("disp info cb");
+assert(cmd->cmd_hdr.type == VIRTIO_GPU_CMD_GET_DISPLAY_INFO);
+if (!vg_recv_msg(vg, VHOST_USER_GPU_GET_DISPLAY_INFO,
+ sizeof(dpy_info), &dpy_info)) {
+return G_SOURCE_CONTINUE;
+}
+
+QTAILQ_REMOVE(&vg->fenceq, cmd, next);
+vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info));
+
+vg->wait_in = 0;
+vg_handle_ctrl(&vg->dev.parent, 0);
+
+return G_SOURCE_REMOVE;
+}
+
 void
 vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
 {
-struct virtio_gpu_resp_display_info dpy_info = { {} };
 VhostUserGpuMsg msg = {
 .request = VHOST_USER_GPU_GET_DISPLAY_INFO,
 .size = 0,
@@ -273,11 +296,9 @@ vg_get_display_info(VuGpu *vg, struct 
virtio_gpu_ctrl_command *cmd)
 assert(vg->wait_in == 0);
 
 vg_send_msg(vg, &msg, -1);
-if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) {
-return;
-}
-
-vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info));
+vg->wait_in = g_unix_fd_add(vg->sock_fd, G_IO_IN | G_IO_HUP,
+   get_display_info_cb, vg);
+cmd->state = VG_CMD_STATE_PENDING;
 }
 
 static void
-- 
2.29.0




[PATCH v2 03/20] vhost-user-gpu: use an extensible state enum for commands

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Introduce a pending state for commands which aren't finished yet, but
are being handled. See the following patch.

Signed-off-by: Marc-André Lureau 
---
 contrib/vhost-user-gpu/vugpu.h  | 8 +++-
 contrib/vhost-user-gpu/vhost-user-gpu.c | 8 
 contrib/vhost-user-gpu/virgl.c  | 2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h
index 86f3ac86aa..04d5615812 100644
--- a/contrib/vhost-user-gpu/vugpu.h
+++ b/contrib/vhost-user-gpu/vugpu.h
@@ -129,12 +129,18 @@ typedef struct VuGpu {
 QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
 } VuGpu;
 
+enum {
+VG_CMD_STATE_NEW,
+VG_CMD_STATE_PENDING,
+VG_CMD_STATE_FINISHED,
+};
+
 struct virtio_gpu_ctrl_command {
 VuVirtqElement elem;
 VuVirtq *vq;
 struct virtio_gpu_ctrl_hdr cmd_hdr;
 uint32_t error;
-bool finished;
+int state;
 QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
 };
 
diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c 
b/contrib/vhost-user-gpu/vhost-user-gpu.c
index 85c16404fb..7dcc02966c 100644
--- a/contrib/vhost-user-gpu/vhost-user-gpu.c
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
@@ -246,7 +246,7 @@ vg_ctrl_response(VuGpu *g,
 }
 vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s);
 vu_queue_notify(&g->dev.parent, cmd->vq);
-cmd->finished = true;
+cmd->state = VG_CMD_STATE_FINISHED;
 }
 
 void
@@ -800,7 +800,7 @@ vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command 
*cmd)
 cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
 break;
 }
-if (!cmd->finished) {
+if (cmd->state == VG_CMD_STATE_NEW) {
 vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error :
 VIRTIO_GPU_RESP_OK_NODATA);
 }
@@ -825,7 +825,7 @@ vg_handle_ctrl(VuDev *dev, int qidx)
 }
 cmd->vq = vq;
 cmd->error = 0;
-cmd->finished = false;
+cmd->state = VG_CMD_STATE_NEW;
 
 len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
  0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr));
@@ -844,7 +844,7 @@ vg_handle_ctrl(VuDev *dev, int qidx)
 vg_process_cmd(vg, cmd);
 }
 
-if (!cmd->finished) {
+if (cmd->state != VG_CMD_STATE_FINISHED) {
 QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next);
 vg->inflight++;
 } else {
diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c
index e647278052..8bb3c563d9 100644
--- a/contrib/vhost-user-gpu/virgl.c
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -482,7 +482,7 @@ void vg_virgl_process_cmd(VuGpu *g, struct 
virtio_gpu_ctrl_command *cmd)
 break;
 }
 
-if (cmd->finished) {
+if (cmd->state != VG_CMD_STATE_NEW) {
 return;
 }
 
-- 
2.29.0




[PATCH v2 08/20] ui: annotate DCLOps callback requirements

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 include/ui/console.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/ui/console.h b/include/ui/console.h
index ce6c72e37c..bea2b6329a 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -174,35 +174,49 @@ typedef struct DisplayState DisplayState;
 typedef struct DisplayChangeListenerOps {
 const char *dpy_name;
 
+/* optional */
 void (*dpy_refresh)(DisplayChangeListener *dcl);
 
+/* optional */
 void (*dpy_gfx_update)(DisplayChangeListener *dcl,
int x, int y, int w, int h);
+/* optional */
 void (*dpy_gfx_switch)(DisplayChangeListener *dcl,
struct DisplaySurface *new_surface);
+/* optional */
 bool (*dpy_gfx_check_format)(DisplayChangeListener *dcl,
  pixman_format_code_t format);
 
+/* optional */
 void (*dpy_text_cursor)(DisplayChangeListener *dcl,
 int x, int y);
+/* optional */
 void (*dpy_text_resize)(DisplayChangeListener *dcl,
 int w, int h);
+/* optional */
 void (*dpy_text_update)(DisplayChangeListener *dcl,
 int x, int y, int w, int h);
 
+/* optional */
 void (*dpy_mouse_set)(DisplayChangeListener *dcl,
   int x, int y, int on);
+/* optional */
 void (*dpy_cursor_define)(DisplayChangeListener *dcl,
   QEMUCursor *cursor);
 
+/* required if GL */
 QEMUGLContext (*dpy_gl_ctx_create)(DisplayChangeListener *dcl,
QEMUGLParams *params);
+/* required if GL */
 void (*dpy_gl_ctx_destroy)(DisplayChangeListener *dcl,
QEMUGLContext ctx);
+/* required if GL */
 int (*dpy_gl_ctx_make_current)(DisplayChangeListener *dcl,
QEMUGLContext ctx);
 
+/* required if GL */
 void (*dpy_gl_scanout_disable)(DisplayChangeListener *dcl);
+/* required if GL */
 void (*dpy_gl_scanout_texture)(DisplayChangeListener *dcl,
uint32_t backing_id,
bool backing_y_0_top,
@@ -210,15 +224,20 @@ typedef struct DisplayChangeListenerOps {
uint32_t backing_height,
uint32_t x, uint32_t y,
uint32_t w, uint32_t h);
+/* optional */
 void (*dpy_gl_scanout_dmabuf)(DisplayChangeListener *dcl,
   QemuDmaBuf *dmabuf);
+/* optional */
 void (*dpy_gl_cursor_dmabuf)(DisplayChangeListener *dcl,
  QemuDmaBuf *dmabuf, bool have_hot,
  uint32_t hot_x, uint32_t hot_y);
+/* optional */
 void (*dpy_gl_cursor_position)(DisplayChangeListener *dcl,
uint32_t pos_x, uint32_t pos_y);
+/* optional */
 void (*dpy_gl_release_dmabuf)(DisplayChangeListener *dcl,
   QemuDmaBuf *dmabuf);
+/* required if GL */
 void (*dpy_gl_update)(DisplayChangeListener *dcl,
   uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
-- 
2.29.0




[PATCH v2 00/20] Various vhost-user-gpu & UI fixes

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Hi,

Here is a collection of fixes and improvements mostly around vhost-user-gpu. In
combination with spice, they work best with the previously sent fixes ("[PATCH
0/2] Fix (non-vga) QXL async screendump" & "[PATCH v2] spice: delay starting
until display are initialized"). I also use virgl from git, which seems to work
better. You also need mesa with the recent fix !8663 from last week if you test
this.

The last patch is a simple RFC avocado test to have the basics under some
control and avoid simple regressions.

I have further refactoring work pending to separate the DisplayChangeListener
from the GL context. This should allow running a GL-less qemu with a Spice
client & vhost-user-gpu/virgl, for example.

v2:
 - style fixes
 - drop RFC from avocado test

Marc-André Lureau (20):
  vhost-user-gpu: check backend for EDID support
  vhost-user-gpu: handle vhost-user-gpu features in a callback
  vhost-user-gpu: use an extensible state enum for commands
  vhost-user-gpu: handle display-info in a callback
  ui: remove extra #ifdef CONFIG_OPENGL
  ui: remove gl_ctx_get_current
  ui: add gd_gl_area_scanout_disable
  ui: annotate DCLOps callback requirements
  ui: remove console_has_gl_dmabuf()
  vhost-user-gpu: add a configuration flag for dmabuf usage
  ui: add an optional get_flags callback to GraphicHwOps
  ui: add a DCLOps callback to check dmabuf support
  ui: check hw requirements during DCL registration
  ui: add qemu_egl_has_dmabuf helper
  ui: check gtk-egl dmabuf support
  ui: add egl dmabuf import to gtkglarea
  virtio-gpu: avoid re-entering cmdq processing
  display/ui: add a callback to indicate GL state is flushed
  chardev: check if the chardev is registered for yanking
  tests: add some virtio-gpu & vhost-user-gpu acceptance test

 contrib/vhost-user-gpu/vugpu.h  |  10 +-
 include/hw/virtio/virtio-gpu.h  |   6 +-
 include/ui/console.h|  35 +-
 include/ui/egl-context.h|   1 -
 include/ui/egl-helpers.h|   1 +
 include/ui/gtk.h|   4 +
 include/ui/sdl2.h   |   1 -
 chardev/char-socket.c   |  53 +---
 contrib/vhost-user-gpu/vhost-user-gpu.c |  78 +---
 contrib/vhost-user-gpu/virgl.c  |   2 +-
 hw/display/vhost-user-gpu.c |  17 +--
 hw/display/virtio-gpu-base.c|  31 -
 hw/display/virtio-gpu.c |   9 +-
 hw/display/virtio-vga.c |  20 +++
 hw/vfio/display.c   |   6 +
 ui/console.c|  64 +++---
 ui/egl-context.c|   5 -
 ui/egl-headless.c   |   1 -
 ui/egl-helpers.c|  10 ++
 ui/gtk-egl.c|   3 +
 ui/gtk-gl-area.c|  28 +
 ui/gtk.c|  43 +--
 ui/sdl2-gl.c|  10 +-
 ui/sdl2.c   |   1 -
 ui/spice-display.c  |   2 +-
 tests/acceptance/virtio-gpu.py  | 161 
 26 files changed, 501 insertions(+), 101 deletions(-)
 create mode 100644 tests/acceptance/virtio-gpu.py

-- 
2.29.0





[PATCH v2 11/20] ui: add an optional get_flags callback to GraphicHwOps

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Those flags can be used to express different requirements for the
display or other needs.

Signed-off-by: Marc-André Lureau 
---
 include/ui/console.h |  9 +
 hw/display/virtio-gpu-base.c | 18 ++
 hw/display/virtio-vga.c  |  9 +
 hw/vfio/display.c|  6 ++
 4 files changed, 42 insertions(+)

diff --git a/include/ui/console.h b/include/ui/console.h
index ac989fdf70..0595aa9953 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -368,7 +368,16 @@ static inline void console_write_ch(console_ch_t *dest, 
uint32_t ch)
 *dest = ch;
 }
 
+enum {
+GRAPHIC_FLAGS_NONE = 0,
+/* require a console/display with GL callbacks */
+GRAPHIC_FLAGS_GL   = 1 << 0,
+/* require a console/display with DMABUF import */
+GRAPHIC_FLAGS_DMABUF   = 1 << 1,
+};
+
 typedef struct GraphicHwOps {
+int (*get_flags)(void *opaque); /* optional, default 0 */
 void (*invalidate)(void *opaque);
 void (*gfx_update)(void *opaque);
 bool gfx_update_async; /* if true, calls graphic_hw_update_done() */
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index 40ccd00f94..f27a6fbe75 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -114,7 +114,25 @@ virtio_gpu_gl_block(void *opaque, bool block)
 }
 }
 
+static int
+virtio_gpu_get_flags(void *opaque)
+{
+VirtIOGPUBase *g = opaque;
+int flags = GRAPHIC_FLAGS_NONE;
+
+if (virtio_gpu_virgl_enabled(g->conf)) {
+flags |= GRAPHIC_FLAGS_GL;
+}
+
+if (virtio_gpu_dmabuf_enabled(g->conf)) {
+flags |= GRAPHIC_FLAGS_DMABUF;
+}
+
+return flags;
+}
+
 static const GraphicHwOps virtio_gpu_ops = {
+.get_flags = virtio_gpu_get_flags,
 .invalidate = virtio_gpu_invalidate_display,
 .gfx_update = virtio_gpu_update_display,
 .text_update = virtio_gpu_text_update,
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index 81f776ee36..b071909b68 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -68,7 +68,16 @@ static void virtio_vga_base_gl_block(void *opaque, bool 
block)
 }
 }
 
+static int virtio_vga_base_get_flags(void *opaque)
+{
+VirtIOVGABase *vvga = opaque;
+VirtIOGPUBase *g = vvga->vgpu;
+
+return g->hw_ops->get_flags(g);
+}
+
 static const GraphicHwOps virtio_vga_base_ops = {
+.get_flags = virtio_vga_base_get_flags,
 .invalidate = virtio_vga_base_invalidate_display,
 .gfx_update = virtio_vga_base_update_display,
 .text_update = virtio_vga_base_text_update,
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 42d67e870b..f04473e3ce 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -335,7 +335,13 @@ static void vfio_display_dmabuf_update(void *opaque)
 }
 }
 
+static int vfio_display_get_flags(void *opaque)
+{
+return GRAPHIC_FLAGS_GL | GRAPHIC_FLAGS_DMABUF;
+}
+
 static const GraphicHwOps vfio_display_dmabuf_ops = {
+.get_flags  = vfio_display_get_flags,
 .gfx_update = vfio_display_dmabuf_update,
 .ui_info= vfio_display_edid_ui_info,
 };
-- 
2.29.0




[PATCH v2 12/20] ui: add a DCLOps callback to check dmabuf support

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 include/ui/console.h |  2 ++
 ui/console.c | 13 +
 2 files changed, 15 insertions(+)

diff --git a/include/ui/console.h b/include/ui/console.h
index 0595aa9953..875885d9c7 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -224,6 +224,8 @@ typedef struct DisplayChangeListenerOps {
uint32_t backing_height,
uint32_t x, uint32_t y,
uint32_t w, uint32_t h);
+/* optional (default to true if has dpy_gl_scanout_dmabuf) */
+bool (*dpy_has_dmabuf)(DisplayChangeListener *dcl);
 /* optional */
 void (*dpy_gl_scanout_dmabuf)(DisplayChangeListener *dcl,
   QemuDmaBuf *dmabuf);
diff --git a/ui/console.c b/ui/console.c
index b5bc3f7699..a645418ada 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1463,6 +1463,19 @@ bool console_has_gl(QemuConsole *con)
 return con->gl != NULL;
 }
 
+static bool displaychangelistener_has_dmabuf(DisplayChangeListener *dcl)
+{
+if (dcl->ops->dpy_has_dmabuf) {
+return dcl->ops->dpy_has_dmabuf(dcl);
+}
+
+if (dcl->ops->dpy_gl_scanout_dmabuf) {
+return true;
+}
+
+return false;
+}
+
 void register_displaychangelistener(DisplayChangeListener *dcl)
 {
 static const char nodev[] =
-- 
2.29.0




[PATCH v2 10/20] vhost-user-gpu: add a configuration flag for dmabuf usage

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Let's inform VirtioGPUBase that vhost-user-gpu requires DMABUF messages.

Signed-off-by: Marc-André Lureau 
---
 include/hw/virtio/virtio-gpu.h | 3 +++
 hw/display/vhost-user-gpu.c| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 1aed7275c8..4f3dbf79f9 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -71,6 +71,7 @@ enum virtio_gpu_base_conf_flags {
 VIRTIO_GPU_FLAG_VIRGL_ENABLED = 1,
 VIRTIO_GPU_FLAG_STATS_ENABLED,
 VIRTIO_GPU_FLAG_EDID_ENABLED,
+VIRTIO_GPU_FLAG_DMABUF_ENABLED,
 };
 
 #define virtio_gpu_virgl_enabled(_cfg) \
@@ -79,6 +80,8 @@ enum virtio_gpu_base_conf_flags {
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_STATS_ENABLED))
 #define virtio_gpu_edid_enabled(_cfg) \
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED))
+#define virtio_gpu_dmabuf_enabled(_cfg) \
+(_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED))
 
 struct virtio_gpu_base_conf {
 uint32_t max_outputs;
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index dd587436ff..b7bde9feb6 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -547,6 +547,8 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error 
**errp)
 return;
 }
 
+/* existing backend may send DMABUF, so let's add that requirement */
+g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED;
 if (virtio_has_feature(g->vhost->dev.features, VIRTIO_GPU_F_VIRGL)) {
 g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED;
 }
-- 
2.29.0




[PATCH v2 05/20] ui: remove extra #ifdef CONFIG_OPENGL

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Since commit 5cb69566daa8081abb82a13403dcc0fffed02007 ("gtk: remove
CONFIG_GTK_GL"), some #ifdefs are redundant.

Signed-off-by: Marc-André Lureau 
---
 ui/gtk.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/ui/gtk.c b/ui/gtk.c
index 26665cd2e6..e1ee0840b3 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -625,8 +625,6 @@ static const DisplayChangeListenerOps dcl_ops = {
 
 /** DisplayState Callbacks (opengl version) **/
 
-#if defined(CONFIG_OPENGL)
-
 static const DisplayChangeListenerOps dcl_gl_area_ops = {
 .dpy_name = "gtk-egl",
 .dpy_gfx_update   = gd_gl_area_update,
@@ -644,8 +642,6 @@ static const DisplayChangeListenerOps dcl_gl_area_ops = {
 .dpy_gl_update   = gd_gl_area_scanout_flush,
 };
 
-#endif /* CONFIG_OPENGL */
-
 static const DisplayChangeListenerOps dcl_egl_ops = {
 .dpy_name = "gtk-egl",
 .dpy_gfx_update   = gd_egl_update,
@@ -1993,13 +1989,10 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, 
VirtualConsole *vc,
 
 #if defined(CONFIG_OPENGL)
 if (display_opengl) {
-#if defined(CONFIG_OPENGL)
 if (gtk_use_gl_area) {
 vc->gfx.drawing_area = gtk_gl_area_new();
 vc->gfx.dcl.ops = &dcl_gl_area_ops;
-} else
-#endif /* CONFIG_OPENGL */
-{
+} else {
 vc->gfx.drawing_area = gtk_drawing_area_new();
 /*
  * gtk_widget_set_double_buffered() was deprecated in 3.14.
-- 
2.29.0




[PATCH v2 13/20] ui: check hw requirements during DCL registration

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 ui/console.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/ui/console.c b/ui/console.c
index a645418ada..d8cc640c28 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1476,12 +1476,37 @@ static bool 
displaychangelistener_has_dmabuf(DisplayChangeListener *dcl)
 return false;
 }
 
+static bool dpy_compatible_with(QemuConsole *con,
+DisplayChangeListener *dcl, Error **errp)
+{
+ERRP_GUARD();
+int flags;
+
+flags = con->hw_ops->get_flags ? con->hw_ops->get_flags(con->hw) : 0;
+
+if (flags & GRAPHIC_FLAGS_GL &&
+!console_has_gl(con)) {
+error_setg(errp, "The console requires a GL context.");
+return false;
+
+}
+
+if (flags & GRAPHIC_FLAGS_DMABUF &&
+!displaychangelistener_has_dmabuf(dcl)) {
+error_setg(errp, "The console requires display DMABUF support.");
+return false;
+}
+
+return true;
+}
+
 void register_displaychangelistener(DisplayChangeListener *dcl)
 {
 static const char nodev[] =
 "This VM has no graphic display device.";
 static DisplaySurface *dummy;
 QemuConsole *con;
+Error *err = NULL;
 
 assert(!dcl->ds);
 
@@ -1496,6 +1521,11 @@ void 
register_displaychangelistener(DisplayChangeListener *dcl)
 dcl->con->gl = dcl;
 }
 
+if (dcl->con && !dpy_compatible_with(dcl->con, dcl, &err)) {
+error_report_err(err);
+exit(1);
+}
+
 trace_displaychangelistener_register(dcl, dcl->ops->dpy_name);
 dcl->ds = get_alloc_displaystate();
 QLIST_INSERT_HEAD(&dcl->ds->listeners, dcl, next);
-- 
2.29.0




[PATCH v2 15/20] ui: check gtk-egl dmabuf support

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 include/ui/gtk.h | 1 +
 ui/gtk.c | 9 +
 2 files changed, 10 insertions(+)

diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 7569d090fa..aaef884b95 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -48,6 +48,7 @@ typedef struct VirtualGfxConsole {
 int cursor_y;
 bool y0_top;
 bool scanout_mode;
+bool has_dmabuf;
 #endif
 } VirtualGfxConsole;
 
diff --git a/ui/gtk.c b/ui/gtk.c
index 00045881b1..f41c396cb9 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -623,6 +623,13 @@ static const DisplayChangeListenerOps dcl_ops = {
 
 #if defined(CONFIG_OPENGL)
 
+static bool gd_has_dmabuf(DisplayChangeListener *dcl)
+{
+VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+return vc->gfx.has_dmabuf;
+}
+
 /** DisplayState Callbacks (opengl version) **/
 
 static const DisplayChangeListenerOps dcl_gl_area_ops = {
@@ -661,6 +668,7 @@ static const DisplayChangeListenerOps dcl_egl_ops = {
 .dpy_gl_cursor_position  = gd_egl_cursor_position,
 .dpy_gl_release_dmabuf   = gd_egl_release_dmabuf,
 .dpy_gl_update   = gd_egl_scanout_flush,
+.dpy_has_dmabuf  = gd_has_dmabuf,
 };
 
 #endif /* CONFIG_OPENGL */
@@ -2004,6 +2012,7 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, 
VirtualConsole *vc,
 gtk_widget_set_double_buffered(vc->gfx.drawing_area, FALSE);
 #pragma GCC diagnostic pop
 vc->gfx.dcl.ops = &dcl_egl_ops;
+vc->gfx.has_dmabuf = qemu_egl_has_dmabuf();
 }
 } else
 #endif
-- 
2.29.0




[PATCH v2 14/20] ui: add qemu_egl_has_dmabuf helper

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 include/ui/egl-helpers.h |  1 +
 ui/egl-helpers.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index 94a4b3e6f3..5b1f7fafe0 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h
@@ -51,5 +51,6 @@ EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, 
EGLNativeWindowType win);
 int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy, DisplayGLMode mode);
 int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy, DisplayGLMode mode);
 EGLContext qemu_egl_init_ctx(void);
+bool qemu_egl_has_dmabuf(void);
 
 #endif /* EGL_HELPERS_H */
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 7c530c2825..73fe61f878 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -441,6 +441,16 @@ int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy, 
DisplayGLMode mode)
 #endif
 }
 
+bool qemu_egl_has_dmabuf(void)
+{
+if (qemu_egl_display == EGL_NO_DISPLAY) {
+return false;
+}
+
+return epoxy_has_egl_extension(qemu_egl_display,
+   "EGL_EXT_image_dma_buf_import");
+}
+
 EGLContext qemu_egl_init_ctx(void)
 {
 static const EGLint ctx_att_core[] = {
-- 
2.29.0




[PATCH v2 17/20] virtio-gpu: avoid re-entering cmdq processing

2021-02-04 Thread marcandre . lureau
From: Marc-André Lureau 

The next patch will notify when the GL context has been flushed, which will
resume the queue processing. However, if this happens within the caller
context, it will end up in a flush/update loop that overflows the stack.

Signed-off-by: Marc-André Lureau 
---
 include/hw/virtio/virtio-gpu.h | 1 +
 hw/display/virtio-gpu.c| 5 +
 2 files changed, 6 insertions(+)

diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 4f3dbf79f9..0043268e90 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -148,6 +148,7 @@ struct VirtIOGPU {
 
 uint64_t hostmem;
 
+bool processing_cmdq;
 bool renderer_inited;
 bool renderer_reset;
 QEMUTimer *fence_poll;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 0e833a462b..7eb4265a6d 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -814,6 +814,10 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
 {
 struct virtio_gpu_ctrl_command *cmd;
 
+if (g->processing_cmdq) {
+return;
+}
+g->processing_cmdq = true;
 while (!QTAILQ_EMPTY(&g->cmdq)) {
 cmd = QTAILQ_FIRST(&g->cmdq);
 
@@ -843,6 +847,7 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
 g_free(cmd);
 }
 }
+g->processing_cmdq = false;
 }
 
 static void virtio_gpu_gl_unblock(VirtIOGPUBase *b)
-- 
2.29.0




  1   2   3   4   5   6   >