[PATCH v4 5/6] tests: acpi: aarch64: Add topology test for aarch64

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

Add test for aarch64's ACPI topology building for all the supported
levels.

Acked-by: Michael S. Tsirkin 
Signed-off-by: Yicong Yang 
---
 tests/qtest/bios-tables-test.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index e6096e7f73..37befe570b 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -1533,6 +1533,24 @@ static void test_acpi_virt_tcg(void)
 free_test_data(&data);
 }
 
+static void test_acpi_virt_tcg_topology(void)
+{
+test_data data = {
+.machine = "virt",
+.variant = ".topology",
+.tcg_only = true,
+.uefi_fl1 = "pc-bios/edk2-aarch64-code.fd",
+.uefi_fl2 = "pc-bios/edk2-arm-vars.fd",
+.cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2",
+.ram_start = 0x4000ULL,
+.scan_len = 128ULL * 1024 * 1024,
+};
+
+test_acpi_one("-cpu cortex-a57 "
+  "-smp sockets=1,clusters=2,cores=2,threads=2", &data);
+free_test_data(&data);
+}
+
 static void test_acpi_q35_viot(void)
 {
 test_data data = {
@@ -1864,6 +1882,7 @@ int main(int argc, char *argv[])
 } else if (strcmp(arch, "aarch64") == 0) {
 if (has_tcg) {
 qtest_add_func("acpi/virt", test_acpi_virt_tcg);
+qtest_add_func("acpi/virt/topology", test_acpi_virt_tcg_topology);
 qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem);
 qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp);
 qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb);
-- 
2.24.0




[PATCH v4 6/6] tests: acpi: aarch64: Add *.topology tables

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

Add *.topology tables for the aarch64's topology test and empty
bios-tables-test-allowed-diff.h

The disassembled differences between actual and expected
PPTT (the table which we actually care about):

 +/*
 + * Intel ACPI Component Architecture
 + * AML/ASL+ Disassembler version 20180105 (64-bit version)
 + * Copyright (c) 2000 - 2018 Intel Corporation
 + *
 + * Disassembly of /tmp/aml-WUN4U1, Tue Nov  1 09:51:52 2022
 + *
 + * ACPI Data Table [PPTT]
 + *
 + * Format: [HexOffset DecimalOffset ByteLength]  FieldName : FieldValue
 + */
 +
 +[000h    4]Signature : "PPTT"[Processor 
Properties Topology Table]
 +[004h 0004   4] Table Length : 0150
 +[008h 0008   1] Revision : 02
 +[009h 0009   1] Checksum : 7C
 +[00Ah 0010   6]   Oem ID : "BOCHS "
 +[010h 0016   8] Oem Table ID : "BXPC"
 +[018h 0024   4] Oem Revision : 0001
 +[01Ch 0028   4]  Asl Compiler ID : "BXPC"
 +[020h 0032   4]Asl Compiler Revision : 0001
 +
 +
 +[024h 0036   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[025h 0037   1]   Length : 14
 +[026h 0038   2] Reserved : 
 +[028h 0040   4]Flags (decoded below) : 0001
 +Physical package : 1
 + ACPI Processor ID valid : 0
 +[02Ch 0044   4]   Parent : 
 +[030h 0048   4]ACPI Processor ID : 
 +[034h 0052   4]  Private Resource Number : 
 +
 +[038h 0056   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[039h 0057   1]   Length : 14
 +[03Ah 0058   2] Reserved : 
 +[03Ch 0060   4]Flags (decoded below) : 
 +Physical package : 0
 + ACPI Processor ID valid : 0
 +[040h 0064   4]   Parent : 0024
 +[044h 0068   4]ACPI Processor ID : 
 +[048h 0072   4]  Private Resource Number : 
 +
 +[04Ch 0076   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[04Dh 0077   1]   Length : 14
 +[04Eh 0078   2] Reserved : 
 +[050h 0080   4]Flags (decoded below) : 
 +Physical package : 0
 + ACPI Processor ID valid : 0
 +[054h 0084   4]   Parent : 0038
 +[058h 0088   4]ACPI Processor ID : 
 +[05Ch 0092   4]  Private Resource Number : 
 +
 +[060h 0096   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[061h 0097   1]   Length : 14
 +[062h 0098   2] Reserved : 
 +[064h 0100   4]Flags (decoded below) : 000E
 +Physical package : 0
 + ACPI Processor ID valid : 1
 +[068h 0104   4]   Parent : 004C
 +[06Ch 0108   4]ACPI Processor ID : 
 +[070h 0112   4]  Private Resource Number : 
 +
 +[074h 0116   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[075h 0117   1]   Length : 14
 +[076h 0118   2] Reserved : 
 +[078h 0120   4]Flags (decoded below) : 000E
 +Physical package : 0
 + ACPI Processor ID valid : 1
 +[07Ch 0124   4]   Parent : 004C
 +[080h 0128   4]ACPI Processor ID : 0001
 +[084h 0132   4]  Private Resource Number : 
 +
 +[088h 0136   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[089h 0137   1]   Length : 14
 +[08Ah 0138   2] Reserved : 
 +[08Ch 0140   4]Flags (decoded below) : 
 +Physical package : 0
 + ACPI Processor ID valid : 0
 +[090h 0144   4]   Parent : 0038
 +[094h 0148   4]ACPI Processor ID : 0001
 +[098h 0152   4]  Private Resource Number : 
 +
 +[09Ch 0156   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[09Dh 0157   1]   Length : 14
 +[09Eh 0158   2] Reserved : 
 +[0A0h 0160   4]Flags (decoded below) : 000E
 +Physical package : 0
 + ACPI Processor ID valid : 1
 +[0A4h 0164   4]   Parent : 0088
 +[0A8h 0168   4]ACPI Processor ID : 0002
 +[0ACh 0172   4]  Private Resource Number : 
 +
 +[0B0h 0176   1]Subtable Type : 00 [Processor Hierarchy Node]
 +[0B1h 0177   1]   Length : 14
 +[0B2h 0178   2] Reserved : 
 +[0B4h 0180   4]Flags (decoded 

[PATCH v4 1/6] tests: virt: Allow changes to PPTT test table

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

Allow changes to tests/data/acpi/virt/PPTT, in preparation for changing the
building policy of the cluster topology.

Signed-off-by: Yicong Yang 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..cb143a55a6 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/virt/PPTT",
-- 
2.24.0




[PATCH v4 3/6] tests: virt: Update expected ACPI tables for virt test

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

Update the ACPI tables according to the acpi aml_build change, also
empty bios-tables-test-allowed-diff.h.

The disassembled differences between actual and expected PPTT:

  /*
   * Intel ACPI Component Architecture
   * AML/ASL+ Disassembler version 20180105 (64-bit version)
   * Copyright (c) 2000 - 2018 Intel Corporation
   *
 - * Disassembly of tests/data/acpi/virt/PPTT, Tue Nov  1 09:29:12 2022
 + * Disassembly of /tmp/aml-DIIGV1, Tue Nov  1 09:29:12 2022
   *
   * ACPI Data Table [PPTT]
   *
   * Format: [HexOffset DecimalOffset ByteLength]  FieldName : FieldValue
   */

  [000h    4]Signature : "PPTT"[Processor 
Properties Topology Table]
 -[004h 0004   4] Table Length : 0060
 +[004h 0004   4] Table Length : 004C
  [008h 0008   1] Revision : 02
 -[009h 0009   1] Checksum : 48
 +[009h 0009   1] Checksum : A8
  [00Ah 0010   6]   Oem ID : "BOCHS "
  [010h 0016   8] Oem Table ID : "BXPC"
  [018h 0024   4] Oem Revision : 0001
  [01Ch 0028   4]  Asl Compiler ID : "BXPC"
  [020h 0032   4]Asl Compiler Revision : 0001

  [024h 0036   1]Subtable Type : 00 [Processor Hierarchy Node]
  [025h 0037   1]   Length : 14
  [026h 0038   2] Reserved : 
  [028h 0040   4]Flags (decoded below) : 0001
  Physical package : 1
   ACPI Processor ID valid : 0
  [02Ch 0044   4]   Parent : 
  [030h 0048   4]ACPI Processor ID : 
  [034h 0052   4]  Private Resource Number : 

  [038h 0056   1]Subtable Type : 00 [Processor Hierarchy Node]
  [039h 0057   1]   Length : 14
  [03Ah 0058   2] Reserved : 
 -[03Ch 0060   4]Flags (decoded below) : 
 +[03Ch 0060   4]Flags (decoded below) : 000A
  Physical package : 0
 - ACPI Processor ID valid : 0
 + ACPI Processor ID valid : 1
  [040h 0064   4]   Parent : 0024
  [044h 0068   4]ACPI Processor ID : 
  [048h 0072   4]  Private Resource Number : 

 -[04Ch 0076   1]Subtable Type : 00 [Processor Hierarchy Node]
 -[04Dh 0077   1]   Length : 14
 -[04Eh 0078   2] Reserved : 
 -[050h 0080   4]Flags (decoded below) : 000A
 -Physical package : 0
 - ACPI Processor ID valid : 1
 -[054h 0084   4]   Parent : 0038
 -[058h 0088   4]ACPI Processor ID : 
 -[05Ch 0092   4]  Private Resource Number : 
 -
 -Raw Table Data: Length 96 (0x60)
 +Raw Table Data: Length 76 (0x4C)

 -  : 50 50 54 54 60 00 00 00 02 48 42 4F 43 48 53 20  // PPTT`HBOCHS
 +  : 50 50 54 54 4C 00 00 00 02 A8 42 4F 43 48 53 20  // PPTTL.BOCHS
0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43  // BXPCBXPC
0020: 01 00 00 00 00 14 00 00 01 00 00 00 00 00 00 00  // 
 -  0030: 00 00 00 00 00 00 00 00 00 14 00 00 00 00 00 00  // 
 -  0040: 24 00 00 00 00 00 00 00 00 00 00 00 00 14 00 00  // $...
 -  0050: 0A 00 00 00 38 00 00 00 00 00 00 00 00 00 00 00  // 8...
 +  0030: 00 00 00 00 00 00 00 00 00 14 00 00 0A 00 00 00  // 
 +  0040: 24 00 00 00 00 00 00 00 00 00 00 00  // $...

Acked-by: Michael S. Tsirkin 
Signed-off-by: Yicong Yang 
---
 tests/data/acpi/virt/PPTT   | Bin 96 -> 76 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |   1 -
 2 files changed, 1 deletion(-)

diff --git a/tests/data/acpi/virt/PPTT b/tests/data/acpi/virt/PPTT
index 
f56ea63b369a604877374ad696c396e796ab1c83..7a1258ecf123555b24462c98ccbb76b4ac1d0c2b
 100644
GIT binary patch
delta 32
fcmYfB;R*-{3GrcIU|?D?k;`ae01J-_kOKn%ZFdCM

delta 53
pcmeZC;0g!`2}xjJU|{l?$YrDgWH5jU5Ca567#O&Klm(arApowi1QY-O

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index cb143a55a6..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,2 +1 @@
 /* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/virt/PPTT",
-- 
2.24.0




[PATCH v4 4/6] tests: acpi: Add and whitelist *.topology blobs

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

Add and whitelist *.topology blobs, in preparation for the aarch64 ACPI
topology building test.

Signed-off-by: Yicong Yang 
---
 tests/data/acpi/virt/APIC.topology  | 0
 tests/data/acpi/virt/DSDT.topology  | 0
 tests/data/acpi/virt/PPTT.topology  | 0
 tests/qtest/bios-tables-test-allowed-diff.h | 3 +++
 4 files changed, 3 insertions(+)
 create mode 100644 tests/data/acpi/virt/APIC.topology
 create mode 100644 tests/data/acpi/virt/DSDT.topology
 create mode 100644 tests/data/acpi/virt/PPTT.topology

diff --git a/tests/data/acpi/virt/APIC.topology 
b/tests/data/acpi/virt/APIC.topology
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/virt/DSDT.topology 
b/tests/data/acpi/virt/DSDT.topology
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/virt/PPTT.topology 
b/tests/data/acpi/virt/PPTT.topology
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..90f53f9c1d 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,4 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/virt/APIC.topology",
+"tests/data/acpi/virt/DSDT.topology",
+"tests/data/acpi/virt/PPTT.topology",
-- 
2.24.0




[PATCH v4 0/6] Only generate cluster node in PPTT when specified

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

This series mainly changes the policy for building a cluster topology node
in PPTT. Previously we always built a cluster node in PPTT without
asking the user; after this series the cluster node will be built only when
the user specifies it through "-smp clusters=X".

One problem is related to this but not fully caused by it, see the
discussion in [*]. When booting the VM with `-smp 8` and 4 NUMA nodes,
the Linux scheduling domains in the VM miss the NUMA domains. This is
because the MC level span extends to the Cluster level (which is generated
by QEMU by default), which spans all the CPUs in the system, so the
scheduling domain building stops at the MC level since it already includes all
the CPUs.

Considering that cluster is an optional level and most platforms don't have it,
users may not even realize that it is built, and an always-build policy cannot
emulate the real topology on these platforms. So this series improves the
policy to only generate the cluster node when the user explicitly wants it.

Update the tests and test tables accordingly.

[*] 
https://lore.kernel.org/lkml/2c079860-ee82-7719-d3d2-756192f41...@huawei.com/

Change since v3:
- Improve and attach the diff of the affected ACPI tables in the commit, and 
minor cleanups
Link: 
https://lore.kernel.org/qemu-devel/20221031090523.34146-1-yangyic...@huawei.com/

Change since v2:
- Add tag from Michael, thanks
- Handle the tests changes with bios-tables-test-allowed-diff.h, per Michael
- Address the comments per Yanan
Link: 
https://lore.kernel.org/qemu-devel/20221027032613.18377-1-yangyic...@huawei.com/

Change since v1:
- Only include the test tables which are really needed
- Enrich the commit
Link: 
https://lore.kernel.org/qemu-devel/20220922131143.58003-1-yangyic...@huawei.com/

Yicong Yang (6):
  tests: virt: Allow changes to PPTT test table
  hw/acpi/aml-build: Only generate cluster node in PPTT when specified
  tests: virt: Update expected ACPI tables for virt test
  tests: acpi: Add and whitelist *.topology blobs
  tests: acpi: aarch64: Add topology test for aarch64
  tests: acpi: aarch64: Add *.topology tables

 hw/acpi/aml-build.c|   2 +-
 hw/core/machine-smp.c  |   2 ++
 include/hw/boards.h|   3 +++
 qemu-options.hx|   3 +++
 tests/data/acpi/virt/APIC.topology | Bin 0 -> 700 bytes
 tests/data/acpi/virt/DSDT.topology | Bin 0 -> 5398 bytes
 tests/data/acpi/virt/PPTT  | Bin 96 -> 76 bytes
 tests/data/acpi/virt/PPTT.topology | Bin 0 -> 336 bytes
 tests/qtest/bios-tables-test.c |  19 +++
 9 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/acpi/virt/APIC.topology
 create mode 100644 tests/data/acpi/virt/DSDT.topology
 create mode 100644 tests/data/acpi/virt/PPTT.topology

-- 
2.24.0




[PATCH v4 2/6] hw/acpi/aml-build: Only generate cluster node in PPTT when specified

2022-11-01 Thread Yicong Yang via
From: Yicong Yang 

Currently we always generate a cluster node regardless of whether the user has
specified '-smp clusters=X' or not. Cluster is an optional level that
participates in the building of Linux scheduling domains and
only appears on a few platforms. It's unnecessary to always build
it when it cannot reflect the real topology on platforms having no
cluster implementation. So only generate the cluster topology in
ACPI PPTT when the user has specified it explicitly in -smp.

Tested qemu-system-aarch64 with `-smp 8` and linux 6.1-rc1, without
this patch:
estuary:/sys/devices/system/cpu/cpu0/topology$ cat cluster_*
ff  # cluster_cpus
0-7 # cluster_cpus_list
56  # cluster_id

with this patch:
estuary:/sys/devices/system/cpu/cpu0/topology$ cat cluster_*
ff  # cluster_cpus
0-7 # cluster_cpus_list
36  # cluster_id, with no cluster node the kernel will set it to the
  physical package id

Acked-by: Michael S. Tsirkin 
Signed-off-by: Yicong Yang 
---
 hw/acpi/aml-build.c   | 2 +-
 hw/core/machine-smp.c | 2 ++
 include/hw/boards.h   | 3 +++
 qemu-options.hx   | 3 +++
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index e6bfac95c7..60c1acf3da 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2030,7 +2030,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, 
MachineState *ms,
 0, socket_id, NULL, 0);
 }
 
-if (mc->smp_props.clusters_supported) {
+if (mc->smp_props.clusters_supported && mc->smp_props.has_clusters) {
 if (cpus->cpus[n].props.cluster_id != cluster_id) {
 assert(cpus->cpus[n].props.cluster_id > cluster_id);
 cluster_id = cpus->cpus[n].props.cluster_id;
diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
index b39ed21e65..c3dab007da 100644
--- a/hw/core/machine-smp.c
+++ b/hw/core/machine-smp.c
@@ -158,6 +158,8 @@ void machine_parse_smp_config(MachineState *ms,
 ms->smp.threads = threads;
 ms->smp.max_cpus = maxcpus;
 
+mc->smp_props.has_clusters = config->has_clusters;
+
 /* sanity-check of the computed topology */
 if (sockets * dies * clusters * cores * threads != maxcpus) {
 g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 311ed17e18..06ed66453f 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -130,11 +130,14 @@ typedef struct {
  * @prefer_sockets - whether sockets are preferred over cores in smp parsing
  * @dies_supported - whether dies are supported by the machine
  * @clusters_supported - whether clusters are supported by the machine
+ * @has_clusters - whether clusters are explicitly specified in the user
+ * provided SMP configuration
  */
 typedef struct {
 bool prefer_sockets;
 bool dies_supported;
 bool clusters_supported;
+bool has_clusters;
 } SMPCompatProps;
 
 /**
diff --git a/qemu-options.hx b/qemu-options.hx
index eb38e5dc40..bbdbdef0af 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -349,6 +349,9 @@ SRST
 ::
 
 -smp 2
+
+Note: The cluster topology will only be generated in ACPI and exposed
+to guest if it's explicitly specified in -smp.
 ERST
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-- 
2.24.0




[PATCH 1/2] target/loongarch: Add exception subcode

2022-11-01 Thread Song Gao
We need subcodes to distinguish cs->exception_index values that share the
same excode, such as EXCCODE_ADEF/EXCCODE_ADEM.

Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c |  7 +++--
 target/loongarch/cpu.h | 58 ++
 2 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 49393d95d8..b28aaed5ba 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -220,7 +220,10 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
 env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA,
   PC, (env->pc >> 2));
 } else {
-env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ECODE, cause);
+env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ECODE,
+EXCODE_MCODE(cause));
+env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ESUBCODE,
+EXCODE_SUBCODE(cause));
 env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PPLV,
FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV));
 env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PIE,
@@ -257,7 +260,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
 env->pc = env->CSR_TLBRENTRY;
 } else {
 env->pc = env->CSR_EENTRY;
-env->pc += cause * vec_size;
+env->pc += EXCODE_MCODE(cause) * vec_size;
 }
 qemu_log_mask(CPU_LOG_INT,
   "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index dce999aaac..dbce176564 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -75,33 +75,37 @@ FIELD(FCSR0, CAUSE, 24, 5)
 #define FP_DIV0   8
 #define FP_INVALID16
 
-#define  EXCCODE_EXTERNAL_INT   64   /* plus external interrupt number */
-#define  EXCCODE_INT 0
-#define  EXCCODE_PIL 1
-#define  EXCCODE_PIS 2
-#define  EXCCODE_PIF 3
-#define  EXCCODE_PME 4
-#define  EXCCODE_PNR 5
-#define  EXCCODE_PNX 6
-#define  EXCCODE_PPI 7
-#define  EXCCODE_ADEF8 /* Different exception subcode */
-#define  EXCCODE_ADEM8
-#define  EXCCODE_ALE 9
-#define  EXCCODE_BCE 10
-#define  EXCCODE_SYS 11
-#define  EXCCODE_BRK 12
-#define  EXCCODE_INE 13
-#define  EXCCODE_IPE 14
-#define  EXCCODE_FPD 15
-#define  EXCCODE_SXD 16
-#define  EXCCODE_ASXD17
-#define  EXCCODE_FPE 18 /* Different exception subcode */
-#define  EXCCODE_VFPE18
-#define  EXCCODE_WPEF19 /* Different exception subcode */
-#define  EXCCODE_WPEM19
-#define  EXCCODE_BTD 20
-#define  EXCCODE_BTE 21
-#define  EXCCODE_DBP 26 /* Reserved subcode used for debug */
+#define EXCODE(code, subcode) ( ((subcode) << 6) | (code) )
+#define EXCODE_MCODE(code)( (code) & 0x3f )
+#define EXCODE_SUBCODE(code)  ( (code) >> 6 )
+
+#define  EXCCODE_EXTERNAL_INT64   /* plus external interrupt number */
+#define  EXCCODE_INT EXCODE(0, 0)
+#define  EXCCODE_PIL EXCODE(1, 0)
+#define  EXCCODE_PIS EXCODE(2, 0)
+#define  EXCCODE_PIF EXCODE(3, 0)
+#define  EXCCODE_PME EXCODE(4, 0)
+#define  EXCCODE_PNR EXCODE(5, 0)
+#define  EXCCODE_PNX EXCODE(6, 0)
+#define  EXCCODE_PPI EXCODE(7, 0)
+#define  EXCCODE_ADEFEXCODE(8, 0) /* Different exception 
subcode */
+#define  EXCCODE_ADEMEXCODE(8, 1)
+#define  EXCCODE_ALE EXCODE(9, 0)
+#define  EXCCODE_BCE EXCODE(10, 0)
+#define  EXCCODE_SYS EXCODE(11, 0)
+#define  EXCCODE_BRK EXCODE(12, 0)
+#define  EXCCODE_INE EXCODE(13, 0)
+#define  EXCCODE_IPE EXCODE(14, 0)
+#define  EXCCODE_FPD EXCODE(15, 0)
+#define  EXCCODE_SXD EXCODE(16, 0)
+#define  EXCCODE_ASXDEXCODE(17, 0)
+#define  EXCCODE_FPE EXCODE(18, 0) /* Different exception 
subcode */
+#define  EXCCODE_VFPEEXCODE(18, 1)
+#define  EXCCODE_WPEFEXCODE(19, 0) /* Different exception 
subcode */
+#define  EXCCODE_WPEMEXCODE(19, 1)
+#define  EXCCODE_BTD EXCODE(20, 0)
+#define  EXCCODE_BTE EXCODE(21, 0)
+#define  EXCCODE_DBP EXCODE(26, 0) /* Reserved subcode used 
for debug */
 
 /* cpucfg[0] bits */
 FIELD(CPUCFG0, PRID, 0, 32)
-- 
2.31.1




[PATCH 2/2] target/loongarch: Fix raise_mmu_exception() set wrong exception_index

2022-11-01 Thread Song Gao
When the address is invalid, we should set exception_index
according to MMUAccessType, and EXCCODE_ADEF needn't update badinstr.
Otherwise, the system enters an infinite loop, e.g.
run test.c in system mode:
test.c:
#include

void (*func)(int *);

int main()
{
int i = 8;
void *ptr = (void *)0x4000;
func = ptr;
func(&i);
return 0;
}

Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c| 1 +
 target/loongarch/tlb_helper.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index b28aaed5ba..1512664214 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -177,6 +177,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
 }
 QEMU_FALLTHROUGH;
 case EXCCODE_PIF:
+case EXCCODE_ADEF:
 cause = cs->exception_index;
 update_badinstr = 0;
 break;
diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c
index 610b6d123c..d2f8fb0c60 100644
--- a/target/loongarch/tlb_helper.c
+++ b/target/loongarch/tlb_helper.c
@@ -229,7 +229,8 @@ static void raise_mmu_exception(CPULoongArchState *env, 
target_ulong address,
 switch (tlb_error) {
 default:
 case TLBRET_BADADDR:
-cs->exception_index = EXCCODE_ADEM;
+cs->exception_index = access_type == MMU_INST_FETCH
+  ? EXCCODE_ADEF : EXCCODE_ADEM;
 break;
 case TLBRET_NOMATCH:
 /* No TLB match for a mapped address */
@@ -643,7 +644,7 @@ bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, 
int size,
 CPULoongArchState *env = &cpu->env;
 hwaddr physical;
 int prot;
-int ret = TLBRET_BADADDR;
+int ret;
 
 /* Data access */
 ret = get_physical_address(env, &physical, &prot, address,
-- 
2.31.1




Re: [PATCH 3/3] vdpa: Expose VIRTIO_NET_F_STATUS unconditionally

2022-11-01 Thread Jason Wang
On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
 wrote:
>
> On Fri, Oct 28, 2022 at 3:59 AM Jason Wang  wrote:
> >
> > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> >  wrote:
> > >
> > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang  wrote:
> > > >
> > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > >  wrote:
> > > > >
> > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang  
> > > > > wrote:
> > > > > >
> > > > > >
> > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > Now that qemu can handle and emulate it if the vdpa backend does 
> > > > > > > not
> > > > > > > support it we can offer it always.
> > > > > > >
> > > > > > > Signed-off-by: Eugenio Pérez 
> > > > > >
> > > > > >
> > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > >
> > > > >
> > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > access the net status, isn't it?
> > > >
> > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > are the features that must be explicitly supported by the vhost
> > > > device.
> > >
> > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > device may not support them. net simulator lacks some of them
> > > actually, and it works.
> >
> > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > the support of the vhost. So Qemu won't even try to validate if vhost
> > has this support. E.g for vhost-net, we only have:
> >
> > static const int kernel_feature_bits[] = {
> > VIRTIO_F_NOTIFY_ON_EMPTY,
> > VIRTIO_RING_F_INDIRECT_DESC,
> > VIRTIO_RING_F_EVENT_IDX,
> > VIRTIO_NET_F_MRG_RXBUF,
> > VIRTIO_F_VERSION_1,
> > VIRTIO_NET_F_MTU,
> > VIRTIO_F_IOMMU_PLATFORM,
> > VIRTIO_F_RING_PACKED,
> > VIRTIO_NET_F_HASH_REPORT,
> > VHOST_INVALID_FEATURE_BIT
> > };
> >
> > You can see there's no STATUS bit there since it is emulated by Qemu.
> >
>
> Ok now I get what you mean, and yes we may modify the patches in that 
> direction.
>
> But if we go then we need to modify how qemu ack the features, because
> the features that are not in vdpa_feature_bits are not acked to the
> device. More on this later.
>
> > >
> > > From what I see these are the only features that will be forwarded to
> > > the guest as device_features. If it is not in the list, the feature
> > > will be masked out,
> >
> > Only when there's no support for this feature from the vhost.
> >
> > > as if the device does not support it.
> > >
> > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > we remove it from bit_mask, it will never be offered to the guest. But
> > > we want to offer it always, since we will need it for
> > > _F_GUEST_ANNOUNCE.
> > >
> > > Things get more complex because we actually need to ack it back if the
> > > device offers it, so the vdpa device can report link_down. We will
> > > only emulate LINK_UP always in the case the device does not support
> > > _F_STATUS.
> > >
> > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > vhost-vdpa device has this support:
> > > >
> > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int 
> > > > *feature_bits,
> > > > uint64_t features)
> > > > {
> > > > const int *bit = feature_bits;
> > > > while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > > uint64_t bit_mask = (1ULL << *bit);
> > > > if (!(hdev->features & bit_mask)) {
> > > > features &= ~bit_mask;
> > > > }
> > > > bit++;
> > > > }
> > > > return features;
> > > > }
> > > >
> > >
> > > Now maybe I'm the one missing something, but why is this not done as a
> > > masking directly?
> >
> > Not sure, the code has been there since day 0.
> >
> > But you can see from the code:
> >
> > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > and mask it if the vhost doesn't have the support
> > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> >
>
> That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> the device if it supports it.

Thinking about this again, I don't see why ANNOUNCE depends on STATUS (the
spec doesn't say so).

> QEMU cannot detect by itself when the
> link is not up. I think that setting unconditionally
> VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> detect the link down that way.

I think the idea is to still read status from config if the device
supports this.

>
> To enable _F_STATUS unconditionally is only done in the case the
> device does not support it, because its emulation is very easy. That
> way we support _F_GUEST_ANNOUNCE in all cases without device's
> cooperation.
>
> Having said that, should we go the opposite route and ack _F_STATE as
> long as the device supports it? As an 

Re: [PATCH v3 16/24] disas/nanomips: Remove IMMEDIATE functions

2022-11-01 Thread Stefan Weil via

Am 12.09.22 um 14:26 schrieb Milica Lazarevic:

Both versions of IMMEDIATE functions have been removed.

Before this patch, we'd been calling img_format twice, the first time
through the IMMEDIATE to get an appropriate string and the second time
to print that string. There's no more need for that. Therefore, calls to
IMMEDIATE are removed, and now we're directly printing the integer
values instead.

Signed-off-by: Milica Lazarevic 
---
  disas/nanomips.cpp | 756 -
  1 file changed, 265 insertions(+), 491 deletions(-)

diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp
index 816155527d..441204bb84 100644
--- a/disas/nanomips.cpp
+++ b/disas/nanomips.cpp

[...]

@@ -3305,11 +3271,9 @@ static char *CACHE(uint64 instruction, Dis_info *info)
  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
  
-char *op = IMMEDIATE(op_value);

-char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
  
-return img_format("CACHE %s, %s(%s)", op, s, rs);

+return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);
  }
  
  
@@ -3329,11 +3293,9 @@ static char *CACHEE(uint64 instruction, Dis_info *info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
  
-char *op = IMMEDIATE(op_value);

-char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
  
-return img_format("CACHEE %s, %s(%s)", op, s, rs);

+return img_format("CACHEE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);
  }


Do we really want to format "int64 s_value" as a string? The code now 
has lots of wrong format strings. Add the patch below to get the 
compiler report.


We once had a discussion about using G_GNUC_PRINTF for local functions 
or not. I think that this example clearly shows that it should be mandatory.


Regards,
Stefan

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 9647f1a8e3..c875818cb9 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -95,7 +95,7 @@ typedef struct Pool {
 #define IMGASSERTONCE(test)


-static char *img_format(const char *format, ...)
+static char * G_GNUC_PRINTF(1, 2) img_format(const char *format, ...)
 {
 char *buffer;
 va_list args;



OpenPGP_0xE08C21D5677450AD.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


[PULL v2 for 7.2 00/31] testing and plugin updates

2022-11-01 Thread Alex Bennée
warn: No match for commit 339bf0c071eff5e6ff1d9ddb3ad5cd02e4cd9ca3 found at 
https://github.com/stsquad/qemu.git
warn: Are you sure you pushed 'pull-target-arm-20221027-326-g339bf0c071' there?
The following changes since commit 5107fd3effb1cfec3b96d9e819f1605048640e31:

  net/vhost-vdpa.c: Fix clang compilation failure (2022-10-31 13:01:31 -0400)

are available in the Git repository at:

  https://github.com/stsquad/qemu.git pull-target-arm-20221027-326-g339bf0c071

for you to fetch changes up to 339bf0c071eff5e6ff1d9ddb3ad5cd02e4cd9ca3:

  tests/vm: use -o IdentitiesOnly=yes for ssh (2022-10-31 20:37:59 +)


Alex Bennée (21):
  tests/lcitool: Rename non-Debian specific helper
  tests/docker: update fedora-win[32|64]-cross with lcitool
  tests/lcitool: Refresh to latest libvirt-ci module
  tests/docker: update test-mingw to run single build
  configure: don't enable cross compilers unless in target_list
  configure: fix the --enable-static --disable-pie case
  tests/avocado: extend the timeout for x86_64 tcg tests
  tests/tcg: use regular semihosting for nios2-softmmu
  MAINTAINERS: add entries for the key build bits
  MAINTAINERS: add features_to_c.sh to gdbstub files
  MAINTAINERS: fix-up for check-tcg Makefile changes
  tests/avocado: set -machine none for userfwd and vnc tests
  tests/avocado: disable sh4 rd2 tests on Gitlab
  tests/tcg: re-enable linux-test for sh4
  tests/tcg: re-enable threadcount for sh4
  target/s390x: don't use ld_code2 to probe next pc
  target/s390x: don't probe next pc for EXecuted insns
  target/s390x: fake instruction loading when handling 'ex'
  contrib/plugins: enable debug on CONFIG_DEBUG_TCG
  contrib/plugins: protect execlog's last_exec expansion
  tests/unit: cleanups for test-io-channel-command

Anton Johansson (2):
  tests/docker: Add flex/bison to `debian-all-test`
  tests/docker: Add flex/bison to `debian-hexagon-cross`

Bin Meng (4):
  semihosting/arm-compat-semi: Avoid using hardcoded /tmp
  tcg: Avoid using hardcoded /tmp
  block/vvfat: Unify the mkdir() call
  hw/usb: dev-mtp: Use g_mkdir()

Ilya Leoshkevich (1):
  tests/vm: use -o IdentitiesOnly=yes for ssh

Paolo Bonzini (1):
  tests/tcg: include CONFIG_PLUGIN in config-host.mak

Peter Maydell (1):
  tests/avocado: raspi2_initrd: Wait for guest shutdown message before stopping

Richard Henderson (1):
  tests/tcg/nios2: Tweak 10m50-ghrd.ld

 configure  |  17 ++-
 include/exec/translator.h  |  17 +++
 block/vvfat.c  |   9 +-
 contrib/plugins/execlog.c  |  38 --
 hw/usb/dev-mtp.c   |   4 +-
 semihosting/arm-compat-semi.c  |   3 +-
 target/s390x/tcg/translate.c   |  14 ++-
 tcg/tcg.c  |   3 +-
 tests/unit/test-io-channel-command.c   |  45 ---
 MAINTAINERS|  29 -
 contrib/plugins/Makefile   |   1 +
 tests/avocado/boot_linux.py|   1 +
 tests/avocado/boot_linux_console.py|   7 +-
 tests/avocado/info_usernet.py  |   3 +
 tests/avocado/vnc.py   |   1 +
 .../dockerfiles/debian-all-test-cross.docker   |   2 +
 .../docker/dockerfiles/debian-hexagon-cross.docker |   2 +-
 tests/docker/dockerfiles/fedora-win32-cross.docker | 139 +++--
 tests/docker/dockerfiles/fedora-win64-cross.docker | 138 ++--
 tests/docker/test-mingw|  16 +--
 tests/lcitool/libvirt-ci   |   2 +-
 tests/lcitool/refresh  |  48 ---
 tests/tcg/nios2/10m50-ghrd.ld  |  14 ++-
 tests/tcg/nios2/Makefile.softmmu-target|   3 +-
 tests/tcg/sh4/Makefile.target  |  12 --
 tests/vm/basevm.py |   3 +-
 26 files changed, 396 insertions(+), 175 deletions(-)


-- 
2.34.1




Re: [PATCH v3 16/24] disas/nanomips: Remove IMMEDIATE functions

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 09:28, Stefan Weil via wrote:

Am 12.09.22 um 14:26 schrieb Milica Lazarevic:

Both versions of IMMEDIATE functions have been removed.

Before this patch, we'd been calling img_format twice, the first time
through the IMMEDIATE to get an appropriate string and the second time
to print that string. There's no more need for that. Therefore, calls to
IMMEDIATE are removed, and now we're directly printing the integer
values instead.

Signed-off-by: Milica Lazarevic 
---
  disas/nanomips.cpp | 756 -
  1 file changed, 265 insertions(+), 491 deletions(-)

diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp
index 816155527d..441204bb84 100644
--- a/disas/nanomips.cpp
+++ b/disas/nanomips.cpp

[...]
@@ -3305,11 +3271,9 @@ static char *CACHE(uint64 instruction, Dis_info 
*info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
-    char *op = IMMEDIATE(op_value);
-    char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
-    return img_format("CACHE %s, %s(%s)", op, s, rs);
+    return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, 
s_value, rs);

  }
@@ -3329,11 +3293,9 @@ static char *CACHEE(uint64 instruction, 
Dis_info *info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
-    char *op = IMMEDIATE(op_value);
-    char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
-    return img_format("CACHEE %s, %s(%s)", op, s, rs);
+    return img_format("CACHEE 0x%" PRIx64 ", %s(%s)", op_value, 
s_value, rs);

  }


Do we really want to format "int64 s_value" as a string? The code now 
has lots of wrong format strings. Add the patch below to get the 
compiler report.


We once had a discussion about using G_GNUC_PRINTF for local functions 
or not. I think that this example clearly shows that it should be 
mandatory.


Yes. The problem here is nobody wants to maintain this code, but we
inherited it. IIUC this series doesn't make it worse, it just removes
the C++ dependency on UNIX-based hosts.


Regards,
Stefan

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 9647f1a8e3..c875818cb9 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -95,7 +95,7 @@ typedef struct Pool {
  #define IMGASSERTONCE(test)


-static char *img_format(const char *format, ...)
+static char * G_GNUC_PRINTF(1, 2) img_format(const char *format, ...)
  {
  char *buffer;
  va_list args;






Re: [PULL 08/30] target/arm: Add ptw_idx to S1Translate

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 00:14, Philippe Mathieu-Daudé wrote:

On 25/10/22 18:39, Peter Maydell wrote:

From: Richard Henderson 

Hoist the computation of the mmu_idx for the ptw up to
get_phys_addr_with_struct and get_phys_addr_twostage.
This removes the duplicate check for stage2 disabled
from the middle of the walk, performing it only once.

Signed-off-by: Richard Henderson 
Reviewed-by: Alex Bennée 
Tested-by: Alex Bennée 
Message-id: 20221024051851.3074715-3-richard.hender...@linaro.org
Signed-off-by: Peter Maydell 
---
  target/arm/ptw.c | 71 
  1 file changed, 54 insertions(+), 17 deletions(-)

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 32d64125865..3c153f68318 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -17,6 +17,7 @@
  typedef struct S1Translate {
  ARMMMUIdx in_mmu_idx;
+    ARMMMUIdx in_ptw_idx;
  bool in_secure;
  bool in_debug;
  bool out_secure;
@@ -214,33 +215,24 @@ static bool S1_ptw_translate(CPUARMState *env, 
S1Translate *ptw,

  {
  bool is_secure = ptw->in_secure;
  ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
-    ARMMMUIdx s2_mmu_idx = is_secure ? ARMMMUIdx_Stage2_S : 
ARMMMUIdx_Stage2;

-    bool s2_phys = false;
+    ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx;
  uint8_t pte_attrs;
  bool pte_secure;
-    if (!arm_mmu_idx_is_stage1_of_2(mmu_idx)
-    || regime_translation_disabled(env, s2_mmu_idx, is_secure)) {
-    s2_mmu_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
-    s2_phys = true;
-    }
-
  if (unlikely(ptw->in_debug)) {
  /*
   * From gdbstub, do not use softmmu so that we don't modify the
   * state of the cpu at all, including softmmu tlb contents.
   */
-    if (s2_phys) {
-    ptw->out_phys = addr;
-    pte_attrs = 0;
-    pte_secure = is_secure;
-    } else {
+    if (regime_is_stage2(s2_mmu_idx)) {
  S1Translate s2ptw = {
  .in_mmu_idx = s2_mmu_idx,
+    .in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : 
ARMMMUIdx_Phys_NS,

  .in_secure = is_secure,
  .in_debug = true,
  };
  GetPhysAddrResult s2 = { };
+
  if (!get_phys_addr_lpae(env, &s2ptw, addr, MMU_DATA_LOAD,
  false, &s2, fi)) {
  goto fail;
@@ -248,6 +240,11 @@ static bool S1_ptw_translate(CPUARMState *env, 
S1Translate *ptw,

  ptw->out_phys = s2.f.phys_addr;
  pte_attrs = s2.cacheattrs.attrs;
  pte_secure = s2.f.attrs.secure;
+    } else {
+    /* Regime is physical. */
+    ptw->out_phys = addr;
+    pte_attrs = 0;
+    pte_secure = is_secure;
  }
  ptw->out_host = NULL;
  } else {
@@ -268,7 +265,7 @@ static bool S1_ptw_translate(CPUARMState *env, 
S1Translate *ptw,

  pte_secure = full->attrs.secure;
  }
-    if (!s2_phys) {
+    if (regime_is_stage2(s2_mmu_idx)) {
  uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure);
  if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) {
@@ -1263,7 +1260,18 @@ static bool get_phys_addr_lpae(CPUARMState 
*env, S1Translate *ptw,

  descaddr |= (address >> (stride * (4 - level))) & indexmask;
  descaddr &= ~7ULL;
  nstable = extract32(tableattrs, 4, 1);
-    ptw->in_secure = !nstable;
+    if (!nstable) {
+    /*
+ * Stage2_S -> Stage2 or Phys_S -> Phys_NS
+ * Assert that the non-secure idx are even, and relative 
order.

+ */
+    QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0);
+    QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0);
+    QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != 
ARMMMUIdx_Phys_S);
+    QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != 
ARMMMUIdx_Stage2_S);

+    ptw->in_ptw_idx &= ~1;
+    ptw->in_secure = false;
+    }
  descriptor = arm_ldq_ptw(env, ptw, descaddr, fi);
  if (fi->type != ARMFault_None) {
  goto do_fault;
@@ -2449,6 +2457,7 @@ static bool get_phys_addr_twostage(CPUARMState 
*env, S1Translate *ptw,

  is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0;
  ptw->in_mmu_idx = s2walk_secure ? ARMMMUIdx_Stage2_S : 
ARMMMUIdx_Stage2;
+    ptw->in_ptw_idx = s2walk_secure ? ARMMMUIdx_Phys_S : 
ARMMMUIdx_Phys_NS;

  ptw->in_secure = s2walk_secure;
  /*
@@ -2508,10 +2517,32 @@ static bool 
get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw,

    ARMMMUFaultInfo *fi)
  {
  ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
-    ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx);
  bool is_secure = ptw->in_secure;
+    ARMMMUIdx s1_mmu_idx;
-    if (mmu_idx != s1_mmu_idx) {
+    switch (mmu_idx) {
+    case ARMMMUIdx_Phys_S:
+    case ARMMMUIdx_Phys_NS:
+    /* Checking Phys early avoids special casing 

HMAT patches failure (was Re: [PULL 00/86] pci,pc,virtio: features, tests, fixes, cleanups)

2022-11-01 Thread Michael S. Tsirkin
On Mon, Oct 31, 2022 at 04:06:03PM -0400, Stefan Hajnoczi wrote:
> Here is another CI failure:
> 
> qemu-system-i386: current -smp configuration requires kernel irqchip
> and X2APIC API support.
> Broken pipe
> ../tests/qtest/libqtest.c:179: kill_qemu() tried to terminate QEMU
> process but encountered exit status 1 (expected 0)
> TAP parsing error: Too few tests run (expected 49, got 22)
> (test program exited with status code -6)
> ――
> 6/202 qemu:qtest+qtest-i386 / qtest-i386/test-hmp OK 7.46s 9 subtests passed
> ▶ 7/202 ERROR:../tests/qtest/bios-tables-test.c:533:test_acpi_asl:
> assertion failed: (all_tables_match) ERROR
> 7/202 qemu:qtest+qtest-aarch64 / qtest-aarch64/bios-tables-test ERROR
> 108.34s killed by signal 6 SIGABRT
> >>> G_TEST_DBUS_DAEMON=/builds/qemu-project/qemu/tests/dbus-vmstate-daemon.sh 
> >>> QTEST_QEMU_BINARY=./qemu-system-aarch64 MALLOC_PERTURB_=89 
> >>> /builds/qemu-project/qemu/build/tests/qtest/bios-tables-test --tap -k
> ― ✀ ―
> stderr:
> acpi-test: Warning! APIC binary file mismatch. Actual
> [aml:/tmp/aml-UKB6U1], Expected
> [aml:tests/data/acpi/virt/APIC.acpihmatvirt].
> See source file tests/qtest/bios-tables-test.c for instructions on how
> to update expected files.
> to see ASL diff between mismatched files install IASL, rebuild QEMU
> from scratch and re-run tests with V=1 environment variable set**
> ERROR:../tests/qtest/bios-tables-test.c:533:test_acpi_asl: assertion
> failed: (all_tables_match)
> (test program exited with status code -6)
> 
> https://gitlab.com/qemu-project/qemu/-/jobs/3253817453


Hesham, Jonathan, please take a look. If you post a fixup today
or early tomorrow, I can squash it,
and then this patchset can still be included in the release.

Thanks!

-- 
MST




Re: [PATCH v3 0/8] AArch64/HMAT support and tests

2022-11-01 Thread Michael S. Tsirkin
On Thu, Oct 27, 2022 at 11:00:29AM +0100, Hesham Almatary wrote:
> This patchset adds support for AArch64/HMAT including a test.
> It relies on other two patch sets from:
> 
> Brice Goglin: to support -numa without initiators on q35/x86.
>   https://lore.kernel.org/all/ed23accb-2c8b-90f4-a7a3-f81cc57bf...@inria.fr/
> Xiang Chen: to enable/support HMAT on AArch64.
>   
> https://lore.kernel.org/all/1643102134-15506-1-git-send-email-chenxian...@hisilicon.com/
> 
> I further add a test with ACPI/HMAT tables that uses the two
> patch sets.

pipeline failures:
https://gitlab.com/qemu-project/qemu/-/jobs/3253817453
this looks like a 32 bit host.


> Changes from v2:
> - Rebased and fixed a merge conflict
> 
> Changes from v1:
> - Generate APIC and PPTT ACPI tables for AArch64/virt
> - Avoid using legacy syntax in numa/bios tests
> - Delete unchanged FACP tables
> 
> Brice Goglin (4):
>   hmat acpi: Don't require initiator value in -numa
>   tests: acpi: add and whitelist *.hmat-noinitiator expected blobs
>   tests: acpi: q35: add test for hmat nodes without initiators
>   tests: acpi: q35: update expected blobs *.hmat-noinitiators expected
> HMAT:
> 
> Hesham Almatary (3):
>   tests: Add HMAT AArch64/virt empty table files
>   tests: acpi: aarch64/virt: add a test for hmat nodes with no
> initiators
>   tests: virt: Update expected *.acpihmatvirt tables
> 
> Xiang Chen (1):
>   hw/arm/virt: Enable HMAT on arm virt machine
> 
>  hw/arm/Kconfig|   1 +
>  hw/arm/virt-acpi-build.c  |   7 ++
>  hw/core/machine.c |   4 +-
>  tests/data/acpi/q35/APIC.acpihmat-noinitiator | Bin 0 -> 144 bytes
>  tests/data/acpi/q35/DSDT.acpihmat-noinitiator | Bin 0 -> 8553 bytes
>  tests/data/acpi/q35/HMAT.acpihmat-noinitiator | Bin 0 -> 288 bytes
>  tests/data/acpi/q35/SRAT.acpihmat-noinitiator | Bin 0 -> 312 bytes
>  tests/data/acpi/virt/APIC.acpihmatvirt| Bin 0 -> 396 bytes
>  tests/data/acpi/virt/DSDT.acpihmatvirt| Bin 0 -> 5282 bytes
>  tests/data/acpi/virt/HMAT.acpihmatvirt| Bin 0 -> 288 bytes
>  tests/data/acpi/virt/PPTT.acpihmatvirt| Bin 0 -> 196 bytes
>  tests/data/acpi/virt/SRAT.acpihmatvirt| Bin 0 -> 240 bytes
>  tests/qtest/bios-tables-test.c| 109 ++
>  13 files changed, 118 insertions(+), 3 deletions(-)
>  create mode 100644 tests/data/acpi/q35/APIC.acpihmat-noinitiator
>  create mode 100644 tests/data/acpi/q35/DSDT.acpihmat-noinitiator
>  create mode 100644 tests/data/acpi/q35/HMAT.acpihmat-noinitiator
>  create mode 100644 tests/data/acpi/q35/SRAT.acpihmat-noinitiator
>  create mode 100644 tests/data/acpi/virt/APIC.acpihmatvirt
>  create mode 100644 tests/data/acpi/virt/DSDT.acpihmatvirt
>  create mode 100644 tests/data/acpi/virt/HMAT.acpihmatvirt
>  create mode 100644 tests/data/acpi/virt/PPTT.acpihmatvirt
>  create mode 100644 tests/data/acpi/virt/SRAT.acpihmatvirt
> 
> -- 
> 2.25.1




Re: [PATCH] hw/nvme: reenable cqe batching

2022-11-01 Thread Klaus Jensen
On Oct 20 13:35, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
> updates") had the unintended effect of disabling batching of CQEs.
> 
> This patch changes the sq/cq timers to bottom halfs and instead of
> calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
> defer the call.
> 
>| iops
>   -+--
> baseline   | 138k
> +cqe batching  | 233k
> 
> Fixes: 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell updates")
> Signed-off-by: Klaus Jensen 

Thanks for the reviews, applied to nvme-next!


signature.asc
Description: PGP signature


Re: [PATCH v2 1/4] virtio-crypto: Support asynchronous mode

2022-11-01 Thread Michael S. Tsirkin
On Sat, Oct 08, 2022 at 04:50:27PM +0800, Lei He wrote:
> virtio-crypto: Modify the current interface of virtio-crypto
> device to support asynchronous mode.
> 
> Signed-off-by: lei he 
> ---
>  backends/cryptodev-builtin.c|  69 ++---
>  backends/cryptodev-vhost-user.c |  51 +--
>  backends/cryptodev.c|  44 +++---
>  hw/virtio/virtio-crypto.c   | 324 
> ++--
>  include/sysemu/cryptodev.h  |  60 +---
>  5 files changed, 336 insertions(+), 212 deletions(-)
> 
> diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c
> index 125cbad1d3..cda6ca3b71 100644
> --- a/backends/cryptodev-builtin.c
> +++ b/backends/cryptodev-builtin.c
> @@ -355,42 +355,62 @@ static int cryptodev_builtin_create_akcipher_session(
>  return index;
>  }
>  
> -static int64_t cryptodev_builtin_create_session(
> +static int cryptodev_builtin_create_session(
> CryptoDevBackend *backend,
> CryptoDevBackendSessionInfo *sess_info,
> -   uint32_t queue_index, Error **errp)
> +   uint32_t queue_index,
> +   CryptoDevCompletionFunc cb,
> +   void *opaque)
>  {
>  CryptoDevBackendBuiltin *builtin =
>CRYPTODEV_BACKEND_BUILTIN(backend);
>  CryptoDevBackendSymSessionInfo *sym_sess_info;
>  CryptoDevBackendAsymSessionInfo *asym_sess_info;
> +int ret, status;
> +Error *local_error = NULL;
>  
>  switch (sess_info->op_code) {
>  case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
>  sym_sess_info = &sess_info->u.sym_sess_info;
> -return cryptodev_builtin_create_cipher_session(
> -   builtin, sym_sess_info, errp);
> +ret = cryptodev_builtin_create_cipher_session(
> +builtin, sym_sess_info, &local_error);
> +break;
>  
>  case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
>  asym_sess_info = &sess_info->u.asym_sess_info;
> -return cryptodev_builtin_create_akcipher_session(
> -   builtin, asym_sess_info, errp);
> +ret = cryptodev_builtin_create_akcipher_session(
> +   builtin, asym_sess_info, &local_error);
> +break;
>  
>  case VIRTIO_CRYPTO_HASH_CREATE_SESSION:
>  case VIRTIO_CRYPTO_MAC_CREATE_SESSION:
>  default:
> -error_setg(errp, "Unsupported opcode :%" PRIu32 "",
> +error_setg(&local_error, "Unsupported opcode :%" PRIu32 "",
> sess_info->op_code);
> -return -1;
> +return -VIRTIO_CRYPTO_NOTSUPP;
>  }
>  
> -return -1;
> +if (local_error) {
> +error_report_err(local_error);
> +}
> +if (ret < 0) {
> +status = -VIRTIO_CRYPTO_ERR;
> +} else {
> +sess_info->session_id = ret;
> +status = VIRTIO_CRYPTO_OK;
> +}
> +if (cb) {
> +cb(opaque, status);
> +}
> +return 0;
>  }
>  
>  static int cryptodev_builtin_close_session(
> CryptoDevBackend *backend,
> uint64_t session_id,
> -   uint32_t queue_index, Error **errp)
> +   uint32_t queue_index,
> +   CryptoDevCompletionFunc cb,
> +   void *opaque)
>  {
>  CryptoDevBackendBuiltin *builtin =
>CRYPTODEV_BACKEND_BUILTIN(backend);
> @@ -407,6 +427,9 @@ static int cryptodev_builtin_close_session(
>  
>  g_free(session);
>  builtin->sessions[session_id] = NULL;
> +if (cb) {
> +cb(opaque, VIRTIO_CRYPTO_OK);
> +}
>  return 0;
>  }
>  
> @@ -506,7 +529,9 @@ static int cryptodev_builtin_asym_operation(
>  static int cryptodev_builtin_operation(
>   CryptoDevBackend *backend,
>   CryptoDevBackendOpInfo *op_info,
> - uint32_t queue_index, Error **errp)
> + uint32_t queue_index,
> + CryptoDevCompletionFunc cb,
> + void *opaque)
>  {
>  CryptoDevBackendBuiltin *builtin =
>CRYPTODEV_BACKEND_BUILTIN(backend);
> @@ -514,11 +539,12 @@ static int cryptodev_builtin_operation(
>  CryptoDevBackendSymOpInfo *sym_op_info;
>  CryptoDevBackendAsymOpInfo *asym_op_info;
>  enum CryptoDevBackendAlgType algtype = op_info->algtype;
> -int ret = -VIRTIO_CRYPTO_ERR;
> +int status = -VIRTIO_CRYPTO_ERR;
> +Error *local_error = NULL;
>  
>  if (op_info->session_id >= MAX_NUM_SESSIONS ||
>builtin->sessions[op_info->session_id] == NULL) {
> -error_setg(errp, "Cannot find a valid session id: %" PRIu64 "",
> +error_setg(&local_error, "Cannot find a valid session id: %" PRIu64 
> "",
> op_info->session_id);
>  return -VIRTIO_CRYPTO_INVSESS;
>  }
> @@ -526,14 +552,21 @@ static int cryptodev_builtin_operation(
>  sess = builtin->sessions[op_info->session_id];
>  if (algtype == CRYPTODEV_BACKEND_ALG_SYM) {
>  sym_op_info = op_info->u.sym_op_

Re: [PATCH v5 6/6] hw/ppc/e500: Add Freescale eSDHC to e500plat

2022-11-01 Thread Bernhard Beschow
On Mon, Oct 31, 2022 at 4:19 PM Philippe Mathieu-Daudé 
wrote:

> On 31/10/22 16:12, Philippe Mathieu-Daudé wrote:
> > On 31/10/22 12:54, Philippe Mathieu-Daudé wrote:
> >> From: Bernhard Beschow 
> >>
> >> Adds missing functionality to e500plat machine which increases the
> >> chance of given "real" firmware images to access SD cards.
> >>
> >> Signed-off-by: Bernhard Beschow 
> >> Message-Id: <20221018210146.193159-8-shen...@gmail.com>
> >> [PMD: Simplify using create_unimplemented_device("esdhc")]
> >> Signed-off-by: Philippe Mathieu-Daudé 
> >> ---
> >>   docs/system/ppc/ppce500.rst | 12 ++
> >>   hw/ppc/Kconfig  |  2 ++
> >>   hw/ppc/e500.c   | 48 -
> >>   hw/ppc/e500.h   |  1 +
> >>   hw/ppc/e500plat.c   |  1 +
> >>   5 files changed, 63 insertions(+), 1 deletion(-)
> >
> >> @@ -992,6 +1018,26 @@ void ppce500_init(MachineState *machine)
> >>   i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c");
> >>   i2c_slave_create_simple(i2c, "ds1338", RTC_REGS_OFFSET);
> >> +/* eSDHC */
> >> +if (pmc->has_esdhc) {
> >> +uint64_t sdhci_regsize;
> >> +
> >> +dev = qdev_new(TYPE_SYSBUS_SDHCI);
> >> +/*
> >> + * Compatible with:
> >> + * - SD Host Controller Specification Version 2.0 Part A2
> >> + */
> >> +qdev_prop_set_uint8(dev, "sd-spec-version", 2);
> >> +s = SYS_BUS_DEVICE(dev);
> >> +sysbus_realize_and_unref(s, &error_fatal);
> >> +sysbus_mmio_map(s, 0, pmc->ccsrbar_base +
> >> MPC85XX_ESDHC_REGS_OFFSET);
> >> +sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev,
> >> MPC85XX_ESDHC_IRQ));
> >> +sdhci_regsize = memory_region_size(sysbus_mmio_get_region(s,
> >> 0));
> >> +create_unimplemented_device("esdhc",
> >> +pmc->ccsrbar_base
> >> ++ MPC85XX_ESDHC_REGS_OFFSET +
> >> sdhci_regsize,
> >> +MPC85XX_ESDHC_REGS_SIZE -
> >> sdhci_regsize);
> >> +}
> >
> > Since the UNIMP device has lower priority, we can simplify as:
> >
> > if (pmc->has_esdhc) {
> >  create_unimplemented_device("esdhc",
> >  pmc->ccsrbar_base
> >  + MPC85XX_ESDHC_REGS_OFFSET,
> >  MPC85XX_ESDHC_REGS_SIZE);
> >
> >  dev = qdev_new(TYPE_SYSBUS_SDHCI);
> >  /*
> >   * Compatible with:
> >   * - SD Host Controller Specification Version 2.0 Part A2
> >   */
> >  qdev_prop_set_uint8(dev, "sd-spec-version", 2);
> >  s = SYS_BUS_DEVICE(dev);
> >  sysbus_realize_and_unref(s, &error_fatal);
> >  sysbus_mmio_map(s, 0, pmc->ccsrbar_base +
> MPC85XX_ESDHC_REGS_OFFSET);
>
  memory_region_add_subregion(ccsr_addr_space, MPC85XX_ESDHC_REGS_OFFSET,
  sysbus_mmio_get_region(s, 0));

seems to be equivalent, works as well and mimics other devices, e.g. i2c.
So perhaps use that?

>
> So the SDHCI is mapped inside the CCSR block. Better would be to map it
> into ccsr_addr_space.
>

Doesn't the above code map it into ccsr_addr_space?

>
> I presume the CCSR is the device responsible of endian swapping, but TBH
> I have no clue about this board.
>
>


Re: [PATCH v5 0/6] ppc/e500: Add support for two types of flash, cleanup

2022-11-01 Thread Bernhard Beschow
On Mon, Oct 31, 2022 at 12:54 PM Philippe Mathieu-Daudé 
wrote:

> This is a respin of Bernhard's v4 with Freescale eSDHC implemented
> as an 'UNIMP' region. See v4 cover here:
>
> https://lore.kernel.org/qemu-devel/20221018210146.193159-1-shen...@gmail.com/
>
> Only tested with the ppce500 machine (no further regression testing).
>
> Since v4:
> - Do not rename ESDHC_* definitions to USDHC_*
> - Do not modify SDHCIState structure
>

Works beautifully, both for the buildroot load and for my proprietary load.
So:
Tested-by: Bernhard Beschow

>
> Bernhard Beschow (4):
>   hw/block/pflash_cfi0{1, 2}: Error out if device length isn't a power
> of two
>   docs/system/ppc/ppce500: Use qemu-system-ppc64 across the board(s)
>   hw/ppc/e500: Implement pflash handling
>   hw/ppc/e500: Add Freescale eSDHC to e500plat
>
> Philippe Mathieu-Daudé (2):
>   hw/sd/sdhci: MMIO region is implemented in 32-bit accesses
>   hw/sd/sdhci: Map host controller interface in host endianess
>
>  docs/system/ppc/ppce500.rst |  38 +--
>  hw/block/pflash_cfi01.c |   8 ++-
>  hw/block/pflash_cfi02.c |   5 ++
>  hw/ppc/Kconfig  |   3 +
>  hw/ppc/e500.c   | 127 +++-
>  hw/ppc/e500.h   |   1 +
>  hw/ppc/e500plat.c   |   1 +
>  hw/sd/sdhci.c   |   6 +-
>  8 files changed, 180 insertions(+), 9 deletions(-)
>
> --
> 2.37.3
>
>


Re: [PATCH] qapi: virtio: Fix the introduced version

2022-11-01 Thread Laurent Vivier

Le 01/11/2022 à 02:46, Han Han a écrit :

The items of qapi/virtio.json are introduced at a5ebce38576. They will be
in the version 7.2 not 7.1.

Signed-off-by: Han Han 
---
  qapi/virtio.json | 34 +-
  1 file changed, 17 insertions(+), 17 deletions(-)


Reviewed-by: Laurent Vivier 




Re: [PATCH v4 6/7] hw/sd/sdhci: Implement Freescale eSDHC device model

2022-11-01 Thread Bernhard Beschow
Am 31. Oktober 2022 12:11:37 UTC schrieb "Philippe Mathieu-Daudé" 
:
>On 30/10/22 12:46, Bernhard Beschow wrote:
>> 
>> 
>> On Sun, Oct 30, 2022 at 1:10 AM Philippe Mathieu-Daudé > > wrote:
>> 
>> On 29/10/22 20:28, Bernhard Beschow wrote:
>>  > Am 29. Oktober 2022 13:04:00 UTC schrieb Bernhard Beschow
>> mailto:shen...@gmail.com>>:
>>  >> Am 29. Oktober 2022 11:33:51 UTC schrieb Bernhard Beschow
>> mailto:shen...@gmail.com>>:
>>  >>> Am 27. Oktober 2022 21:40:01 UTC schrieb "Philippe
>> Mathieu-Daudé" mailto:phi...@linaro.org>>:
>>   Hi Bernhard,
>>  
>>   On 18/10/22 23:01, Bernhard Beschow wrote:
>>  > Will allow e500 boards to access SD cards using just their
>> own devices.
>>  >
>>  > Signed-off-by: Bernhard Beschow > >
>>  > ---
>>  >    hw/sd/sdhci.c         | 120
>> +-
>>  >    include/hw/sd/sdhci.h |   3 ++
>>  >    2 files changed, 122 insertions(+), 1 deletion(-)
>> 
>>   So now, I'd create 1 UNIMP region for ESDHC_WML and map it
>>   into SDHC_REGISTERS_MAP (s->iomem) with priority 1, and add
>>   another UNIMP region of ESDHC_REGISTERS_MAP_SIZE -
>> SDHC_REGISTERS_MAP_SIZE (= 0x310) and map it normally at offset
>>   0x100 (SDHC_REGISTERS_MAP_SIZE). Look at create_unimp() in
>>   hw/arm/bcm2835_peripherals.c.
>>  
>>   But the ESDHC_WML register has address 0x44 and fits inside the
>>   SDHC_REGISTERS_MAP region, so likely belong there. 0x44 is the
>>   upper part of the SDHC_CAPAB register. These bits are undefined
>>   on the spec v2, which I see you are setting in esdhci_init().
>>   So this register should already return 0, otherwise we have
>>   a bug. Thus we don't need to handle this ESDHC_WML particularly.
>>  >>
>>  >> My idea here was to catch this unimplemented case in order to
>> indicate this clearly to users. Perhaps it nudges somebody to
>> provide a patch?
>>  >>
>>  
>>   And your model is reduced to handling create_unimp() in
>> esdhci_realize().
>>  
>>   Am I missing something?
>>  >>>
>>  >>> The mmio ops are big endian and need to be aligned to a 4-byte
>> boundary. It took me quite a while to debug this. So shall I just
>> create an additional memory region for the region above
>> SDHC_REGISTERS_MAP_SIZE for ESDHC_DMA_SYSCTL?
>>  >>
>>  >> All in all I currently don't have a better idea than keeping the
>> custom i/o ops for the standard region and adding an additional
>> unimplemented region for ESDHC_DMA_SYSCTL. I think I'd have to
>> dynamically allocate memory for it where I still need to figure out
>> how not to leak it.
>>  >
>>  > By simply reusing sdhci_{read,write} in eSDHC's io_ops struct I
>> was able to remove the custom implementations while having big
>> endian and the alignments proper. However, I don't see a way of
>> adding two memory regions - with or without a container. With a
>> container I'd have to somehow preserve the mmio attribute which is
>> initialized by the parent class, re-initialize it with the
>> container, and add the preserved memory region as child. This seems
>> very fragile, esp. since the parent class has created an alias for
>> mmio in sysbus. Without a container, one would have two memory
>> regions that both have to be mapped separately by the caller, i.e.
>> it burdens the caller with an implementation detail.
>>  >
>>  > Any suggestions?
>
>See 
>https://lore.kernel.org/qemu-devel/20221031115402.91912-7-phi...@linaro.org/
>
>> Can you share branch and how to test?
>> 
>> 
>> QEMU branch: https://github.com/shentok/qemu/tree/e500-flash 
>> 
>> 
>> How to test:
>> 1. `git clone -b e500 https://github.com/shentok/buildroot.git 
>> `
>> 2. `cd buildroot`
>> 3. `make qemu_ppc_e500mc_defconfig`
>> 4. `make`
>> 5. `cd output/images`
>> 6. `dd if=/dev/zero of=root.img bs=1M count=64 && dd if=rootfs.ext2 
>> of=root.img bs=1M conv=notrunc`
>> 7. `qemu-system-ppc -M ppce500 -cpu e500mc -m 256 -kernel uImage -append 
>> "console=ttyS0 rootwait root=/dev/mmcblk0" -device sd-card,drive=mydrive 
>> -drive id=mydrive,if=none,file=root.img,format=raw`
>
>Could you add an Avocado-based test?

I could give it a try at least. Where would I drop the binaries?

Best regards,
Bernhard
>
>>    Welcome to Buildroot
>>    buildroot login:
>
>Regards,
>
>Phil.




Re: [PATCH 1/9] target/s390x: Use a single return for helper_divs32/u32

2022-11-01 Thread Ilya Leoshkevich
On Fri, Oct 21, 2022 at 05:29:58PM +1000, Richard Henderson wrote:
> Pack the quotient and remainder into a single uint64_t.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h |  2 +-
>  target/s390x/tcg/int_helper.c | 26 +-
>  target/s390x/tcg/translate.c  | 10 ++
>  3 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index bf33d86f74..97a9668eef 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -10,7 +10,7 @@ DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, 
> i64)
>  DEF_HELPER_3(mvcl, i32, env, i32, i32)
>  DEF_HELPER_3(clcl, i32, env, i32, i32)
>  DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
> -DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, s64, env, s64, s64)
> +DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64)
>  DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
>  DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
>  DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
> diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c
> index 954542388a..130b8bd4d2 100644
> --- a/target/s390x/tcg/int_helper.c
> +++ b/target/s390x/tcg/int_helper.c
> @@ -34,45 +34,45 @@
>  #endif
>  
>  /* 64/32 -> 32 signed division */
> -int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
> +uint64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
>  {
> -int32_t ret, b = b64;
> -int64_t q;
> +int32_t b = b64;
> +int64_t q, r;
>  
>  if (b == 0) {
>  tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
>  }
>  
> -ret = q = a / b;
> -env->retxl = a % b;
> +q = a / b;
> +r = a % b;
>  
>  /* Catch non-representable quotient.  */
> -if (ret != q) {
> +if (q != (int32_t)q) {
>  tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
>  }
>  
> -return ret;
> +return deposit64(r, 32, 32, q);
>  }
>  
>  /* 64/32 -> 32 unsigned division */
>  uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64)
>  {
> -uint32_t ret, b = b64;
> -uint64_t q;
> +uint32_t b = b64;
> +uint64_t q, r;
>  
>  if (b == 0) {
>  tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
>  }
>  
> -ret = q = a / b;
> -env->retxl = a % b;
> +q = a / b;
> +r = a % b;
>  
>  /* Catch non-representable quotient.  */
> -if (ret != q) {
> +if (q != (uint32_t)q) {
>  tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
>  }
>  
> -return ret;
> +return deposit64(r, 32, 32, q);
>  }
>  
>  /* 64/64 -> 64 signed division */
> diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
> index 1d2dddab1c..525317c9df 100644
> --- a/target/s390x/tcg/translate.c
> +++ b/target/s390x/tcg/translate.c
> @@ -2395,15 +2395,17 @@ static DisasJumpType op_diag(DisasContext *s, 
> DisasOps *o)
>  
>  static DisasJumpType op_divs32(DisasContext *s, DisasOps *o)
>  {
> -gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2);
> -return_low128(o->out);
> +gen_helper_divs32(o->out, cpu_env, o->in1, o->in2);
> +tcg_gen_ext32u_i64(o->out2, o->out);
> +tcg_gen_shri_i64(o->out, o->out, 32);
>  return DISAS_NEXT;
>  }
>  
>  static DisasJumpType op_divu32(DisasContext *s, DisasOps *o)
>  {
> -gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2);
> -return_low128(o->out);
> +gen_helper_divu32(o->out, cpu_env, o->in1, o->in2);
> +tcg_gen_ext32u_i64(o->out2, o->out);
> +tcg_gen_shri_i64(o->out, o->out, 32);
>  return DISAS_NEXT;
>  }
>  
> -- 
> 2.34.1

Hi,

The wasmtime testsuite was failing and bisect pointed to this commit.
Apparently this needs a fixup:


--- a/target/s390x/tcg/int_helper.c
+++ b/target/s390x/tcg/int_helper.c
@@ -51,7 +51,7 @@ uint64_t HELPER(divs32)(CPUS390XState *env, int64_t a, 
int64_t b64)
 tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
 }
 
-return deposit64(r, 32, 32, q);
+return deposit64(q, 32, 32, r);
 }
 
 /* 64/32 -> 32 unsigned division */
@@ -72,7 +72,7 @@ uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, 
uint64_t b64)
 tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
 }
 
-return deposit64(r, 32, 32, q);
+return deposit64(q, 32, 32, r);
 }
 
 /* 64/64 -> 64 signed division */


Currently we return out = r | (q << 32) here.
op_divu32 makes out2 = r, out = q from this.
Finally, r1_P32 stores r1 = q, r1+1 = r.
But DLR wants the opposite:

The remainder is placed in bit
positions 32-63 of general register R1, and the quo-
tient is placed in bit positions 32-63 of general regis-
ter R1 + 1.

Ditto DR.

Best regards,
Ilya



[PATCH] tests/tcg/s390x: Add div.c

2022-11-01 Thread Ilya Leoshkevich
Add a basic test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/div.c   | 40 +
 2 files changed, 41 insertions(+)
 create mode 100644 tests/tcg/s390x/div.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index c882db7a78a..f2ec7587387 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -26,6 +26,7 @@ TESTS+=branch-relative-long
 TESTS+=noexec
 TESTS+=clst
 TESTS+=long-double
+TESTS+=div
 
 Z13_TESTS=vistr
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c
new file mode 100644
index 00000000000..58072956147
--- /dev/null
+++ b/tests/tcg/s390x/div.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+#include <stdint.h>
+
+static void test_dr(void)
+{
+register int32_t r0 asm("r0") = -1;
+register int32_t r1 asm("r1") = -4241;
+int32_t b = 101, q, r;
+
+asm("dr %[r0],%[b]"
+: [r0] "+r" (r0), [r1] "+r" (r1)
+: [b] "r" (b)
+: "cc");
+q = r1;
+r = r0;
+assert(q == -41);
+assert(r == -100);
+}
+
+static void test_dlr(void)
+{
+register uint32_t r0 asm("r0") = 0;
+register uint32_t r1 asm("r1") = 4243;
+uint32_t b = 101, q, r;
+
+asm("dlr %[r0],%[b]"
+: [r0] "+r" (r0), [r1] "+r" (r1)
+: [b] "r" (b)
+: "cc");
+q = r1;
+r = r0;
+assert(q == 42);
+assert(r == 1);
+}
+
+int main(void)
+{
+test_dr();
+test_dlr();
+}
-- 
2.37.2




Re: [PATCH v3 16/24] disas/nanomips: Remove IMMEDIATE functions

2022-11-01 Thread Stefan Weil via

Am 01.11.22 um 10:27 schrieb Philippe Mathieu-Daudé:


On 1/11/22 09:28, Stefan Weil via wrote:

Am 12.09.22 um 14:26 schrieb Milica Lazarevic:

Both versions of IMMEDIATE functions have been removed.

Before this patch, we'd been calling img_format twice, the first time
through the IMMEDIATE to get an appropriate string and the second time
to print that string. There's no more need for that. Therefore, 
calls to

IMMEDIATE are removed, and now we're directly printing the integer
values instead.

Signed-off-by: Milica Lazarevic 
---
  disas/nanomips.cpp | 756 
-

  1 file changed, 265 insertions(+), 491 deletions(-)

diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp
index 816155527d..441204bb84 100644
--- a/disas/nanomips.cpp
+++ b/disas/nanomips.cpp

[...]
@@ -3305,11 +3271,9 @@ static char *CACHE(uint64 instruction, 
Dis_info *info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
-    char *op = IMMEDIATE(op_value);
-    char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
-    return img_format("CACHE %s, %s(%s)", op, s, rs);
+    return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, 
s_value, rs);

  }
@@ -3329,11 +3293,9 @@ static char *CACHEE(uint64 instruction, 
Dis_info *info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
-    char *op = IMMEDIATE(op_value);
-    char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
-    return img_format("CACHEE %s, %s(%s)", op, s, rs);
+    return img_format("CACHEE 0x%" PRIx64 ", %s(%s)", op_value, 
s_value, rs);

  }


Do we really want to format "int64 s_value" as a string? The code now 
has lots of wrong format strings. Add the patch below to get the 
compiler report.


We once had a discussion about using G_GNUC_PRINTF for local 
functions or not. I think that this example clearly shows that it 
should be mandatory.


Yes. The problem here is nobody wants to maintain this code, but we
inherited it. IIUC this series doesn't make it worse, it just removes
the C++ dependency on UNIX-based hosts.



I expect that "%s" with an int64 s_value will cause a crash while the 
old code worked, so things are worse now and should be fixed for the 
release. If nobody maintains that code, I can try to prepare a patch.


Stefan





Re: [PATCH v1 09/12] accel/xen/xen-all: export xenstore_record_dm_state

2022-11-01 Thread Alex Bennée


"Garhwal, Vikram"  writes:

> Thanks, Alex, for reviewing this one. I built for all the archs and it was 
> fine. Can you please share more about what
> environment builds are breaking? So, I can test the changes for v2.

My cross build environment failed:

  ../../configure' '--disable-docs' '--disable-tools' 
'--cross-prefix=aarch64-linux-gnu-' '--enable-xen' 
'--target-list=i386-softmmu,x86_64-softmmu,arm-softmmu,aarch64-softmmu' 
'--disable-tpm'

On a Debian Bullseye with:

  11:30:20 [root@zen:~] # dpkg -l libxen\*
  Desired=Unknown/Install/Remove/Purge/Hold
  | Status=Not/Inst/Conf-files/Unpacked/halF-conf/Half-inst/trig-aWait/Trig-pend
  |/ Err?=(none)/Reinst-required (Status,Err: uppercase=bad)
  ||/ Name   Version Architecture 
Description
  
+++-==-===--
  ii  libxen-dev:arm64   4.14.5+24-g87d90d511c-1 arm64Public 
headers and libs for Xen
  ii  libxencall1:amd64  4.14.5+24-g87d90d511c-1 amd64Xen 
runtime library - libxencall
  ii  libxencall1:arm64  4.14.5+24-g87d90d511c-1 arm64Xen 
runtime library - libxencall
  ii  libxendevicemodel1:amd64   4.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxendevicemodel
  ii  libxendevicemodel1:arm64   4.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxendevicemodel
  ii  libxenevtchn1:amd644.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxenevtchn
  ii  libxenevtchn1:arm644.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxenevtchn
  ii  libxenforeignmemory1:amd64 4.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxenforeignmemory
  ii  libxenforeignmemory1:arm64 4.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxenforeignmemory
  ii  libxengnttab1:amd644.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxengnttab
  ii  libxengnttab1:arm644.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxengnttab
  ii  libxenhypfs1:amd64 4.14.5+24-g87d90d511c-1 amd64Xen 
runtime library - libxenhypfs
  ii  libxenhypfs1:arm64 4.14.5+24-g87d90d511c-1 arm64Xen 
runtime library - libxenhypfs
  ii  libxenmisc4.14:amd64   4.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - miscellaneous, versioned ABI
  ii  libxenmisc4.14:arm64   4.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - miscellaneous, versioned ABI
  ii  libxenstore3.0:amd64   4.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxenstore
  ii  libxenstore3.0:arm64   4.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxenstore
  ii  libxentoolcore1:amd64  4.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxentoolcore
  ii  libxentoolcore1:arm64  4.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxentoolcore
  ii  libxentoollog1:amd64   4.14.5+24-g87d90d511c-1 amd64Xen 
runtime libraries - libxentoollog
  ii  libxentoollog1:arm64   4.14.5+24-g87d90d511c-1 arm64Xen 
runtime libraries - libxentoollog

But also a bunch of cross builds on the CI system:

  https://gitlab.com/stsquad/qemu/-/pipelines/677956972/failures

>
>  
>
> Regards,
>
> Vikram
>
>  
>
> From: Alex Bennée 
> Date: Thursday, October 27, 2022 at 2:24 AM
> To: Garhwal, Vikram 
> Cc: qemu-devel@nongnu.org , Stabellini, Stefano 
> , Stefano
> Stabellini , Anthony Perard 
> , Paul Durrant ,
> xen-de...@lists.xenproject.org 
> Subject: Re: [PATCH v1 09/12] accel/xen/xen-all: export 
> xenstore_record_dm_state
>
> Vikram Garhwal  writes:
>
>> xenstore_record_dm_state() will also be used in aarch64 xenpv machine.
>>
>> Signed-off-by: Vikram Garhwal 
>> Signed-off-by: Stefano Stabellini 
>> ---
>>  accel/xen/xen-all.c  | 2 +-
>>  include/hw/xen/xen.h | 2 ++
>>  2 files changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
>> index 69aa7d018b..276625b78b 100644
>> --- a/accel/xen/xen-all.c
>> +++ b/accel/xen/xen-all.c
>> @@ -100,7 +100,7 @@ void xenstore_store_pv_console_info(int i, Chardev *chr)
>>  }
>>  
>>  
>> -static void xenstore_record_dm_state(struct xs_handle *xs, const char 
>> *state)
>> +void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
>>  {
>>  char path[50];
>>  
>> diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
>> index afdf9c436a..31e9538a5c 100644
>> --- a/include/hw/xen/xen.h
>> +++ b/include/hw/xen/xen.h
>> @@ -9,6 +9,7 @@
>>   */
>>  
>>  #include "exec/cpu-common.h"
>> +#include 
>
> This is breaking a bunch of the builds and generally we try and avoid
> adding system includes in headers (apart from osdep.h) for this reason.
> In fact there is a comment just above to that fact.
>
> I think you can just add struct xs_handle to typedefs.h (or maybe just
> xen.h) and directly i

Re: [PATCH v3 16/24] disas/nanomips: Remove IMMEDIATE functions

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 12:28, Stefan Weil wrote:

Am 01.11.22 um 10:27 schrieb Philippe Mathieu-Daudé:


On 1/11/22 09:28, Stefan Weil via wrote:

Am 12.09.22 um 14:26 schrieb Milica Lazarevic:

Both versions of IMMEDIATE functions have been removed.

Before this patch, we'd been calling img_format twice, the first time
through the IMMEDIATE to get an appropriate string and the second time
to print that string. There's no more need for that. Therefore, 
calls to

IMMEDIATE are removed, and now we're directly printing the integer
values instead.

Signed-off-by: Milica Lazarevic 
---
  disas/nanomips.cpp | 756 
-

  1 file changed, 265 insertions(+), 491 deletions(-)

diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp
index 816155527d..441204bb84 100644
--- a/disas/nanomips.cpp
+++ b/disas/nanomips.cpp

[...]
@@ -3305,11 +3271,9 @@ static char *CACHE(uint64 instruction, 
Dis_info *info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
-    char *op = IMMEDIATE(op_value);
-    char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
-    return img_format("CACHE %s, %s(%s)", op, s, rs);
+    return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, 
s_value, rs);

  }
@@ -3329,11 +3293,9 @@ static char *CACHEE(uint64 instruction, 
Dis_info *info)

  uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
  int64 s_value = extract_s__se8_15_7_6_5_4_3_2_1_0(instruction);
-    char *op = IMMEDIATE(op_value);
-    char *s = IMMEDIATE(s_value);
  const char *rs = GPR(rs_value);
-    return img_format("CACHEE %s, %s(%s)", op, s, rs);
+    return img_format("CACHEE 0x%" PRIx64 ", %s(%s)", op_value, 
s_value, rs);

  }


Do we really want to format "int64 s_value" as a string? The code now 
has lots of wrong format strings. Add the patch below to get the 
compiler report.


We once had a discussion about using G_GNUC_PRINTF for local 
functions or not. I think that this example clearly shows that it 
should be mandatory.


Yes. The problem here is nobody wants to maintain this code, but we
inherited it. IIUC this series doesn't make it worse, it just removes
the C++ dependency on UNIX-based hosts.



I expect that "%s" with an int64 s_value will cause a crash while the 
old code worked, so things are worse now and should be fixed for the 
release. If nobody maintains that code, I can try to prepare a patch.


Well since I merged I already prepared some patches to fix this.



Re: [PATCH v9 1/8] mm: Introduce memfd_restricted system call to create restricted user memory

2022-11-01 Thread Chao Peng
On Mon, Oct 31, 2022 at 12:47:38PM -0500, Michael Roth wrote:
> On Tue, Oct 25, 2022 at 11:13:37PM +0800, Chao Peng wrote:
> > From: "Kirill A. Shutemov" 
> > 
> > Introduce 'memfd_restricted' system call with the ability to create
> > memory areas that are restricted from userspace access through ordinary
> > MMU operations (e.g. read/write/mmap). The memory content is expected to
> > be used through a new in-kernel interface by a third kernel module.
> > 
> > memfd_restricted() is useful for scenarios where a file descriptor(fd)
> > can be used as an interface into mm but want to restrict userspace's
> > ability on the fd. Initially it is designed to provide protections for
> > KVM encrypted guest memory.
> > 
> > Normally KVM uses memfd memory via mmapping the memfd into KVM userspace
> > (e.g. QEMU) and then using the mmaped virtual address to setup the
> > mapping in the KVM secondary page table (e.g. EPT). With confidential
> > computing technologies like Intel TDX, the memfd memory may be encrypted
> > with special key for special software domain (e.g. KVM guest) and is not
> > expected to be directly accessed by userspace. Precisely, userspace
> > access to such encrypted memory may lead to host crash so should be
> > prevented.
> > 
> > memfd_restricted() provides semantics required for KVM guest encrypted
> > memory support that a fd created with memfd_restricted() is going to be
> > used as the source of guest memory in confidential computing environment
> > and KVM can directly interact with core-mm without the need to expose
> > the memory content into KVM userspace.
> > 
> > KVM userspace is still in charge of the lifecycle of the fd. It should
> > pass the created fd to KVM. KVM uses the new restrictedmem_get_page() to
> > obtain the physical memory page and then uses it to populate the KVM
> > secondary page table entries.
> > 
> > The userspace restricted memfd can be fallocate-ed or hole-punched
> > from userspace. When these operations happen, KVM can get notified
> > through restrictedmem_notifier, it then gets chance to remove any
> > mapped entries of the range in the secondary page tables.
> > 
> > memfd_restricted() itself is implemented as a shim layer on top of real
> > memory file systems (currently tmpfs). Pages in restrictedmem are marked
> > as unmovable and unevictable, this is required for current confidential
> > usage. But in future this might be changed.
> > 
> > By default memfd_restricted() prevents userspace read, write and mmap.
> > By defining new bit in the 'flags', it can be extended to support other
> > restricted semantics in the future.
> > 
> > The system call is currently wired up for x86 arch.
> > 
> > Signed-off-by: Kirill A. Shutemov 
> > Signed-off-by: Chao Peng 
> > ---
> >  arch/x86/entry/syscalls/syscall_32.tbl |   1 +
> >  arch/x86/entry/syscalls/syscall_64.tbl |   1 +
> >  include/linux/restrictedmem.h  |  62 ++
> >  include/linux/syscalls.h   |   1 +
> >  include/uapi/asm-generic/unistd.h  |   5 +-
> >  include/uapi/linux/magic.h |   1 +
> >  kernel/sys_ni.c|   3 +
> >  mm/Kconfig |   4 +
> >  mm/Makefile|   1 +
> >  mm/restrictedmem.c | 250 +
> >  10 files changed, 328 insertions(+), 1 deletion(-)
> >  create mode 100644 include/linux/restrictedmem.h
> >  create mode 100644 mm/restrictedmem.c
> > 
> > diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
> > b/arch/x86/entry/syscalls/syscall_32.tbl
> > index 320480a8db4f..dc70ba90247e 100644
> > --- a/arch/x86/entry/syscalls/syscall_32.tbl
> > +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> > @@ -455,3 +455,4 @@
> >  448i386process_mreleasesys_process_mrelease
> >  449i386futex_waitv sys_futex_waitv
> >  450i386set_mempolicy_home_node 
> > sys_set_mempolicy_home_node
> > +451i386memfd_restrictedsys_memfd_restricted
> > diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
> > b/arch/x86/entry/syscalls/syscall_64.tbl
> > index c84d12608cd2..06516abc8318 100644
> > --- a/arch/x86/entry/syscalls/syscall_64.tbl
> > +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> > @@ -372,6 +372,7 @@
> >  448common  process_mreleasesys_process_mrelease
> >  449common  futex_waitv sys_futex_waitv
> >  450common  set_mempolicy_home_node sys_set_mempolicy_home_node
> > +451common  memfd_restrictedsys_memfd_restricted
> >  
> >  #
> >  # Due to a historical design error, certain syscalls are numbered 
> > differently
> > diff --git a/include/linux/restrictedmem.h b/include/linux/restrictedmem.h
> > new file mode 100644
> > index 000000000000..9c37c3ea3180
> > --- /dev/null
> > +++ b/include/linux/restrictedmem.h
> > @@ -0,0 +1,62 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +#ifndef _LINUX_RESTRIC

Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-01 Thread Chao Peng
On Mon, Oct 31, 2022 at 05:02:50PM -0700, Isaku Yamahata wrote:
> On Fri, Oct 28, 2022 at 02:55:45PM +0800,
> Chao Peng  wrote:
> 
> > On Wed, Oct 26, 2022 at 02:54:25PM -0700, Isaku Yamahata wrote:
> > > On Tue, Oct 25, 2022 at 11:13:43PM +0800,
> > > Chao Peng  wrote:
> > > 
> > > > A memslot with KVM_MEM_PRIVATE being set can include both fd-based
> > > > private memory and hva-based shared memory. Architecture code (like TDX
> > > > code) can tell whether the on-going fault is private or not. This patch
> > > > adds a 'is_private' field to kvm_page_fault to indicate this and
> > > > architecture code is expected to set it.
> > > > 
> > > > To handle page fault for such memslot, the handling logic is different
> > > > depending on whether the fault is private or shared. KVM checks if
> > > > 'is_private' matches the host's view of the page (maintained in
> > > > mem_attr_array).
> > > >   - For a successful match, private pfn is obtained with
> > > > restrictedmem_get_page () from private fd and shared pfn is obtained
> > > > with existing get_user_pages().
> > > >   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> > > > userspace. Userspace then can convert memory between private/shared
> > > > in host's view and retry the fault.
> > > > 
> > > > Co-developed-by: Yu Zhang 
> > > > Signed-off-by: Yu Zhang 
> > > > Signed-off-by: Chao Peng 
> > > > ---
> > > >  arch/x86/kvm/mmu/mmu.c  | 56 +++--
> > > >  arch/x86/kvm/mmu/mmu_internal.h | 14 -
> > > >  arch/x86/kvm/mmu/mmutrace.h |  1 +
> > > >  arch/x86/kvm/mmu/spte.h |  6 
> > > >  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
> > > >  include/linux/kvm_host.h| 28 +
> > > >  6 files changed, 103 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > > index 67a9823a8c35..10017a9f26ee 100644
> > > > --- a/arch/x86/kvm/mmu/mmu.c
> > > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > > @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm 
> > > > *kvm, gfn_t gfn,
> > > >  
> > > >  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > > >   const struct kvm_memory_slot *slot, gfn_t 
> > > > gfn,
> > > > - int max_level)
> > > > + int max_level, bool is_private)
> > > >  {
> > > > struct kvm_lpage_info *linfo;
> > > > int host_level;
> > > > @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > > > break;
> > > > }
> > > >  
> > > > +   if (is_private)
> > > > +   return max_level;
> > > 
> > > Below PG_LEVEL_NUM is passed by zap_collapsible_spte_range().  It doesn't 
> > > make
> > > sense.
> > > 
> > > > +
> > > > if (max_level == PG_LEVEL_4K)
> > > > return PG_LEVEL_4K;
> > > >  
> > > > @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu 
> > > > *vcpu, struct kvm_page_fault *fault
> > > >  * level, which will be used to do precise, accurate accounting.
> > > >  */
> > > > fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> > > > -fault->gfn, 
> > > > fault->max_level);
> > > > +fault->gfn, 
> > > > fault->max_level,
> > > > +fault->is_private);
> > > > if (fault->req_level == PG_LEVEL_4K || 
> > > > fault->huge_page_disallowed)
> > > > return;
> > > >  
> > > > @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu 
> > > > *vcpu, struct kvm_async_pf *work)
> > > > kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
> > > >  }
> > > >  
> > > > +static inline u8 order_to_level(int order)
> > > > +{
> > > > +   BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> > > > +
> > > > +   if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> > > > +   return PG_LEVEL_1G;
> > > > +
> > > > +   if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> > > > +   return PG_LEVEL_2M;
> > > > +
> > > > +   return PG_LEVEL_4K;
> > > > +}
> > > > +
> > > > +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> > > > +{
> > > > +   int order;
> > > > +   struct kvm_memory_slot *slot = fault->slot;
> > > > +
> > > > +   if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, 
> > > > &order))
> > > > +   return RET_PF_RETRY;
> > > > +
> > > > +   fault->max_level = min(order_to_level(order), fault->max_level);
> > > > +   fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> > > > +   return RET_PF_CONTINUE;
> > > > +}
> > > > +
> > > >  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct 
> > > > kvm_page_fault *fault)
> > > >  {
> > > > struct kvm_memory_slot *s

[PATCH 3/5] disas/nanomips: Use G_GNUC_PRINTF to avoid invalid string formats

2022-11-01 Thread Philippe Mathieu-Daudé
Suggested-by: Stefan Weil 
Signed-off-by: Philippe Mathieu-Daudé 
---
 disas/nanomips.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index e4b21e7c45..3f45447292 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -95,7 +95,7 @@ typedef struct Pool {
 #define IMGASSERTONCE(test)
 
 
-static char *img_format(const char *format, ...)
+static char * G_GNUC_PRINTF(1, 2) img_format(const char *format, ...)
 {
 char *buffer;
 va_list args;
-- 
2.37.3




[PATCH 0/5] disas/nanomips: Format string fixes

2022-11-01 Thread Philippe Mathieu-Daudé
Fix invalid string formats reported by Stefan:
https://lore.kernel.org/qemu-devel/78553699-00c1-ad69-1d58-02f75a1f4...@weilnetz.de/

Philippe Mathieu-Daudé (5):
  disas/nanomips: Fix invalid PRId64 format calling img_format()
  disas/nanomips: Fix invalid PRIx64 format calling img_format()
  disas/nanomips: Use G_GNUC_PRINTF to avoid invalid string formats
  disas/nanomips: Remove headers already included by "qemu/osdep.h"
  MAINTAINERS: Inherit from nanoMIPS

 MAINTAINERS  |  8 +---
 disas/nanomips.c | 44 +++-
 2 files changed, 24 insertions(+), 28 deletions(-)

-- 
2.37.3




[PATCH 2/5] disas/nanomips: Fix invalid PRIx64 format calling img_format()

2022-11-01 Thread Philippe Mathieu-Daudé
Fix:

  disas/nanomips.c:12231:62: warning: format specifies type 'char *' but the 
argument has type 'uint64' (aka 'unsigned long long') [-Wformat]
return img_format("RESTOREF 0x%" PRIx64 ", %s", u_value, count_value);
   ~~^~~
   %llu

Fixes: 4066c152b3 ("disas/nanomips: Remove IMMEDIATE functions")
Reported-by: Stefan Weil 
Signed-off-by: Philippe Mathieu-Daudé 
---
 disas/nanomips.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 6466c80dc5..e4b21e7c45 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -12235,7 +12235,8 @@ static char *RESTOREF(uint64 instruction, Dis_info 
*info)
 uint64 u_value = extract_u_11_10_9_8_7_6_5_4_3__s3(instruction);
 
 
-return img_format("RESTOREF 0x%" PRIx64 ", %s", u_value, count_value);
+return img_format("RESTOREF 0x%" PRIx64 ", 0x%" PRIx64,
+  u_value, count_value);
 }
 
 
-- 
2.37.3




[PATCH 1/5] disas/nanomips: Fix invalid PRId64 format calling img_format()

2022-11-01 Thread Philippe Mathieu-Daudé
Fix warnings such as:

  disas/nanomips.c:3251:64: warning: format specifies type 'char *' but the 
argument has type 'int64' (aka 'long long') [-Wformat]
return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);
~~ ^~~
%lld

Fixes: 4066c152b3 ("disas/nanomips: Remove IMMEDIATE functions")
Reported-by: Stefan Weil 
Signed-off-by: Philippe Mathieu-Daudé 
---
 disas/nanomips.c | 35 ---
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 9647f1a8e3..6466c80dc5 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -3252,7 +3252,8 @@ static char *CACHE(uint64 instruction, Dis_info *info)
 
 const char *rs = GPR(rs_value, info);
 
-return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);
+return img_format("CACHE 0x%" PRIx64 ", %" PRId64 "(%s)",
+  op_value, s_value, rs);
 }
 
 
@@ -3274,7 +3275,8 @@ static char *CACHEE(uint64 instruction, Dis_info *info)
 
 const char *rs = GPR(rs_value, info);
 
-return img_format("CACHEE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);
+return img_format("CACHEE 0x%" PRIx64 ", %" PRId64 "(%s)",
+  op_value, s_value, rs);
 }
 
 
@@ -5173,7 +5175,7 @@ static char *DADDIU_48_(uint64 instruction, Dis_info 
*info)
 
 const char *rt = GPR(rt_value, info);
 
-return img_format("DADDIU %s, %s", rt, s_value);
+return img_format("DADDIU %s, %" PRId64, rt, s_value);
 }
 
 
@@ -11859,7 +11861,7 @@ static char *PREF_S9_(uint64 instruction, Dis_info 
*info)
 
 const char *rs = GPR(rs_value, info);
 
-return img_format("PREF 0x%" PRIx64 ", %s(%s)",
+return img_format("PREF 0x%" PRIx64 ", %" PRId64 "(%s)",
   hint_value, s_value, rs);
 }
 
@@ -11905,7 +11907,8 @@ static char *PREFE(uint64 instruction, Dis_info *info)
 
 const char *rs = GPR(rs_value, info);
 
-return img_format("PREFE 0x%" PRIx64 ", %s(%s)", hint_value, s_value, rs);
+return img_format("PREFE 0x%" PRIx64 ", %" PRId64 "(%s)",
+  hint_value, s_value, rs);
 }
 
 
@@ -12079,7 +12082,7 @@ static char *REPL_PH(uint64 instruction, Dis_info *info)
 
 const char *rt = GPR(rt_value, info);
 
-return img_format("REPL.PH %s, %s", rt, s_value);
+return img_format("REPL.PH %s, %" PRId64, rt, s_value);
 }
 
 
@@ -12613,7 +12616,7 @@ static char *SB_S9_(uint64 instruction, Dis_info *info)
 const char *rt = GPR(rt_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SB %s, %s(%s)", rt, s_value, rs);
+return img_format("SB %s, %" PRId64 "(%s)", rt, s_value, rs);
 }
 
 
@@ -12659,7 +12662,7 @@ static char *SBE(uint64 instruction, Dis_info *info)
 const char *rt = GPR(rt_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SBE %s, %s(%s)", rt, s_value, rs);
+return img_format("SBE %s, %" PRId64 "(%s)", rt, s_value, rs);
 }
 
 
@@ -12706,7 +12709,7 @@ static char *SC(uint64 instruction, Dis_info *info)
 const char *rt = GPR(rt_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SC %s, %s(%s)", rt, s_value, rs);
+return img_format("SC %s, %" PRId64 "(%s)", rt, s_value, rs);
 }
 
 
@@ -12729,7 +12732,7 @@ static char *SCD(uint64 instruction, Dis_info *info)
 const char *rt = GPR(rt_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SCD %s, %s(%s)", rt, s_value, rs);
+return img_format("SCD %s, %" PRId64 "(%s)", rt, s_value, rs);
 }
 
 
@@ -12776,7 +12779,7 @@ static char *SCE(uint64 instruction, Dis_info *info)
 const char *rt = GPR(rt_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SCE %s, %s(%s)", rt, s_value, rs);
+return img_format("SCE %s, %" PRId64 "(%s)", rt, s_value, rs);
 }
 
 
@@ -12868,7 +12871,7 @@ static char *SD_S9_(uint64 instruction, Dis_info *info)
 const char *rt = GPR(rt_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SD %s, %s(%s)", rt, s_value, rs);
+return img_format("SD %s, %" PRId64 "(%s)", rt, s_value, rs);
 }
 
 
@@ -12973,7 +12976,7 @@ static char *SDC1_S9_(uint64 instruction, Dis_info 
*info)
 const char *ft = FPR(ft_value, info);
 const char *rs = GPR(rs_value, info);
 
-return img_format("SDC1 %s, %s(%s)", ft, s_value, rs);
+return img_format("SDC1 %s, %" PRId64 "(%s)", ft, s_value, rs);
 }
 
 
@@ -13066,7 +13069,8 @@ static char *SDC2(uint64 instruction, Dis_info *info)
 
 const char *rs = GPR(rs_value, info);
 
-return img_format("SDC2 CP%" PRIu64 ", %s(%s)", cs_value, s_value, rs);
+return img_format("SDC2 CP%" PRIu64 ", %" PRId64 "(%s)",
+  cs_value, s_value, rs);
 }
 
 
@@ -13091,7 +13095,8 @@ static char *SDM(uint64 instruction, Dis_info *info)
 const 

[PATCH 5/5] MAINTAINERS: Inherit from nanoMIPS

2022-11-01 Thread Philippe Mathieu-Daudé
6 months ago Stefan Pejic stepped in as nanoMIPS maintainer
(see commit a 8e0e23445a "target/mips: Undeprecate nanoMIPS
ISA support in QEMU"), however today his email is bouncing:

  ** Message blocked **

  Your message to stefan.pe...@syrmia.com has been blocked. See technical 
details below for more information.

  The response from the remote server was:
  550 5.4.1 Recipient address rejected: Access denied. AS(201806281) 
[DBAEUR03FT030.eop-EUR03.prod.protection.outlook.com]

To avoid unmaintained code, I feel forced to merge this code
back with the generic MIPS section.

Historical references:
- 
https://lore.kernel.org/qemu-devel/ty0pr03mb679726901bd6c6be40114a2fe2...@ty0pr03mb6797.apcprd03.prod.outlook.com/
- 
https://lore.kernel.org/qemu-devel/b858a20e97b74e7b90a94948314d0...@mtkmbs62n2.mediatek.inc/

Cc: Vince Del Vecchio 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <49f41916-687f-b9e5-2de7-9c658fe0d...@linaro.org>
---
 MAINTAINERS | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c41d8d65e2..0fa3c92b29 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -237,16 +237,10 @@ R: Jiaxun Yang 
 R: Aleksandar Rikalo 
 S: Odd Fixes
 F: target/mips/
-F: disas/mips.c
+F: disas/*mips.c
 F: docs/system/cpu-models-mips.rst.inc
 F: tests/tcg/mips/
 
-MIPS TCG CPUs (nanoMIPS ISA)
-M: Stefan Pejic 
-S: Maintained
-F: disas/nanomips.*
-F: target/mips/tcg/*nanomips*
-
 NiosII TCG CPUs
 M: Chris Wulff 
 M: Marek Vasut 
-- 
2.37.3




[PATCH 4/5] disas/nanomips: Remove headers already included by "qemu/osdep.h"

2022-11-01 Thread Philippe Mathieu-Daudé
Signed-off-by: Philippe Mathieu-Daudé 
---
 disas/nanomips.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 3f45447292..821d4f8832 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -30,10 +30,6 @@
 #include "qemu/osdep.h"
 #include "disas/dis-asm.h"
 
-#include 
-#include 
-#include 
-
 typedef int64_t int64;
 typedef uint64_t uint64;
 typedef uint32_t uint32;
-- 
2.37.3




Re: [PATCH v4 1/3] util/main-loop: Fix maximum number of wait objects for win32

2022-11-01 Thread Daniel P . Berrangé
On Tue, Nov 01, 2022 at 09:14:55AM +0800, Bin Meng wrote:
> Hi Daniel,
> 
> On Wed, Oct 26, 2022 at 12:41 AM Bin Meng  wrote:
> >
> > On Wed, Oct 19, 2022 at 6:20 PM Bin Meng  wrote:
> > >
> > > From: Bin Meng 
> > >
> > > The maximum number of wait objects for win32 should be
> > > MAXIMUM_WAIT_OBJECTS, not MAXIMUM_WAIT_OBJECTS + 1.
> > >
> > > Signed-off-by: Bin Meng 
> > > ---
> > >
> > > Changes in v4:
> > > - make the out of bounds access protection explicit
> > >
> > > Changes in v3:
> > > - move the check of adding the same HANDLE twice to a separate patch
> > >
> > > Changes in v2:
> > > - fix the logic in qemu_add_wait_object() to avoid adding
> > >   the same HANDLE twice
> > >
> > >  util/main-loop.c | 10 +-
> > >  1 file changed, 5 insertions(+), 5 deletions(-)
> > >
> >
> > Ping?
> 
> Would you queue this series? Thanks!

The main loop is not my area as maintainer - it would normally be
Paolo IIRC.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 1/5] disas/nanomips: Fix invalid PRId64 format calling img_format()

2022-11-01 Thread Stefan Weil via

Am 01.11.22 um 12:44 schrieb Philippe Mathieu-Daudé:


Fix warnings such as:

   disas/nanomips.c:3251:64: warning: format specifies type 'char *' but the 
argument has type 'int64' (aka 'long long') [-Wformat]
 return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);
 ~~ ^~~
 %lld

Fixes: 4066c152b3 ("disas/nanomips: Remove IMMEDIATE functions")
Reported-by: Stefan Weil 
Signed-off-by: Philippe Mathieu-Daudé 
---



Reviewed-by: Stefan Weil 



  disas/nanomips.c | 35 ---
  1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 9647f1a8e3..6466c80dc5 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -3252,7 +3252,8 @@ static char *CACHE(uint64 instruction, Dis_info *info)
  
  const char *rs = GPR(rs_value, info);
  
-return img_format("CACHE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);

+return img_format("CACHE 0x%" PRIx64 ", %" PRId64 "(%s)",
+  op_value, s_value, rs);
  }
  
  
@@ -3274,7 +3275,8 @@ static char *CACHEE(uint64 instruction, Dis_info *info)
  
  const char *rs = GPR(rs_value, info);
  
-return img_format("CACHEE 0x%" PRIx64 ", %s(%s)", op_value, s_value, rs);

+return img_format("CACHEE 0x%" PRIx64 ", %" PRId64 "(%s)",
+  op_value, s_value, rs);
  }
  
  
@@ -5173,7 +5175,7 @@ static char *DADDIU_48_(uint64 instruction, Dis_info *info)
  
  const char *rt = GPR(rt_value, info);
  
-return img_format("DADDIU %s, %s", rt, s_value);

+return img_format("DADDIU %s, %" PRId64, rt, s_value);
  }
  
  
@@ -11859,7 +11861,7 @@ static char *PREF_S9_(uint64 instruction, Dis_info *info)
  
  const char *rs = GPR(rs_value, info);
  
-return img_format("PREF 0x%" PRIx64 ", %s(%s)",

+return img_format("PREF 0x%" PRIx64 ", %" PRId64 "(%s)",
hint_value, s_value, rs);
  }
  
@@ -11905,7 +11907,8 @@ static char *PREFE(uint64 instruction, Dis_info *info)
  
  const char *rs = GPR(rs_value, info);
  
-return img_format("PREFE 0x%" PRIx64 ", %s(%s)", hint_value, s_value, rs);

+return img_format("PREFE 0x%" PRIx64 ", %" PRId64 "(%s)",
+  hint_value, s_value, rs);
  }
  
  
@@ -12079,7 +12082,7 @@ static char *REPL_PH(uint64 instruction, Dis_info *info)
  
  const char *rt = GPR(rt_value, info);
  
-return img_format("REPL.PH %s, %s", rt, s_value);

+return img_format("REPL.PH %s, %" PRId64, rt, s_value);
  }
  
  
@@ -12613,7 +12616,7 @@ static char *SB_S9_(uint64 instruction, Dis_info *info)

  const char *rt = GPR(rt_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SB %s, %s(%s)", rt, s_value, rs);

+return img_format("SB %s, %" PRId64 "(%s)", rt, s_value, rs);
  }
  
  
@@ -12659,7 +12662,7 @@ static char *SBE(uint64 instruction, Dis_info *info)

  const char *rt = GPR(rt_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SBE %s, %s(%s)", rt, s_value, rs);

+return img_format("SBE %s, %" PRId64 "(%s)", rt, s_value, rs);
  }
  
  
@@ -12706,7 +12709,7 @@ static char *SC(uint64 instruction, Dis_info *info)

  const char *rt = GPR(rt_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SC %s, %s(%s)", rt, s_value, rs);

+return img_format("SC %s, %" PRId64 "(%s)", rt, s_value, rs);
  }
  
  
@@ -12729,7 +12732,7 @@ static char *SCD(uint64 instruction, Dis_info *info)

  const char *rt = GPR(rt_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SCD %s, %s(%s)", rt, s_value, rs);

+return img_format("SCD %s, %" PRId64 "(%s)", rt, s_value, rs);
  }
  
  
@@ -12776,7 +12779,7 @@ static char *SCE(uint64 instruction, Dis_info *info)

  const char *rt = GPR(rt_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SCE %s, %s(%s)", rt, s_value, rs);

+return img_format("SCE %s, %" PRId64 "(%s)", rt, s_value, rs);
  }
  
  
@@ -12868,7 +12871,7 @@ static char *SD_S9_(uint64 instruction, Dis_info *info)

  const char *rt = GPR(rt_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SD %s, %s(%s)", rt, s_value, rs);

+return img_format("SD %s, %" PRId64 "(%s)", rt, s_value, rs);
  }
  
  
@@ -12973,7 +12976,7 @@ static char *SDC1_S9_(uint64 instruction, Dis_info *info)

  const char *ft = FPR(ft_value, info);
  const char *rs = GPR(rs_value, info);
  
-return img_format("SDC1 %s, %s(%s)", ft, s_value, rs);

+return img_format("SDC1 %s, %" PRId64 "(%s)", ft, s_value, rs);
  }
  
  
@@ -13066,7 +13069,8 @@ static char *SDC2(uint64 instruction, Dis_info *info)
  
  const char *rs = GPR(rs_value, info);
  
-return img_format("SDC2 CP%" PRIu64 ", %s(%s)", cs_value, s_value, rs);

+return

Re: [PATCH 2/5] disas/nanomips: Fix invalid PRIx64 format calling img_format()

2022-11-01 Thread Stefan Weil via

Am 01.11.22 um 12:44 schrieb Philippe Mathieu-Daudé:


Fix:

   disas/nanomips.c:12231:62: warning: format specifies type 'char *' but the 
argument has type 'uint64' (aka 'unsigned long long') [-Wformat]
 return img_format("RESTOREF 0x%" PRIx64 ", %s", u_value, count_value);
~~^~~
%llu

Fixes: 4066c152b3 ("disas/nanomips: Remove IMMEDIATE functions")
Reported-by: Stefan Weil 
Signed-off-by: Philippe Mathieu-Daudé 
---
  disas/nanomips.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 6466c80dc5..e4b21e7c45 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -12235,7 +12235,8 @@ static char *RESTOREF(uint64 instruction, Dis_info 
*info)
  uint64 u_value = extract_u_11_10_9_8_7_6_5_4_3__s3(instruction);
  
  
-return img_format("RESTOREF 0x%" PRIx64 ", %s", u_value, count_value);

+return img_format("RESTOREF 0x%" PRIx64 ", 0x%" PRIx64,
+  u_value, count_value);
  }



Reviewed-by: Stefan Weil 




Re: [PATCH 3/5] disas/nanomips: Use G_GNUC_PRINTF to avoid invalid string formats

2022-11-01 Thread Stefan Weil via

Am 01.11.22 um 12:44 schrieb Philippe Mathieu-Daudé:


Suggested-by: Stefan Weil 
Signed-off-by: Philippe Mathieu-Daudé 
---
  disas/nanomips.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index e4b21e7c45..3f45447292 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -95,7 +95,7 @@ typedef struct Pool {
  #define IMGASSERTONCE(test)
  
  
-static char *img_format(const char *format, ...)

+static char * G_GNUC_PRINTF(1, 2) img_format(const char *format, ...)
  {
  char *buffer;
  va_list args;



Reviewed-by: Stefan Weil 



OpenPGP_0xE08C21D5677450AD.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 4/5] disas/nanomips: Remove headers already included by "qemu/osdep.h"

2022-11-01 Thread Stefan Weil via


Am 01.11.22 um 12:44 schrieb Philippe Mathieu-Daudé:

Signed-off-by: Philippe Mathieu-Daudé 
---
  disas/nanomips.c | 4 
  1 file changed, 4 deletions(-)

diff --git a/disas/nanomips.c b/disas/nanomips.c
index 3f45447292..821d4f8832 100644
--- a/disas/nanomips.c
+++ b/disas/nanomips.c
@@ -30,10 +30,6 @@
  #include "qemu/osdep.h"
  #include "disas/dis-asm.h"
  
-#include 

-#include 
-#include 
-
  typedef int64_t int64;
  typedef uint64_t uint64;
  typedef uint32_t uint32;


Removing those three typedefs and replacing the related types would also 
be good (in another patch).


Reviewed-by: Stefan Weil 



OpenPGP_0xE08C21D5677450AD.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 0/4] hw: make TCO watchdog actually work by default for Q35

2022-11-01 Thread Daniel P . Berrangé
On Tue, Nov 01, 2022 at 01:57:24PM +0100, Igor Mammedov wrote:
> On Mon, 31 Oct 2022 11:48:58 -0400
> "Michael S. Tsirkin"  wrote:
> 
> > On Mon, Oct 31, 2022 at 01:50:24PM +, Daniel P. Berrangé wrote:
> > > On Mon, Oct 31, 2022 at 01:19:30PM +, Daniel P. Berrangé wrote:  
> > > > The TCO watchdog is unconditionally integrated into the Q35 machine
> > > > type by default, but at the same time is unconditionally disabled
> > > > from firing by a host config option that overrides guest OS attempts
> > > > to enable it. People have to know to set a magic -global to make
> > > > it non-broken  
> > > 
> > > Incidentally I found that originally the TCO watchdog was not
> > > unconditionally enabled. Its exposure to the guest could be
> > > turned on/off using
> > > 
> > >   -global ICH9-LPC.enable_tco=bool
> > > 
> > > This was implemented for machine type compat, but it also gave
> > > apps a way to disable the watchdog functionality. Unfortunately
> > > that ability was discarded in this series:
> > > 
> > >   
> > > https://lore.kernel.org/all/1453564933-29638-1-git-send-email-ehabk...@redhat.com/
> > > 
> > > but the 'enable_tco' property still exists in QOM, but silently
> > > ignored.
> > > 
> > > Seems we should either fix the impl of 'enable_tco', or remove the
> > > QOM property entirely, so we don't pretend it can be toggled anymore.
> > > 
> > > With regards,
> > > Daniel  
> > 
> > i am inclined to say you are right and the fix is to fix the impl.
> 
> Is there a need for users to disable the watchdog at all?
> It has always been present since then and no one complained,
> so perhaps we should ditch the property instead of fixing it
> to keep it simple.

Thinking about it more, I think we should NOT fix the 'enable_tco' property,
because there will be no way for a mgmt app to tell if they're using a
fixed or broken QEMU. So if they use 'enable_tco' on a broken QEMU and then
live migrate, they'll get a guest ABI change. If we did want to support
disabling it, then we should have a brand new property that apps can probe
for.

In the absence of a request to disable watchdog, I'd say we just delete
'enable_tco' right now. If someone wants it in future, we can add it with
a new name.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH v8 11/17] pci/shpc: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of shpc_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci-bridge/pci_bridge_dev.c  |  2 +-
 hw/pci-bridge/pcie_pci_bridge.c |  2 +-
 hw/pci/shpc.c   | 23 ++-
 include/hw/pci/shpc.h   |  3 +--
 4 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 657a06ddbe..4b6d1876eb 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -66,7 +66,7 @@ static void pci_bridge_dev_realize(PCIDevice *dev, Error 
**errp)
 dev->config[PCI_INTERRUPT_PIN] = 0x1;
 memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
shpc_bar_size(dev));
-err = shpc_init(dev, &br->sec_bus, &bridge_dev->bar, 0, errp);
+err = shpc_init(dev, &br->sec_bus, &bridge_dev->bar, 0);
 if (err) {
 goto shpc_error;
 }
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index df5dfdd139..99778e3e24 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -42,7 +42,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 d->config[PCI_INTERRUPT_PIN] = 0x1;
 memory_region_init(&pcie_br->shpc_bar, OBJECT(d), "shpc-bar",
shpc_bar_size(d));
-rc = shpc_init(d, &br->sec_bus, &pcie_br->shpc_bar, 0, errp);
+rc = shpc_init(d, &br->sec_bus, &pcie_br->shpc_bar, 0);
 if (rc) {
 goto error;
 }
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index e71f3a7483..5b3228c793 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -440,16 +440,11 @@ static void shpc_cap_update_dword(PCIDevice *d)
 }
 
 /* Add SHPC capability to the config space for the device. */
-static int shpc_cap_add_config(PCIDevice *d, Error **errp)
+static void shpc_cap_add_config(PCIDevice *d)
 {
 uint8_t *config;
-int config_offset;
-config_offset = pci_add_capability(d, PCI_CAP_ID_SHPC,
-   0, SHPC_CAP_LENGTH,
-   errp);
-if (config_offset < 0) {
-return config_offset;
-}
+uint8_t config_offset;
+config_offset = pci_add_capability(d, PCI_CAP_ID_SHPC, 0, SHPC_CAP_LENGTH);
 config = d->config + config_offset;
 
 pci_set_byte(config + SHPC_CAP_DWORD_SELECT, 0);
@@ -459,7 +454,6 @@ static int shpc_cap_add_config(PCIDevice *d, Error **errp)
 /* Make dword select and data writable. */
 pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff);
 pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0x);
-return 0;
 }
 
 static uint64_t shpc_mmio_read(void *opaque, hwaddr addr,
@@ -584,18 +578,13 @@ void shpc_device_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 }
 
 /* Initialize the SHPC structure in bridge's BAR. */
-int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar,
-  unsigned offset, Error **errp)
+int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, unsigned 
offset)
 {
-int i, ret;
+int i;
 int nslots = SHPC_MAX_SLOTS; /* TODO: qdev property? */
 SHPCDevice *shpc = d->shpc = g_malloc0(sizeof(*d->shpc));
 shpc->sec_bus = sec_bus;
-ret = shpc_cap_add_config(d, errp);
-if (ret) {
-g_free(d->shpc);
-return ret;
-}
+shpc_cap_add_config(d);
 if (nslots < SHPC_MIN_SLOTS) {
 return 0;
 }
diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h
index d5683b7399..18ab16ec9f 100644
--- a/include/hw/pci/shpc.h
+++ b/include/hw/pci/shpc.h
@@ -38,8 +38,7 @@ struct SHPCDevice {
 
 void shpc_reset(PCIDevice *d);
 int shpc_bar_size(PCIDevice *dev);
-int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar,
-  unsigned off, Error **errp);
+int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar, unsigned 
off);
 void shpc_cleanup(PCIDevice *dev, MemoryRegion *bar);
 void shpc_free(PCIDevice *dev);
 void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len);
-- 
2.38.1




[PATCH v8 14/17] hw/pci-bridge/pcie_pci_bridge: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/pci-bridge/pcie_pci_bridge.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 99778e3e24..1b839465e7 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -35,7 +35,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 {
 PCIBridge *br = PCI_BRIDGE(d);
 PCIEPCIBridge *pcie_br = PCIE_PCI_BRIDGE_DEV(d);
-int rc, pos;
+int rc;
 
 pci_bridge_initfn(d, TYPE_PCI_BUS);
 
@@ -49,12 +49,8 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 
 pcie_cap_init(d, 0, PCI_EXP_TYPE_PCI_BRIDGE, 0);
 
-pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp);
-if (pos < 0) {
-goto pm_error;
-}
-d->exp.pm_cap = pos;
-pci_set_word(d->config + pos + PCI_PM_PMC, 0x3);
+d->exp.pm_cap = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF);
+pci_set_word(d->config + d->exp.pm_cap + PCI_PM_PMC, 0x3);
 
 pcie_cap_arifwd_init(d);
 pcie_cap_deverr_init(d);
@@ -85,7 +81,6 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 msi_error:
 pcie_aer_exit(d);
 aer_error:
-pm_error:
 pcie_cap_exit(d);
 shpc_cleanup(d, &pcie_br->shpc_bar);
 error:
-- 
2.38.1




[PATCH v8 07/17] hw/nvme: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/nvme/ctrl.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 87aeba0564..ff4e2beea6 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7325,17 +7325,9 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice 
*pci_dev, uint16_t offset)
   PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
 }
 
-static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
+static void nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
 {
-Error *err = NULL;
-int ret;
-
-ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &err);
-if (err) {
-error_report_err(err);
-return ret;
-}
+pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF);
 
 pci_set_word(pci_dev->config + offset + PCI_PM_PMC,
  PCI_PM_CAP_VER_1_2);
@@ -7343,8 +7335,6 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, 
uint8_t offset)
  PCI_PM_CTRL_NO_SOFT_RESET);
 pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL,
  PCI_PM_CTRL_STATE_MASK);
-
-return 0;
 }
 
 static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
-- 
2.38.1




[PATCH v8 02/17] pci: Allow to omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
pci_add_capability appears in most PCI devices. Its error handling required
lots of code, and led to inconsistent behaviors such as:
- passing error_abort
- passing error_fatal
- asserting the returned value
- propagating the error to the caller
- skipping the rest of the function
- just ignoring

The code generating errors in pci_add_capability had a comment which
says:
> Verify that capabilities don't overlap.  Note: device assignment
> depends on this check to verify that the device is not broken.
> Should never trigger for emulated devices, but it's helpful for
> debugging these.

Indeed vfio has some code that passes capability offsets and sizes from
a physical device, but it explicitly pays attention so that the
capabilities never overlap. Therefore, we can always assert that
capabilities never overlap when pci_add_capability is called, resolving
these inconsistencies.

Such an implementation of pci_add_capability will not have errp
parameter. However, there are so many callers of pci_add_capability
that it does not make sense to amend all of them at once to match
with the new signature. Instead, this change will allow callers of
pci_add_capability to omit errp as the first step.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/pci.c |  8 
 include/hw/pci/pci.h | 13 ++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b53649d1fd..cce57f572c 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2532,14 +2532,14 @@ bool pci_check_capability_overlap(PCIDevice *pdev, 
uint8_t cap_id,
 }
 
 /*
- * On success, pci_add_capability() returns a positive value
+ * On success, pci_add_capability_legacy() returns a positive value
  * that the offset of the pci capability.
  * On failure, it sets an error and returns a negative error
  * code.
  */
-int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
-   uint8_t offset, uint8_t size,
-   Error **errp)
+int pci_add_capability_legacy(PCIDevice *pdev, uint8_t cap_id,
+  uint8_t offset, uint8_t size,
+  Error **errp)
 {
 uint8_t *config;
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 77b264c17e..50c00ece3e 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -2,6 +2,7 @@
 #define QEMU_PCI_H
 
 #include "exec/memory.h"
+#include "qapi/error.h"
 #include "sysemu/dma.h"
 
 /* PCI includes legacy ISA access.  */
@@ -393,9 +394,15 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int 
region_num);
 bool pci_check_capability_overlap(PCIDevice *pdev, uint8_t cap_id,
   uint8_t offset, uint8_t size, Error **errp);
 
-int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
-   uint8_t offset, uint8_t size,
-   Error **errp);
+int pci_add_capability_legacy(PCIDevice *pdev, uint8_t cap_id,
+  uint8_t offset, uint8_t size,
+  Error **errp);
+
+#define PCI_ADD_CAPABILITY_VA(pdev, cap_id, offset, size, errp, ...) \
+pci_add_capability_legacy(pdev, cap_id, offset, size, errp)
+
+#define pci_add_capability(...) \
+PCI_ADD_CAPABILITY_VA(__VA_ARGS__, &error_abort)
 
 void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
 
-- 
2.38.1




[PATCH] fixup! bios-tables-test: add test for number of cores > 255

2022-11-01 Thread Igor Mammedov
testcase requires KVM for execution and would fail with
 "
  qemu-system-i386: current -smp configuration requires kernel irqchip
  and X2APIC API support
 "
move it to kvm guarded section.
This fixes CI failure on hosts that don't have KVM enabled for Q35
machine type.

Signed-off-by: Igor Mammedov 
---
 tests/qtest/bios-tables-test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 9d97354df5..395d441212 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -1994,8 +1994,6 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/tpm12-tis",
test_acpi_q35_tcg_tpm12_tis);
 }
-qtest_add_func("acpi/q35/core-count2",
-   test_acpi_q35_tcg_core_count2);
 qtest_add_func("acpi/q35/bridge", test_acpi_q35_tcg_bridge);
 qtest_add_func("acpi/q35/multif-bridge",
test_acpi_q35_multif_bridge);
@@ -2026,6 +2024,8 @@ int main(int argc, char *argv[])
 if (has_kvm) {
 qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic);
 qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar);
+qtest_add_func("acpi/q35/core-count2",
+   test_acpi_q35_tcg_core_count2);
 }
 qtest_add_func("acpi/q35/viot", test_acpi_q35_viot);
 #ifdef CONFIG_POSIX
-- 
2.31.1




RE: [PATCH 07/16] hw/9pfs: Implement Windows specific utilities functions for 9pfs

2022-11-01 Thread Shi, Guohuai



> -Original Message-
> From: Christian Schoenebeck 
> Sent: Tuesday, November 1, 2022 22:28
> To: qemu-devel@nongnu.org
> Cc: Shi, Guohuai ; Greg Kurz ;
> Meng, Bin 
> Subject: Re: [PATCH 07/16] hw/9pfs: Implement Windows specific utilities
> functions for 9pfs
> 
> [Please note: This e-mail is from an EXTERNAL e-mail address]
> 
> On Monday, October 24, 2022 6:57:50 AM CET Bin Meng wrote:
> > From: Guohuai Shi 
> >
> > Windows POSIX API and MinGW library do not provide the NO_FOLLOW flag,
> > and do not allow opening a directory by POSIX open(). This causes all
> > xxx_at() functions cannot work directly. However, we can provide
> > Windows handle based functions to emulate xxx_at() functions (e.g.:
> > openat_win32, utimensat_win32, etc.).
> >
> > Windows does not support extended attributes. 9pfs for Windows uses
> > NTFS ADS (Alternate Data Streams) to emulate extended attributes.
> >
> > Windows does not provide POSIX compatible readlink(), and symbolic
> > link feature in 9pfs will be disabled on Windows.
> 
> Wouldn't it be more user friendly if the relevant error locations would use
> something like error_report_once() and suggesting to enable mapped(-xattr) to
> make 9p symlinks on guest working if desired by the user?
> 
> Probably this error case would need to wrapped into a dedicated function,
> otherwise I guess error_report_once() would fire several times by different
> callers.
> 

Windows (MinGW) not only lacks symlink support, but also lacks the symlink
definitions.
Windows does not support the symlink flag S_IFLNK.

So even if I add symlink support via mapped-xattr, the MinGW library does not
have the symlink flags and the build fails.
These flags are defined by the Windows header files.
The impact of adding a new flag to a pre-defined structure (struct stat) is
unknown.

So I think it is not a good idea to do that.

> > Signed-off-by: Guohuai Shi 
> > Signed-off-by: Bin Meng 
> > ---
> >
> >  hw/9pfs/9p-local.h  |   7 +
> >  hw/9pfs/9p-util.h   |  40 +-
> >  hw/9pfs/9p-local.c  |   4 -
> >  hw/9pfs/9p-util-win32.c | 885
> > 
> >  4 files changed, 931 insertions(+), 5 deletions(-)  create mode
> > 100644 hw/9pfs/9p-util-win32.c
> >
> > diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h index
> > c8404063e5..02fd894ba3 100644
> > --- a/hw/9pfs/9p-local.h
> > +++ b/hw/9pfs/9p-local.h
> > @@ -15,6 +15,13 @@
> >
> >  #include "9p-file-id.h"
> >
> > +typedef struct {
> > +P9_FILE_ID mountfd;
> > +#ifdef CONFIG_WIN32
> > +char *root_path;
> > +#endif
> > +} LocalData;
> > +
> >  P9_FILE_ID local_open_nofollow(FsContext *fs_ctx, const char *path, int
> flags,
> > mode_t mode);  P9_FILE_ID
> > local_opendir_nofollow(FsContext *fs_ctx, const char *path); diff
> > --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index
> > 1e7dc76345..82b2d0c3e4 100644
> > --- a/hw/9pfs/9p-util.h
> > +++ b/hw/9pfs/9p-util.h
> > @@ -90,26 +90,61 @@ static inline int errno_to_dotl(int err) {
> >  return err;
> >  }
> >
> > -#ifdef CONFIG_DARWIN
> > +#if defined(CONFIG_DARWIN)
> >  #define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
> > +#elif defined(CONFIG_WIN32)
> > +#define qemu_fgetxattr fgetxattr_win32
> >  #else
> >  #define qemu_fgetxattr fgetxattr
> >  #endif
> >
> > +#ifdef CONFIG_WIN32
> > +#define qemu_openat openat_win32
> > +#define qemu_fstatatfstatat_win32
> > +#define qemu_mkdiratmkdirat_win32
> > +#define qemu_renameat   renameat_win32
> > +#define qemu_utimensat  utimensat_win32
> > +#define qemu_unlinkat   unlinkat_win32
> > +#else
> >  #define qemu_openat openat
> >  #define qemu_fstatatfstatat
> >  #define qemu_mkdiratmkdirat
> >  #define qemu_renameat   renameat
> >  #define qemu_utimensat  utimensat
> >  #define qemu_unlinkat   unlinkat
> > +#endif
> > +
> > +#ifdef CONFIG_WIN32
> > +char *get_full_path_win32(P9_FILE_ID fd, const char *name); ssize_t
> > +fgetxattr_win32(int fd, const char *name, void *value, size_t size);
> > +P9_FILE_ID openat_win32(P9_FILE_ID dirfd, const char *pathname, int flags,
> > +mode_t mode); int fstatat_win32(P9_FILE_ID
> > +dirfd, const char *pathname,
> > +  struct stat *statbuf, int flags); int
> > +mkdirat_win32(P9_FILE_ID dirfd, const char *pathname, mode_t mode);
> > +int renameat_win32(P9_FILE_ID olddirfd, const char *oldpath,
> > +   P9_FILE_ID newdirfd, const char *newpath); int
> > +utimensat_win32(P9_FILE_ID dirfd, const char *pathname,
> > +const struct timespec times[2], int flags); int
> > +unlinkat_win32(P9_FILE_ID dirfd, const char *pathname, int flags);
> > +int statfs_win32(const char *root_path, struct statfs *stbuf);
> > +P9_FILE_ID openat_dir(P9_FILE_ID dirfd, const char *name); P9_FILE_ID
> > +openat_file(P9_FILE_ID dirfd, const char *name, int flags,
> > +   mode_t mode);
> > +#endif
> >
> >  static inline void close_pre

Re: [PATCH v8 01/17] hw/vfio/pci: Ensure MSI and MSI-X do not overlap

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 14:57, Akihiko Odaki wrote:

pci_add_capability() checks whether capabilities overlap, and notifies
its caller so that it can properly handle the case. However, in the
most cases, the capabilities actually never overlap, and the interface
incurred extra error handling code, which is often incorrect or
suboptimal. For such cases, pci_add_capability() can simply abort the
execution if the capabilities actually overlap since it should be a
programming error.

This change handles the other cases: hw/vfio/pci depends on the check to
decide whether MSI and MSI-X capabilities overlap with another. As they are
quite an exceptional and hw/vfio/pci knows much about PCI capabilities,
adding code specific to the cases to hw/vfio/pci still results in less
code than having error handling code everywhere in total.

Signed-off-by: Akihiko Odaki 
---
  hw/pci/pci.c | 34 ++
  hw/vfio/pci.c| 15 ++-
  include/hw/pci/pci.h |  3 +++
  3 files changed, 39 insertions(+), 13 deletions(-)



  /*
   * On success, pci_add_capability() returns a positive value
   * that the offset of the pci capability.
@@ -2523,7 +2542,6 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
 Error **errp)
  {
  uint8_t *config;
-int i, overlapping_cap;
  
  if (!offset) {

  offset = pci_find_space(pdev, size);
@@ -2534,17 +2552,9 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
   * depends on this check to verify that the device is not broken.
   * Should never trigger for emulated devices, but it's helpful
   * for debugging these. */
-for (i = offset; i < offset + size; i++) {
-overlapping_cap = pci_find_capability_at_offset(pdev, i);
-if (overlapping_cap) {
-error_setg(errp, "%s:%02x:%02x.%x "
-   "Attempt to add PCI capability %x at offset "
-   "%x overlaps existing capability %x at offset %x",
-   pci_root_bus_path(pdev), pci_dev_bus_num(pdev),
-   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
-   cap_id, offset, overlapping_cap, i);
-return -EINVAL;
-}
+pci_check_capability_overlap(pdev, cap_id, offset, size, errp);
+if (errp) {


   if (!pci_check_capability_overlap(...)) {


+return -EINVAL;
  }
  }




Re: [PATCH 04/16] hw/9pfs: Introduce an opaque type 9P_FILE_ID

2022-11-01 Thread Christian Schoenebeck
On Monday, October 24, 2022 6:57:47 AM CET Bin Meng wrote:
> Introduce an opaque type to represent a file in the 9pfs. This is
> file descriptor on POSIX systems. In the upcoming patches, we can
> extend it to support Windows.
> 
> With this new opaque type, it significantly reduces the number of
> deviated code paths when adding Windows support.

Strictly it is not an opaque type as you still have direct access to the
original value, not that I would care about this comment here, but ...

> Signed-off-by: Bin Meng 
> ---
> 
>  hw/9pfs/9p-file-id.h |  21 +
>  hw/9pfs/9p-local.h   |   8 +-
>  hw/9pfs/9p-util.h|  28 ---
>  hw/9pfs/9p-local.c   | 166 ---
>  hw/9pfs/9p-util-darwin.c |  14 ++--
>  hw/9pfs/9p-util-linux.c  |  14 ++--
>  hw/9pfs/9p-xattr.c   |  16 ++--
>  7 files changed, 150 insertions(+), 117 deletions(-)
>  create mode 100644 hw/9pfs/9p-file-id.h
> 
> diff --git a/hw/9pfs/9p-file-id.h b/hw/9pfs/9p-file-id.h
> new file mode 100644
> index 00..60cbfbf4dd
> --- /dev/null
> +++ b/hw/9pfs/9p-file-id.h
> @@ -0,0 +1,21 @@
> +/*
> + * 9p file representation for different hosts
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_9P_FILE_ID_H
> +#define QEMU_9P_FILE_ID_H
> +
> +/*
> + * 9pfs file id
> + *
> + * This is file descriptor on POSIX platforms
> + */
> +typedef int P9_FILE_ID;

I would not call it a "file ID" as that's a value identical for all streams
and processes. A correct term would be either file descriptor or file handle.

And as far as I can see, common QEMU code style suggests camel-case. So
instead maybe "QemuFileDescr_t" or just "QemuFd_t" to not confuse that with
9p protocol stuff?

> +
> +/* invalid value for P9_FILE_ID */
> +#define P9_INVALID_FILE -1

... I would rather either add a function like qemu_fd_invalid() or a function-
like macro as QEMU_FD_INVALID(), because that would also work with some exotic
systems that use fully opaque file descriptors that require either a function
call or dereferencing a struct member. So that would make this code more
future proof.

> +
> +#endif
> diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h
> index 32c72749d9..c8404063e5 100644
> --- a/hw/9pfs/9p-local.h
> +++ b/hw/9pfs/9p-local.h
> @@ -13,8 +13,10 @@
>  #ifndef QEMU_9P_LOCAL_H
>  #define QEMU_9P_LOCAL_H
>  
> -int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
> -mode_t mode);
> -int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
> +#include "9p-file-id.h"
> +
> +P9_FILE_ID local_open_nofollow(FsContext *fs_ctx, const char *path, int 
> flags,
> +   mode_t mode);
> +P9_FILE_ID local_opendir_nofollow(FsContext *fs_ctx, const char *path);
>  
>  #endif
> diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
> index c314cf381d..1e7dc76345 100644
> --- a/hw/9pfs/9p-util.h
> +++ b/hw/9pfs/9p-util.h
> @@ -13,6 +13,8 @@
>  #ifndef QEMU_9P_UTIL_H
>  #define QEMU_9P_UTIL_H
>  
> +#include "9p-file-id.h"
> +
>  #ifdef O_PATH
>  #define O_PATH_9P_UTIL O_PATH
>  #else
> @@ -101,30 +103,31 @@ static inline int errno_to_dotl(int err) {
>  #define qemu_utimensat  utimensat
>  #define qemu_unlinkat   unlinkat
>  
> -static inline void close_preserve_errno(int fd)
> +static inline void close_preserve_errno(P9_FILE_ID fd)
>  {
>  int serrno = errno;
>  close(fd);
>  errno = serrno;
>  }
>  
> -static inline int openat_dir(int dirfd, const char *name)
> +static inline P9_FILE_ID openat_dir(P9_FILE_ID dirfd, const char *name)
>  {
>  return qemu_openat(dirfd, name,
> O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL);
>  }
>  
> -static inline int openat_file(int dirfd, const char *name, int flags,
> -  mode_t mode)
> +static inline P9_FILE_ID openat_file(P9_FILE_ID dirfd, const char *name,
> + int flags, mode_t mode)
>  {
> -int fd, serrno, ret;
> +int serrno, ret;
> +P9_FILE_ID fd;
>  
>  #ifndef CONFIG_DARWIN
>  again:
>  #endif
>  fd = qemu_openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
>   mode);
> -if (fd == -1) {
> +if (fd == P9_INVALID_FILE) {

So these checks would become something like:

if (QEMU_FD_INVALID(fd)) {

>  #ifndef CONFIG_DARWIN
>  if (errno == EPERM && (flags & O_NOATIME)) {
>  /*
> @@ -155,13 +158,13 @@ again:
>  return fd;
>  }
>  
> -ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
> - void *value, size_t size);
> -int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
> +ssize_t fgetxattrat_nofollow(P9_FILE_ID dirfd, const char *path,
> + const char *name, void *value, size_t size);
> +int fsetxattrat_nofollow(P9_FILE

Re: [PULL 21/86] bios-tables-test: add test for number of cores > 255

2022-11-01 Thread Jonathan Cameron via
On Mon, 31 Oct 2022 08:51:44 -0400
"Michael S. Tsirkin"  wrote:

> From: Julia Suvorova 
> 
> The new test is run with a large number of cpus and checks if the
> core_count field in smbios_cpu_test (structure type 4) is correct.
> 
> Choose q35 as it allows to run with -smp > 255.

Getting a failure on this on i386.

qemu-system-i386: current -smp configuration requires kernel irqchip and X2APIC 
API support.

Note that was on bisection of this pull request applied to current mainline
(also in the CI report for the HMAT set - though there is another issue there.)

My guess is the fix is to not run it unless 64 bit?

Jonathan


> 
> Signed-off-by: Julia Suvorova 
> Message-Id: <20220731162141.178443-5-jus...@redhat.com>
> Message-Id: <2022101731.101412-5-jus...@redhat.com>
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 
> Reviewed-by: Igor Mammedov 
> ---
>  tests/qtest/bios-tables-test.c | 58 ++
>  1 file changed, 45 insertions(+), 13 deletions(-)
> 
> diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
> index d4fbe6791d..e402b57d46 100644
> --- a/tests/qtest/bios-tables-test.c
> +++ b/tests/qtest/bios-tables-test.c
> @@ -92,6 +92,8 @@ typedef struct {
>  SmbiosEntryPoint smbios_ep_table;
>  uint16_t smbios_cpu_max_speed;
>  uint16_t smbios_cpu_curr_speed;
> +uint8_t smbios_core_count;
> +uint16_t smbios_core_count2;
>  uint8_t *required_struct_types;
>  int required_struct_types_len;
>  QTestState *qts;
> @@ -631,29 +633,42 @@ static inline bool smbios_single_instance(uint8_t type)
>  }
>  }
>  
> -static bool smbios_cpu_test(test_data *data, uint32_t addr)
> +static void smbios_cpu_test(test_data *data, uint32_t addr,
> +SmbiosEntryPointType ep_type)
>  {
> -uint16_t expect_speed[2];
> -uint16_t real;
> +uint8_t core_count, expected_core_count = data->smbios_core_count;
> +uint16_t speed, expected_speed[2];
> +uint16_t core_count2, expected_core_count2 = data->smbios_core_count2;
>  int offset[2];
>  int i;
>  
>  /* Check CPU speed for backward compatibility */
>  offset[0] = offsetof(struct smbios_type_4, max_speed);
>  offset[1] = offsetof(struct smbios_type_4, current_speed);
> -expect_speed[0] = data->smbios_cpu_max_speed ? : 2000;
> -expect_speed[1] = data->smbios_cpu_curr_speed ? : 2000;
> +expected_speed[0] = data->smbios_cpu_max_speed ? : 2000;
> +expected_speed[1] = data->smbios_cpu_curr_speed ? : 2000;
>  
>  for (i = 0; i < 2; i++) {
> -real = qtest_readw(data->qts, addr + offset[i]);
> -if (real != expect_speed[i]) {
> -fprintf(stderr, "Unexpected SMBIOS CPU speed: real %u expect 
> %u\n",
> -real, expect_speed[i]);
> -return false;
> -}
> +speed = qtest_readw(data->qts, addr + offset[i]);
> +g_assert_cmpuint(speed, ==, expected_speed[i]);
>  }
>  
> -return true;
> +core_count = qtest_readb(data->qts,
> +addr + offsetof(struct smbios_type_4, core_count));
> +
> +if (expected_core_count) {
> +g_assert_cmpuint(core_count, ==, expected_core_count);
> +}
> +
> +if (ep_type == SMBIOS_ENTRY_POINT_TYPE_64) {
> +core_count2 = qtest_readw(data->qts,
> +  addr + offsetof(struct smbios_type_4, 
> core_count2));
> +
> +/* Core Count has reached its limit, checking Core Count 2 */
> +if (expected_core_count == 0xFF && expected_core_count2) {
> +g_assert_cmpuint(core_count2, ==, expected_core_count2);
> +}
> +}
>  }
>  
>  static void test_smbios_structs(test_data *data, SmbiosEntryPointType 
> ep_type)
> @@ -686,7 +701,7 @@ static void test_smbios_structs(test_data *data, 
> SmbiosEntryPointType ep_type)
>  set_bit(type, struct_bitmap);
>  
>  if (type == 4) {
> -g_assert(smbios_cpu_test(data, addr));
> +smbios_cpu_test(data, addr, ep_type);
>  }
>  
>  /* seek to end of unformatted string area of this struct ("\0\0") */
> @@ -908,6 +923,21 @@ static void test_acpi_q35_tcg(void)
>  free_test_data(&data);
>  }
>  
> +static void test_acpi_q35_tcg_core_count2(void)
> +{
> +test_data data = {
> +.machine = MACHINE_Q35,
> +.variant = ".core-count2",
> +.required_struct_types = base_required_struct_types,
> +.required_struct_types_len = ARRAY_SIZE(base_required_struct_types),
> +.smbios_core_count = 0xFF,
> +.smbios_core_count2 = 275,
> +};
> +
> +test_acpi_one("-machine smbios-entry-point-type=64 -smp 275", &data);
> +free_test_data(&data);
> +}
> +
>  static void test_acpi_q35_tcg_bridge(void)
>  {
>  test_data data;
> @@ -1859,6 +1889,8 @@ int main(int argc, char *argv[])
>  qtest_add_func("acpi/q35/tpm12-tis",
> test_acpi_q35_

Re: [PATCH v4 3/3] util/aio-win32: Correct the event array size in aio_poll()

2022-11-01 Thread Philippe Mathieu-Daudé

On 19/10/22 12:20, Bin Meng wrote:

From: Bin Meng 

WaitForMultipleObjects() can only wait for MAXIMUM_WAIT_OBJECTS
object handles. Correct the event array size in aio_poll() and
add an assert() to ensure it does not cause out-of-bounds access.

Signed-off-by: Bin Meng 
Reviewed-by: Stefan Weil 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
---

(no changes since v2)

Changes in v2:
- change 'count' to unsigned

  util/aio-win32.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v9 1/8] mm: Introduce memfd_restricted system call to create restricted user memory

2022-11-01 Thread Michael Roth
On Tue, Nov 01, 2022 at 07:37:29PM +0800, Chao Peng wrote:
> On Mon, Oct 31, 2022 at 12:47:38PM -0500, Michael Roth wrote:
> > On Tue, Oct 25, 2022 at 11:13:37PM +0800, Chao Peng wrote:
> > > From: "Kirill A. Shutemov" 
> > > 
> > > +struct restrictedmem_data {
> > > + struct mutex lock;
> > > + struct file *memfd;
> > > + struct list_head notifiers;
> > > +};
> > > +
> > > +static void restrictedmem_notifier_invalidate(struct restrictedmem_data 
> > > *data,
> > > +  pgoff_t start, pgoff_t end, bool notify_start)
> > > +{
> > > + struct restrictedmem_notifier *notifier;
> > > +
> > > + mutex_lock(&data->lock);
> > > + list_for_each_entry(notifier, &data->notifiers, list) {
> > > + if (notify_start)
> > > + notifier->ops->invalidate_start(notifier, start, end);
> > > + else
> > > + notifier->ops->invalidate_end(notifier, start, end);
> > > + }
> > > + mutex_unlock(&data->lock);
> > > +}
> > > +
> > > +static int restrictedmem_release(struct inode *inode, struct file *file)
> > > +{
> > > + struct restrictedmem_data *data = inode->i_mapping->private_data;
> > > +
> > > + fput(data->memfd);
> > > + kfree(data);
> > > + return 0;
> > > +}
> > > +
> > > +static long restrictedmem_fallocate(struct file *file, int mode,
> > > + loff_t offset, loff_t len)
> > > +{
> > > + struct restrictedmem_data *data = file->f_mapping->private_data;
> > > + struct file *memfd = data->memfd;
> > > + int ret;
> > > +
> > > + if (mode & FALLOC_FL_PUNCH_HOLE) {
> > > + if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
> > > + return -EINVAL;
> > > + }
> > > +
> > > + restrictedmem_notifier_invalidate(data, offset, offset + len, true);
> > > + ret = memfd->f_op->fallocate(memfd, mode, offset, len);
> > > + restrictedmem_notifier_invalidate(data, offset, offset + len, false);
> > > + return ret;
> > > +}
> > 
> > In v8 there was some discussion about potentially passing the page/folio
> > and order as part of the invalidation callback, I ended up needing
> > something similar for SEV-SNP, and think it might make sense for other
> > platforms. This main reasoning is:
> 
> In that context what we talked on is the inaccessible_get_pfn(), I was
> not aware there is need for invalidation callback as well.

Right, your understanding is correct. I think Sean had only mentioned in
passing that it was something we could potentially do, and in the cases I
was looking at it ended up being useful. I only mentioned it so I don't
seem like I'm too far out in the weeds here :)

> 
> > 
> >   1) restoring kernel directmap:
> > 
> >  Currently SNP (and I believe TDX) need to either split or remove kernel
> >  direct mappings for restricted PFNs, since there is no guarantee that
> >  other PFNs within a 2MB range won't be used for non-restricted
> >  (which will cause an RMP #PF in the case of SNP since the 2MB
> >  mapping overlaps with guest-owned pages)
> 
> Has the splitting and restoring been a well-discussed direction? I'm
> just curious whether there is other options to solve this issue.

For SNP it's been discussed for quite some time, and either splitting or
removing private entries from directmap are the well-discussed ways I'm
aware of to avoid RMP violations due to some other kernel process using
a 2MB mapping to access shared memory if there are private pages that
happen to be within that range.

In both cases the issue of how to restore directmap as 2M becomes a
problem.

I was also under the impression TDX had similar requirements. If so,
do you know what the plan is for handling this for TDX?

There are also 2 potential alternatives I'm aware of, but these haven't
been discussed in much detail AFAIK:

a) Ensure confidential guests are backed by 2MB pages. shmem has a way to
   request 2MB THP pages, but I'm not sure how reliably we can guarantee
   that enough THPs are available, so if we went that route we'd probably
   be better off requiring the use of hugetlbfs as the backing store. But
   obviously that's a bit limiting and it would be nice to have the option
   of using normal pages as well. One nice thing with the invalidation
   scheme proposed here is that this would "Just Work" if we implement
   hugetlbfs support, so an admin that doesn't want any directmap
   splitting has this option available, otherwise it's done as a
   best-effort.

b) Implement general support for restoring directmap as 2M even when
   subpages might be in use by other kernel threads. This would be the
   most flexible approach since it requires no special handling during
   invalidations, but I think it's only possible if all the CPA
   attributes for the 2M range are the same at the time the mapping is
   restored/unsplit, so there are some potential locking issues there and
   still a chance of splitting directmap over time.

> 
> > 
> >  Previously we were able to restore 2MB mappings to some degree
> >  since both sh

Re: [PATCH] fixup! bios-tables-test: add test for number of cores > 255

2022-11-01 Thread Stefan Hajnoczi
I will wait for Michael's v2 pull request in the coming days.

Stefan



[PATCH] migration: check magic value for deciding the mapping of channels

2022-11-01 Thread manish.mishra
Current logic assumes that channel connections on the destination side are
always established in the same order as the source and the first one will
always be the default channel followed by the multifd or post-copy
preemption channel. This may not be always true, as even if a channel has a
connection established on the source side it can be in the pending state on
the destination side and a newer connection can be established first.
Basically causing out of order mapping of channels on the destination side.
Currently, all channels except post-copy preempt send a magic number; this
patch uses that magic number to decide the type of channel. This logic is
applicable only for precopy (multifd) live migration since, as mentioned, the
post-copy preempt channel does not send any magic number. Also, this patch
uses MSG_PEEK to check the magic number of channels so that current
data/control stream management remains unaffected.

Signed-off-by: manish.mishra 
---
 include/io/channel.h | 25 +
 io/channel-socket.c  | 27 +++
 io/channel.c | 39 +++
 migration/migration.c| 33 +
 migration/multifd.c  | 12 
 migration/multifd.h  |  2 +-
 migration/postcopy-ram.c |  5 +
 migration/postcopy-ram.h |  2 +-
 8 files changed, 119 insertions(+), 26 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..74177aeeea 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -115,6 +115,10 @@ struct QIOChannelClass {
 int **fds,
 size_t *nfds,
 Error **errp);
+ssize_t (*io_read_peek)(QIOChannel *ioc,
+void *buf,
+size_t nbytes,
+Error **errp);
 int (*io_close)(QIOChannel *ioc,
 Error **errp);
 GSource * (*io_create_watch)(QIOChannel *ioc,
@@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
   size_t buflen,
   Error **errp);
 
+/**
+ * qio_channel_read_peek_all:
+ * @ioc: the channel object
+ * @buf: the memory region to read in data
+ * @nbytes: the number of bytes to read
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read given @nbytes data from peek of channel into
+ * memory region @buf.
+ *
+ * The function will be blocked until read size is
+ * equal to requested size.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *  occurs without data, or -1 on error
+ */
+int qio_channel_read_peek_all(QIOChannel *ioc,
+  void* buf,
+  size_t nbytes,
+  Error **errp);
+
 /**
  * qio_channel_set_blocking:
  * @ioc: the channel object
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..b99f5dfda6 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 #endif /* WIN32 */
 
+static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
+void *buf,
+size_t nbytes,
+Error **errp)
+{
+QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+ssize_t bytes = 0;
+
+retry:
+bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
+
+if (bytes < 0) {
+if (errno == EINTR) {
+goto retry;
+}
+if (errno == EAGAIN) {
+return QIO_CHANNEL_ERR_BLOCK;
+}
+
+error_setg_errno(errp, errno,
+ "Unable to read from peek of socket");
+return -1;
+}
+
+return bytes;
+}
 
 #ifdef QEMU_MSG_ZEROCOPY
 static int qio_channel_socket_flush(QIOChannel *ioc,
@@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass 
*klass,
 
 ioc_klass->io_writev = qio_channel_socket_writev;
 ioc_klass->io_readv = qio_channel_socket_readv;
+ioc_klass->io_read_peek = qio_channel_socket_read_peek;
 ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
 ioc_klass->io_close = qio_channel_socket_close;
 ioc_klass->io_shutdown = qio_channel_socket_shutdown;
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..a2d9b96f3f 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
 return qio_channel_writev_all(ioc, &iov, 1, errp);
 }
 
+int qio_channel_read_peek_all(QIOChannel *ioc,
+  void* buf,
+  size_t nbytes,
+  Error **errp)
+{
+   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+   ssize_t bytes = 0;
+
+   if (!klass->io_read_peek) {
+   error_setg(errp, "Channel does not support read peek");
+   re

[PATCH v9 13/17] pci/slotid: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of slotid_cap_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/slotid_cap.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/pci/slotid_cap.c b/hw/pci/slotid_cap.c
index 36d021b4a6..5da8c82133 100644
--- a/hw/pci/slotid_cap.c
+++ b/hw/pci/slotid_cap.c
@@ -12,7 +12,7 @@ int slotid_cap_init(PCIDevice *d, int nslots,
 unsigned offset,
 Error **errp)
 {
-int cap;
+uint8_t cap;
 
 if (!chassis) {
 error_setg(errp, "Bridge chassis not specified. Each bridge is 
required"
@@ -24,11 +24,7 @@ int slotid_cap_init(PCIDevice *d, int nslots,
 return -EINVAL;
 }
 
-cap = pci_add_capability(d, PCI_CAP_ID_SLOTID, offset,
- SLOTID_CAP_LENGTH, errp);
-if (cap < 0) {
-return cap;
-}
+cap = pci_add_capability(d, PCI_CAP_ID_SLOTID, offset, SLOTID_CAP_LENGTH);
 /* We make each chassis unique, this way each bridge is First in Chassis */
 d->config[cap + PCI_SID_ESR] = PCI_SID_ESR_FIC |
 (nslots << SLOTID_NSLOTS_SHIFT);
-- 
2.38.1




RE: [PATCH 09/16] hw/9pfs: Disable unsupported flags and features for Windows

2022-11-01 Thread Shi, Guohuai



> -Original Message-
> From: Christian Schoenebeck 
> Sent: Tuesday, November 1, 2022 23:04
> To: qemu-devel@nongnu.org
> Cc: Shi, Guohuai ; Greg Kurz ;
> Meng, Bin 
> Subject: Re: [PATCH 09/16] hw/9pfs: Disable unsupported flags and features
> for Windows
> 
> [Please note: This e-mail is from an EXTERNAL e-mail address]
> 
> On Monday, October 24, 2022 6:57:52 AM CET Bin Meng wrote:
> > From: Guohuai Shi 
> >
> > Some flags and features are not supported on Windows, like mknod,
> > readlink, file mode, etc. Update the codes for Windows.
> >
> > Signed-off-by: Guohuai Shi 
> > Signed-off-by: Bin Meng 
> > ---
> >
> >  hw/9pfs/9p-util.h |  6 +++-
> >  hw/9pfs/9p.c  | 90 ++-
> >  2 files changed, 86 insertions(+), 10 deletions(-)
> >
> > diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index
> > 82b2d0c3e4..3d154e9103 100644
> > --- a/hw/9pfs/9p-util.h
> > +++ b/hw/9pfs/9p-util.h
> > @@ -53,8 +53,10 @@ static inline uint64_t makedev_dotl(uint32_t dev_major,
> uint32_t dev_minor)
> >   */
> >  static inline uint64_t host_dev_to_dotl_dev(dev_t dev)  { -#ifdef
> > CONFIG_LINUX
> > +#if defined(CONFIG_LINUX)
> >  return dev;
> > +#elif defined(CONFIG_WIN32)
> > +return 0;
> 
> Really?

Check this MS document:
https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/fstat-fstat32-fstat64-fstati64-fstat32i64-fstat64i32?view=msvc-170
st_rdev: If a device, fd; otherwise 0.
st_dev: If a device, fd; otherwise 0.

So for any file open, it should be 0.

> 
> >  #else
> >  return makedev_dotl(major(dev), minor(dev));  #endif @@ -260,7
> > +262,9 @@ static inline struct dirent *qemu_dirent_dup(struct dirent
> > *dent)  #if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP
> > int pthread_fchdir_np(int fd) __attribute__((weak_import));  #endif
> > +#ifndef CONFIG_WIN32
> >  int qemu_mknodat(P9_FILE_ID dirfd, const char *filename, mode_t mode,
> >   dev_t dev);
> > +#endif
> >
> >  #endif
> > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 6c4af86240..771aab34ac
> > 100644
> > --- a/hw/9pfs/9p.c
> > +++ b/hw/9pfs/9p.c
> > @@ -39,6 +39,11 @@
> >  #include "qemu/xxhash.h"
> >  #include 
> >
> > +#ifdef CONFIG_WIN32
> > +#define UTIME_NOW   ((1l << 30) - 1l)
> > +#define UTIME_OMIT  ((1l << 30) - 2l) #endif
> > +
> >  int open_fd_hw;
> >  int total_open_fd;
> >  static int open_fd_rc;
> > @@ -132,13 +137,17 @@ static int dotl_to_open_flags(int flags)
> >  DotlOpenflagMap dotl_oflag_map[] = {
> >  { P9_DOTL_CREATE, O_CREAT },
> >  { P9_DOTL_EXCL, O_EXCL },
> > +#ifndef CONFIG_WIN32
> >  { P9_DOTL_NOCTTY , O_NOCTTY },
> > +#endif
> >  { P9_DOTL_TRUNC, O_TRUNC },
> >  { P9_DOTL_APPEND, O_APPEND },
> > +#ifndef CONFIG_WIN32
> >  { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
> >  { P9_DOTL_DSYNC, O_DSYNC },
> >  { P9_DOTL_FASYNC, FASYNC },
> > -#ifndef CONFIG_DARWIN
> > +#endif
> > +#ifdef CONFIG_LINUX
> 
> Better
> 
>#if !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
> 

It is OK.

> Otherwise it might automatically opt-out other future platforms
> unintentionally.
> 
> >  { P9_DOTL_NOATIME, O_NOATIME },
> >  /*
> >   *  On Darwin, we could map to F_NOCACHE, which is @@ -151,8
> > +160,10 @@ static int dotl_to_open_flags(int flags)  #endif
> >  { P9_DOTL_LARGEFILE, O_LARGEFILE },
> >  { P9_DOTL_DIRECTORY, O_DIRECTORY },
> > +#ifndef CONFIG_WIN32
> >  { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
> >  { P9_DOTL_SYNC, O_SYNC },
> > +#endif
> >  };
> >
> >  for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { @@ -179,8
> > +190,11 @@ static int get_dotl_openflags(V9fsState *s, int oflags)
> >   * Filter the client open flags
> >   */
> >  flags = dotl_to_open_flags(oflags);
> > -flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
> > -#ifndef CONFIG_DARWIN
> > +flags &= ~(O_CREAT);
> > +#ifndef CONFIG_WIN32
> > +flags &= ~(O_NOCTTY | O_ASYNC);
> > +#endif
> > +#ifdef CONFIG_LINUX
> 
> Same as above: better explicitly opt-out than the other way around.
> 

It is OK.

> >  /*
> >   * Ignore direct disk access hint until the server supports it.
> >   */
> > @@ -986,9 +1000,11 @@ static int stat_to_qid(V9fsPDU *pdu, const struct
> stat *stbuf, V9fsQID *qidp)
> >  if (S_ISDIR(stbuf->st_mode)) {
> >  qidp->type |= P9_QID_TYPE_DIR;
> >  }
> > +#ifndef CONFIG_WIN32
> >  if (S_ISLNK(stbuf->st_mode)) {
> >  qidp->type |= P9_QID_TYPE_SYMLINK;
> >  }
> > +#endif
> >
> >  return 0;
> >  }
> > @@ -1097,6 +1113,7 @@ static mode_t v9mode_to_mode(uint32_t mode,
> V9fsString *extension)
> >  ret |= S_IFDIR;
> >  }
> >
> > +#ifndef CONFIG_WIN32
> >  if (mode & P9_STAT_MODE_SYMLINK) {
> >  ret |= S_IFLNK;
> >  }
> > @@ -1106,6 +1123,7 @@ static mode_t v9mode_to_mode(uint32_t mode,
> V9fsString *extension)
> >  if (mode & P9_STAT_MODE_NAMED_PIPE

[PATCH v9 12/17] msix: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of msix_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/msix.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 1e381a9813..28af83403b 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -311,7 +311,7 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
   uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
   Error **errp)
 {
-int cap;
+uint8_t cap;
 unsigned table_size, pba_size;
 uint8_t *config;
 
@@ -340,11 +340,7 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
 return -EINVAL;
 }
 
-cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
-  cap_pos, MSIX_CAP_LENGTH, errp);
-if (cap < 0) {
-return cap;
-}
+cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH);
 
 dev->msix_cap = cap;
 dev->cap_present |= QEMU_PCI_CAP_MSIX;
-- 
2.38.1




[PATCH v8 10/17] pcie: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of a PCIe function which calls
pci_add_capability() in turn is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
Acked-by: Jonathan Cameron  (for CXL parts)
---
 docs/pcie_sriov.txt|  4 +--
 hw/display/bochs-display.c |  4 +--
 hw/net/e1000e.c|  4 +--
 hw/pci-bridge/cxl_downstream.c |  9 ++
 hw/pci-bridge/cxl_upstream.c   |  8 ++---
 hw/pci-bridge/pcie_pci_bridge.c|  6 +---
 hw/pci-bridge/pcie_root_port.c |  9 +-
 hw/pci-bridge/xio3130_downstream.c |  7 +---
 hw/pci-bridge/xio3130_upstream.c   |  7 +---
 hw/pci-host/designware.c   |  3 +-
 hw/pci-host/xilinx-pcie.c  |  4 +--
 hw/pci/pcie.c  | 52 --
 hw/usb/hcd-xhci-pci.c  |  3 +-
 hw/virtio/virtio-pci.c |  3 +-
 include/hw/pci/pcie.h  | 11 +++
 15 files changed, 35 insertions(+), 99 deletions(-)

diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index 11158dbf88..728a73ba7b 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -49,7 +49,7 @@ setting up a BAR for a VF.
pci_your_pf_dev_realize( ... )
{
   ...
-  int ret = pcie_endpoint_cap_init(d, 0x70);
+  pcie_endpoint_cap_init(d, 0x70);
   ...
   pcie_ari_init(d, 0x100, 1);
   ...
@@ -79,7 +79,7 @@ setting up a BAR for a VF.
pci_your_vf_dev_realize( ... )
{
   ...
-  int ret = pcie_endpoint_cap_init(d, 0x60);
+  pcie_endpoint_cap_init(d, 0x60);
   ...
   pcie_ari_init(d, 0x100, 1);
   ...
diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c
index 8ed734b195..111cabcfb3 100644
--- a/hw/display/bochs-display.c
+++ b/hw/display/bochs-display.c
@@ -265,7 +265,6 @@ static void bochs_display_realize(PCIDevice *dev, Error 
**errp)
 {
 BochsDisplayState *s = BOCHS_DISPLAY(dev);
 Object *obj = OBJECT(dev);
-int ret;
 
 if (s->vgamem < 4 * MiB) {
 error_setg(errp, "bochs-display: video memory too small");
@@ -302,8 +301,7 @@ static void bochs_display_realize(PCIDevice *dev, Error 
**errp)
 }
 
 if (pci_bus_is_express(pci_get_bus(dev))) {
-ret = pcie_endpoint_cap_init(dev, 0x80);
-assert(ret > 0);
+pcie_endpoint_cap_init(dev, 0x80);
 } else {
 dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
 }
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index e433b8f9a5..aea4305c43 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -462,9 +462,7 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 
 e1000e_init_msix(s);
 
-if (pcie_endpoint_cap_v1_init(pci_dev, e1000e_pcie_offset) < 0) {
-hw_error("Failed to initialize PCIe capability");
-}
+pcie_endpoint_cap_v1_init(pci_dev, e1000e_pcie_offset);
 
 ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL);
 if (ret) {
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index a361e519d0..1980dd9c6c 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -155,12 +155,8 @@ static void cxl_dsp_realize(PCIDevice *d, Error **errp)
 goto err_bridge;
 }
 
-rc = pcie_cap_init(d, CXL_DOWNSTREAM_PORT_EXP_OFFSET,
-   PCI_EXP_TYPE_DOWNSTREAM, p->port,
-   errp);
-if (rc < 0) {
-goto err_msi;
-}
+pcie_cap_init(d, CXL_DOWNSTREAM_PORT_EXP_OFFSET,
+  PCI_EXP_TYPE_DOWNSTREAM, p->port);
 
 pcie_cap_flr_init(d);
 pcie_cap_deverr_init(d);
@@ -195,7 +191,6 @@ static void cxl_dsp_realize(PCIDevice *d, Error **errp)
 pcie_chassis_del_slot(s);
  err_pcie_cap:
 pcie_cap_exit(d);
- err_msi:
 msi_uninit(d);
  err_bridge:
 pci_bridge_exitfn(d);
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index a83a3e81e4..26f27ba681 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -138,11 +138,8 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp)
 goto err_bridge;
 }
 
-rc = pcie_cap_init(d, CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET,
-   PCI_EXP_TYPE_UPSTREAM, p->port, errp);
-if (rc < 0) {
-goto err_msi;
-}
+pcie_cap_init(d, CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET,
+  PCI_EXP_TYPE_UPSTREAM, p->port);
 
 pcie_cap_flr_init(d);
 pcie_cap_deverr_init(d);
@@ -165,7 +162,6 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp)
 
 err_cap:
 pcie_cap_exit(d);
-err_msi:
 msi_uninit(d);
 err_bridge:
 pci_bridge_exitfn(d);
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 1cd917a459..df5dfdd139 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -47,10 +47,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 goto error;
 }
 
-rc = p

Re: [PATCH 08/16] hw/9pfs: Handle current directory offset for Windows

2022-11-01 Thread Christian Schoenebeck
On Monday, October 24, 2022 6:57:51 AM CET Bin Meng wrote:
> From: Guohuai Shi 
> 
> On Windows 'struct dirent' does not have current directory offset.
> We have to save current directory offset and update offset when
> reading directory.
> 
> Signed-off-by: Guohuai Shi 
> Signed-off-by: Bin Meng 
> ---
> 
>  hw/9pfs/9p.c| 16 
>  hw/9pfs/codir.c | 15 +++
>  2 files changed, 31 insertions(+)
> 
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index aebadeaa03..6c4af86240 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -2319,7 +2319,15 @@ static int coroutine_fn 
> v9fs_do_readdir_with_stat(V9fsPDU *pdu,
>  count += len;
>  v9fs_stat_free(&v9stat);
>  v9fs_path_free(&path);
> +#ifndef CONFIG_WIN32
>  saved_dir_pos = qemu_dirent_off(dent);
> +#else
> +/*
> + * Get offset by calling telldir() manually,
> + * as Windows does not have dent->d_off.
> + */
> +saved_dir_pos = v9fs_co_telldir(pdu, fidp);
> +#endif
>  }

That's not the way to go. We already had the same discussion with the macOS
patches and why we introduced qemu_dirent_off() for exactly that purpose:

v9fs_co_telldir() would dispatch the coroutine from QEMU main thread to
background worker thread and vice versa. So you would get side effects by
doing this.

Please implement this adequately in qemu_dirent_off() instead of touching the
controller portion here.

>  
>  v9fs_readdir_unlock(&fidp->fs.dir);
> @@ -2520,7 +2528,15 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
> V9fsFidState *fidp,
>  qid.version = 0;
>  }
>  
> +#ifndef CONFIG_WIN32
>  off = qemu_dirent_off(dent);
> +#else
> +/*
> + * Get offset by calling telldir() manually,
> + * as Windows does not have dent->d_off.
> + */
> +off = v9fs_co_telldir(pdu, fidp);
> +#endif
>  v9fs_string_init(&name);
>  v9fs_string_sprintf(&name, "%s", dent->d_name);
>  
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 93ba44fb75..2fbe7b831b 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -78,6 +78,9 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
>  int len, err = 0;
>  int32_t size = 0;
>  off_t saved_dir_pos;
> +#ifdef CONFIG_WIN32
> +off_t next_dir_pos;
> +#endif
>  struct dirent *dent;
>  struct V9fsDirEnt *e = NULL;
>  V9fsPath path;
> @@ -124,6 +127,14 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState 
> *fidp,
>  break;
>  }
>  
> +#ifdef CONFIG_WIN32
> +next_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
> +if (next_dir_pos < 0) {
> +err = next_dir_pos;
> +goto out;
> +}
> +#endif
> +
>  /*
>   * stop this loop as soon as it would exceed the allowed maximum
>   * response message size for the directory entries collected so far,
> @@ -168,7 +179,11 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState 
> *fidp,
>  }
>  
>  size += len;
> +#ifndef CONFIG_WIN32
>  saved_dir_pos = qemu_dirent_off(dent);
> +#else
> +saved_dir_pos = next_dir_pos;
> +#endif
>  }
>  
>  /* restore (last) saved position */
> 






Re: [PULL 00/86] pci,pc,virtio: features, tests, fixes, cleanups

2022-11-01 Thread Igor Mammedov
On Mon, 31 Oct 2022 08:50:41 -0400
"Michael S. Tsirkin"  wrote:

> 
> Holiday here tomorrow, so most likely this is it for features for this 
> release.
> 
> The following changes since commit 75d30fde55485b965a1168a21d016dd07b50ed32:
> 
>   Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into 
> staging (2022-10-30 15:07:25 -0400)
> 
> are available in the Git repository at:
> 
>   https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> 
> for you to fetch changes up to 3e624c953b9c37f5aafdf92c16c721818ec8c648:
> 
>   intel-iommu: PASID support (2022-10-31 08:46:50 -0400)
> 
> 
> pci,pc,virtio: features, tests, fixes, cleanups
> 
> lots of acpi rework

expected DSDT tables updates should be updated to include

> first version of biosbits infrastructure
> ASID support in vhost-vdpa
> core_count2 support in smbios

tables added by this test which was merged before above refactoring
to avoid failure (it was masked by 1/86)

or simpler, swap order they are being merged and then just regenerate
tables for this test case only.

> PCIe DOE emulation
> virtio vq reset
> HMAT support
> part of infrastructure for viommu support in vhost-vdpa
> VTD PASID support
> fixes, tests all over the place
> 
> Signed-off-by: Michael S. Tsirkin 




Re: [PATCH v4 1/3] util/main-loop: Fix maximum number of wait objects for win32

2022-11-01 Thread Philippe Mathieu-Daudé

On 19/10/22 12:20, Bin Meng wrote:

From: Bin Meng 

The maximum number of wait objects for win32 should be
MAXIMUM_WAIT_OBJECTS, not MAXIMUM_WAIT_OBJECTS + 1.

Signed-off-by: Bin Meng 
---

Changes in v4:
- make the out of bounds access protection explicit

Changes in v3:
- move the check of adding the same HANDLE twice to a separate patch

Changes in v2:
- fix the logic in qemu_add_wait_object() to avoid adding
   the same HANDLE twice

  util/main-loop.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/util/main-loop.c b/util/main-loop.c
index f00a25451b..de38876064 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -363,10 +363,10 @@ void qemu_del_polling_cb(PollingFunc *func, void *opaque)
  /* Wait objects support */
  typedef struct WaitObjects {
  int num;
-int revents[MAXIMUM_WAIT_OBJECTS + 1];
-HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
-void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
+int revents[MAXIMUM_WAIT_OBJECTS];
+HANDLE events[MAXIMUM_WAIT_OBJECTS];
+WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS];
+void *opaque[MAXIMUM_WAIT_OBJECTS];
  } WaitObjects;
  
  static WaitObjects wait_objects = {0};

@@ -395,7 +395,7 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc 
*func, void *opaque)
  if (w->events[i] == handle) {
  found = 1;
  }
-if (found) {
+if (found && i < (MAXIMUM_WAIT_OBJECTS - 1)) {


Matter of style, I find this form easier to review (same logic as
what follows):

   if (found && i + 1 < MAXIMUM_WAIT_OBJECTS) {


  w->events[i] = w->events[i + 1];
  w->func[i] = w->func[i + 1];
  w->opaque[i] = w->opaque[i + 1];


Reviewed-by: Philippe Mathieu-Daudé 




[PATCH v4] qapi/qmp: Add timestamps to qmp command responses

2022-11-01 Thread Denis Plotnikov
Add "start" & "end" time values to QMP command responses.

These time values are added to let the qemu management layer get the exact
command execution time without any other time variance which might be brought
by other parts of management layer or qemu internals.
This helps to look for problems proactively from the management layer side.
The management layer would be able to detect problem cases by calculating
QMP command execution time:
1. execution_time_from_mgmt_perspective -
   execution_time_of_qmp_command > some_threshold
   This detects problems with management layer or internal qemu QMP command
   dispatching
2. current_qmp_command_execution_time > avg_qmp_command_execution_time
   This detects that a certain QMP command starts to execute longer than
   usual
In both these cases more thorough investigation of the root causes should be
done by using some qemu tracepoints depending on particular QMP command under
investigation or by other means. The timestamps help to avoid excessive log
output when qemu tracepoints are used to address similar cases.

Example of result:

./qemu/scripts/qmp/qmp-shell /tmp/qmp.socket

(QEMU) query-status
{"end": {"seconds": 1650367305, "microseconds": 831032},
 "start": {"seconds": 1650367305, "microseconds": 831012},
 "return": {"status": "running", "singlestep": false, "running": true}}

The response of the QMP command contains the start & end time of
the QMP command processing.

Also, "start" & "end" timestamps are added to qemu guest agent responses as
qemu-ga shares the same code for request dispatching.

Suggested-by: Andrey Ryabinin 
Signed-off-by: Denis Plotnikov 
Reviewed-by: Daniel P. Berrangé 
---
v3->v4
 - rewrite commit message [Markus]
 - use new fields description in doc [Markus]
 - change type to int64_t [Markus]
 - simplify tests [Markus]

v2->v3:
 - fix typo "timestaps -> timestamps" [Marc-André]

v1->v2:
 - rephrase doc descriptions [Daniel]
 - add tests for qmp timestamps to qmp test and qga test [Daniel]
 - adjust asserts in test-qmp-cmds according to the new number of returning keys

v0->v1:
 - remove interface to control "start" and "end" time values: return timestamps 
unconditionally
 - add description to qmp specification
 - leave the same timestamp format in "seconds", "microseconds" to be 
consistent with events
   timestamp
 - fix patch description

 docs/interop/qmp-spec.txt  | 28 ++--
 qapi/qmp-dispatch.c| 18 ++
 tests/qtest/qmp-test.c | 32 
 tests/unit/test-qga.c  | 29 +
 tests/unit/test-qmp-cmds.c |  4 ++--
 5 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/docs/interop/qmp-spec.txt b/docs/interop/qmp-spec.txt
index b0e8351d5b261..0dd8e716c02f0 100644
--- a/docs/interop/qmp-spec.txt
+++ b/docs/interop/qmp-spec.txt
@@ -158,7 +158,9 @@ responses that have an unknown "id" field.
 
 The format of a success response is:
 
-{ "return": json-value, "id": json-value }
+{ "return": json-value, "id": json-value,
+  "start": {"seconds": json-value, "microseconds": json-value},
+  "end": {"seconds": json-value, "microseconds": json-value} }
 
  Where,
 
@@ -169,13 +171,25 @@ The format of a success response is:
   command does not return data
 - The "id" member contains the transaction identification associated
   with the command execution if issued by the Client
+- The "start" member contains the exact time of when the server
+  started executing the command. This excludes any time the
+  command request spent queued, after reading it off the wire.
+  It is a json-object with the number of seconds and microseconds
+  since the Unix epoch
+- The "end" member contains the exact time of when the server
+  finished executing the command. This excludes any time the
+  command response spent queued, waiting to be sent on the wire.
+  It is a json-object with the number of seconds and microseconds
+  since the Unix epoch
 
 2.4.2 error
 ---
 
 The format of an error response is:
 
-{ "error": { "class": json-string, "desc": json-string }, "id": json-value }
+{ "error": { "class": json-string, "desc": json-string }, "id": json-value,
+  "start": {"seconds": json-value, "microseconds": json-value},
+  "end": {"seconds": json-value, "microseconds": json-value} }
 
  Where,
 
@@ -184,6 +198,16 @@ The format of an error response is:
   not attempt to parse this message.
 - The "id" member contains the transaction identification associated with
   the command execution if issued by the Client
+- The "start" member contains the exact time of when the server
+  started executing the command. This excludes any time the
+  command request spent queued, after reading it off the wire.
+  It is a json-object with the number of seconds and microseconds
+  since the Unix epoch
+- The "end" member contains the exact time of when the server
+  finished executing the command. This excludes any time the
+  command

[PATCH v9 06/17] eepro100: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/net/eepro100.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index 679f52f80f..bf2ecdded9 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -549,12 +549,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp)
 if (info->power_management) {
 /* Power Management Capabilities */
 int cfg_offset = 0xdc;
-int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM,
-   cfg_offset, PCI_PM_SIZEOF,
-   errp);
-if (r < 0) {
-return;
-}
+pci_add_capability(&s->dev, PCI_CAP_ID_PM, cfg_offset, PCI_PM_SIZEOF);
 
 pci_set_word(pci_conf + cfg_offset + PCI_PM_PMC, 0x7e21);
 #if 0 /* TODO: replace dummy code for power management emulation. */
-- 
2.38.1




[PATCH v9 16/17] virtio-pci: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 include/hw/virtio/virtio-pci.h | 2 +-
 hw/virtio/virtio-pci.c | 9 ++---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 2446dcd9ae..9f3736723c 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -141,7 +141,7 @@ struct VirtIOPCIProxy {
 uint32_t msix_bar_idx;
 uint32_t modern_io_bar_idx;
 uint32_t modern_mem_bar_idx;
-int config_cap;
+uint8_t config_cap;
 uint32_t flags;
 bool disable_modern;
 bool ignore_backend_features;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c37bdc77ea..b393ff01be 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1154,8 +1154,7 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
 PCIDevice *dev = &proxy->pci_dev;
 int offset;
 
-offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
-cap->cap_len, &error_abort);
+offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0, cap->cap_len);
 
 assert(cap->cap_len >= sizeof *cap);
 memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
@@ -1864,11 +1863,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 
 pcie_endpoint_cap_init(pci_dev, 0);
 
-pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
- PCI_PM_SIZEOF, errp);
-if (pos < 0) {
-return;
-}
+pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF);
 
 pci_dev->exp.pm_cap = pos;
 
-- 
2.38.1




[PULL v3 for 7.2 00/31] testing and plugin updates

2022-11-01 Thread Alex Bennée
The following changes since commit 5107fd3effb1cfec3b96d9e819f1605048640e31:

  net/vhost-vdpa.c: Fix clang compilation failure (2022-10-31 13:01:31 -0400)

are available in the Git repository at:

  https://github.com/stsquad/qemu.git tags/pull-testing-for-7.2-011122-3

for you to fetch changes up to 339bf0c071eff5e6ff1d9ddb3ad5cd02e4cd9ca3:

  tests/vm: use -o IdentitiesOnly=yes for ssh (2022-10-31 20:37:59 +)


testing and plugin updates for 7.2:

  - cleanup win32/64 docker files
  - update test-mingw test
  - add flex/bison to debian-all-test
  - handle --enable-static/--disable-pie in config
  - extend timeouts on x86_64 avocado tests
  - add flex/bison to debian-hexagon-cross
  - use regular semihosting for nios2 check-tcg
  - fix obscure linker error to nios2 softmmu tests
  - various windows portability fixes for tests
  - clean-up of MAINTAINERS
  - use -machine none when appropriate in avocado
  - make raspi2_initrd test detect shutdown
  - disable sh4 rd2 tests on gitlab
  - re-enable threadcount/linux-test for sh4
  - clean-up s390x handling of "ex" instruction
  - better handle new CPUs in execlog plugin
  - pass CONFIG_DEBUG_TCG to plugin builds
  - try and avoid races in test-io-channel-command
  - speed up ssh key checking for tests/vm


Alex Bennée (21):
  tests/lcitool: Rename non-Debian specific helper
  tests/docker: update fedora-win[32|64]-cross with lcitool
  tests/lcitool: Refresh to latest libvirt-ci module
  tests/docker: update test-mingw to run single build
  configure: don't enable cross compilers unless in target_list
  configure: fix the --enable-static --disable-pie case
  tests/avocado: extend the timeout for x86_64 tcg tests
  tests/tcg: use regular semihosting for nios2-softmmu
  MAINTAINERS: add entries for the key build bits
  MAINTAINERS: add features_to_c.sh to gdbstub files
  MAINTAINERS: fix-up for check-tcg Makefile changes
  tests/avocado: set -machine none for userfwd and vnc tests
  tests/avocado: disable sh4 rd2 tests on Gitlab
  tests/tcg: re-enable linux-test for sh4
  tests/tcg: re-enable threadcount for sh4
  target/s390x: don't use ld_code2 to probe next pc
  target/s390x: don't probe next pc for EXecuted insns
  target/s390x: fake instruction loading when handling 'ex'
  contrib/plugins: enable debug on CONFIG_DEBUG_TCG
  contrib/plugins: protect execlog's last_exec expansion
  tests/unit: cleanups for test-io-channel-command

Anton Johansson (2):
  tests/docker: Add flex/bison to `debian-all-test`
  tests/docker: Add flex/bison to `debian-hexagon-cross`

Bin Meng (4):
  semihosting/arm-compat-semi: Avoid using hardcoded /tmp
  tcg: Avoid using hardcoded /tmp
  block/vvfat: Unify the mkdir() call
  hw/usb: dev-mtp: Use g_mkdir()

Ilya Leoshkevich (1):
  tests/vm: use -o IdentitiesOnly=yes for ssh

Paolo Bonzini (1):
  tests/tcg: include CONFIG_PLUGIN in config-host.mak

Peter Maydell (1):
  tests/avocado: raspi2_initrd: Wait for guest shutdown message before 
stopping

Richard Henderson (1):
  tests/tcg/nios2: Tweak 10m50-ghrd.ld

 configure  |  17 ++-
 include/exec/translator.h  |  17 +++
 block/vvfat.c  |   9 +-
 contrib/plugins/execlog.c  |  38 --
 hw/usb/dev-mtp.c   |   4 +-
 semihosting/arm-compat-semi.c  |   3 +-
 target/s390x/tcg/translate.c   |  14 ++-
 tcg/tcg.c  |   3 +-
 tests/unit/test-io-channel-command.c   |  45 ---
 MAINTAINERS|  29 -
 contrib/plugins/Makefile   |   1 +
 tests/avocado/boot_linux.py|   1 +
 tests/avocado/boot_linux_console.py|   7 +-
 tests/avocado/info_usernet.py  |   3 +
 tests/avocado/vnc.py   |   1 +
 .../dockerfiles/debian-all-test-cross.docker   |   2 +
 .../docker/dockerfiles/debian-hexagon-cross.docker |   2 +-
 tests/docker/dockerfiles/fedora-win32-cross.docker | 139 +++--
 tests/docker/dockerfiles/fedora-win64-cross.docker | 138 ++--
 tests/docker/test-mingw|  16 +--
 tests/lcitool/libvirt-ci   |   2 +-
 tests/lcitool/refresh  |  48 ---
 tests/tcg/nios2/10m50-ghrd.ld  |  14 ++-
 tests/tcg/nios2/Makefile.softmmu-target|   3 +-
 tests/tcg/sh4/Makefile.target  |  12 --
 tests/vm/basevm.py |   3 +-
 26 files changed, 396 insertions(+), 175 deletions(-)

-- 
2.34.

Re: [PULL 21/86] bios-tables-test: add test for number of cores > 255

2022-11-01 Thread Ani Sinha
On Tue, Nov 1, 2022 at 19:22 Jonathan Cameron 
wrote:

> On Mon, 31 Oct 2022 08:51:44 -0400
> "Michael S. Tsirkin"  wrote:
>
> > From: Julia Suvorova 
> >
> > The new test is run with a large number of cpus and checks if the
> > core_count field in smbios_cpu_test (structure type 4) is correct.
> >
> > Choose q35 as it allows to run with -smp > 255.
>
> Getting a failure on this on i386.
>
> qemu-system-i386: current -smp configuration requires kernel irqchip and
> X2APIC API support.
>
> Note that was on bisection of this pull request applied to current mainline
> (also in the CI report for the HMAT set - though there is another issue
> there.)


Can you point me to the CI report?


>
> My guess is fix is don't run it unless 64 bit?
>
> Jonathan
>
>
> >
> > Signed-off-by: Julia Suvorova 
> > Message-Id: <20220731162141.178443-5-jus...@redhat.com>
> > Message-Id: <2022101731.101412-5-jus...@redhat.com>
> > Reviewed-by: Michael S. Tsirkin 
> > Signed-off-by: Michael S. Tsirkin 
> > Reviewed-by: Igor Mammedov 
> > ---
> >  tests/qtest/bios-tables-test.c | 58 ++
> >  1 file changed, 45 insertions(+), 13 deletions(-)
> >
> > diff --git a/tests/qtest/bios-tables-test.c
> b/tests/qtest/bios-tables-test.c
> > index d4fbe6791d..e402b57d46 100644
> > --- a/tests/qtest/bios-tables-test.c
> > +++ b/tests/qtest/bios-tables-test.c
> > @@ -92,6 +92,8 @@ typedef struct {
> >  SmbiosEntryPoint smbios_ep_table;
> >  uint16_t smbios_cpu_max_speed;
> >  uint16_t smbios_cpu_curr_speed;
> > +uint8_t smbios_core_count;
> > +uint16_t smbios_core_count2;
> >  uint8_t *required_struct_types;
> >  int required_struct_types_len;
> >  QTestState *qts;
> > @@ -631,29 +633,42 @@ static inline bool smbios_single_instance(uint8_t
> type)
> >  }
> >  }
> >
> > -static bool smbios_cpu_test(test_data *data, uint32_t addr)
> > +static void smbios_cpu_test(test_data *data, uint32_t addr,
> > +SmbiosEntryPointType ep_type)
> >  {
> > -uint16_t expect_speed[2];
> > -uint16_t real;
> > +uint8_t core_count, expected_core_count = data->smbios_core_count;
> > +uint16_t speed, expected_speed[2];
> > +uint16_t core_count2, expected_core_count2 =
> data->smbios_core_count2;
> >  int offset[2];
> >  int i;
> >
> >  /* Check CPU speed for backward compatibility */
> >  offset[0] = offsetof(struct smbios_type_4, max_speed);
> >  offset[1] = offsetof(struct smbios_type_4, current_speed);
> > -expect_speed[0] = data->smbios_cpu_max_speed ? : 2000;
> > -expect_speed[1] = data->smbios_cpu_curr_speed ? : 2000;
> > +expected_speed[0] = data->smbios_cpu_max_speed ? : 2000;
> > +expected_speed[1] = data->smbios_cpu_curr_speed ? : 2000;
> >
> >  for (i = 0; i < 2; i++) {
> > -real = qtest_readw(data->qts, addr + offset[i]);
> > -if (real != expect_speed[i]) {
> > -fprintf(stderr, "Unexpected SMBIOS CPU speed: real %u
> expect %u\n",
> > -real, expect_speed[i]);
> > -return false;
> > -}
> > +speed = qtest_readw(data->qts, addr + offset[i]);
> > +g_assert_cmpuint(speed, ==, expected_speed[i]);
> >  }
> >
> > -return true;
> > +core_count = qtest_readb(data->qts,
> > +addr + offsetof(struct smbios_type_4, core_count));
> > +
> > +if (expected_core_count) {
> > +g_assert_cmpuint(core_count, ==, expected_core_count);
> > +}
> > +
> > +if (ep_type == SMBIOS_ENTRY_POINT_TYPE_64) {
> > +core_count2 = qtest_readw(data->qts,
> > +  addr + offsetof(struct smbios_type_4,
> core_count2));
> > +
> > +/* Core Count has reached its limit, checking Core Count 2 */
> > +if (expected_core_count == 0xFF && expected_core_count2) {
> > +g_assert_cmpuint(core_count2, ==, expected_core_count2);
> > +}
> > +}
> >  }
> >
> >  static void test_smbios_structs(test_data *data, SmbiosEntryPointType
> ep_type)
> > @@ -686,7 +701,7 @@ static void test_smbios_structs(test_data *data,
> SmbiosEntryPointType ep_type)
> >  set_bit(type, struct_bitmap);
> >
> >  if (type == 4) {
> > -g_assert(smbios_cpu_test(data, addr));
> > +smbios_cpu_test(data, addr, ep_type);
> >  }
> >
> >  /* seek to end of unformatted string area of this struct
> ("\0\0") */
> > @@ -908,6 +923,21 @@ static void test_acpi_q35_tcg(void)
> >  free_test_data(&data);
> >  }
> >
> > +static void test_acpi_q35_tcg_core_count2(void)
> > +{
> > +test_data data = {
> > +.machine = MACHINE_Q35,
> > +.variant = ".core-count2",
> > +.required_struct_types = base_required_struct_types,
> > +.required_struct_types_len =
> ARRAY_SIZE(base_required_struct_types),
> > +.smbios_core_count = 0xFF,
> > +.smbios_core_count2 = 275,
> > +};
> > +
> > +test_acpi_one("-m

[PATCH v8 06/17] eepro100: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/net/eepro100.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index 679f52f80f..bf2ecdded9 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -549,12 +549,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp)
 if (info->power_management) {
 /* Power Management Capabilities */
 int cfg_offset = 0xdc;
-int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM,
-   cfg_offset, PCI_PM_SIZEOF,
-   errp);
-if (r < 0) {
-return;
-}
+pci_add_capability(&s->dev, PCI_CAP_ID_PM, cfg_offset, PCI_PM_SIZEOF);
 
 pci_set_word(pci_conf + cfg_offset + PCI_PM_PMC, 0x7e21);
 #if 0 /* TODO: replace dummy code for power management emulation. */
-- 
2.38.1




Re: [PATCH 11/16] hw/9pfs: Add Linux error number definition

2022-11-01 Thread Christian Schoenebeck
On Monday, October 24, 2022 6:57:54 AM CET Bin Meng wrote:
> From: Guohuai Shi 
> 
> When using 9p2000.L protocol, the errno should use the Linux errno.
> Currently magic numbers with comments are used. Replace these with
> macros for future expansion.
> 
> Signed-off-by: Guohuai Shi 
> Signed-off-by: Bin Meng 
> ---
> 
>  hw/9pfs/9p-linux-errno.h | 151 +++
>  hw/9pfs/9p-util.h|  38 ++
>  2 files changed, 176 insertions(+), 13 deletions(-)
>  create mode 100644 hw/9pfs/9p-linux-errno.h
> 
> diff --git a/hw/9pfs/9p-linux-errno.h b/hw/9pfs/9p-linux-errno.h
> new file mode 100644
> index 00..56c37fa293
> --- /dev/null
> +++ b/hw/9pfs/9p-linux-errno.h
> @@ -0,0 +1,151 @@
> +/*
> + * 9p Linux errno translation definition
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include 
> +
> +#ifndef QEMU_9P_LINUX_ERRNO_H
> +#define QEMU_9P_LINUX_ERRNO_H
> +
> +/*
> + * This file contains the Linux errno definitions to translate errnos set by
> + * the 9P server (running on non-Linux hosts) to a corresponding errno value.
> + *
> + * This list should be periodically reviewed and updated; particularly for
> + * errnos that might be set as a result of a file system operation.
> + */

Yeah, that's my main concern here. I wonder if there isn't a more
maintainable solution at least for the list of Linux errors, so that we don't
have to manually update the L_ macros below.

> +
> +#define L_EPERM 1   /* Operation not permitted */
> +#define L_ENOENT2   /* No such file or directory */
> +#define L_ESRCH 3   /* No such process */
> +#define L_EINTR 4   /* Interrupted system call */
> +#define L_EIO   5   /* I/O error */
> +#define L_ENXIO 6   /* No such device or address */
> +#define L_E2BIG 7   /* Argument list too long */
> +#define L_ENOEXEC   8   /* Exec format error */
> +#define L_EBADF 9   /* Bad file number */
> +#define L_ECHILD10  /* No child processes */
> +#define L_EAGAIN11  /* Try again */
> +#define L_ENOMEM12  /* Out of memory */
> +#define L_EACCES13  /* Permission denied */
> +#define L_EFAULT14  /* Bad address */
> +#define L_ENOTBLK   15  /* Block device required */
> +#define L_EBUSY 16  /* Device or resource busy */
> +#define L_EEXIST17  /* File exists */
> +#define L_EXDEV 18  /* Cross-device link */
> +#define L_ENODEV19  /* No such device */
> +#define L_ENOTDIR   20  /* Not a directory */
> +#define L_EISDIR21  /* Is a directory */
> +#define L_EINVAL22  /* Invalid argument */
> +#define L_ENFILE23  /* File table overflow */
> +#define L_EMFILE24  /* Too many open files */
> +#define L_ENOTTY25  /* Not a typewriter */
> +#define L_ETXTBSY   26  /* Text file busy */
> +#define L_EFBIG 27  /* File too large */
> +#define L_ENOSPC28  /* No space left on device */
> +#define L_ESPIPE29  /* Illegal seek */
> +#define L_EROFS 30  /* Read-only file system */
> +#define L_EMLINK31  /* Too many links */
> +#define L_EPIPE 32  /* Broken pipe */
> +#define L_EDOM  33  /* Math argument out of domain of func */
> +#define L_ERANGE34  /* Math result not representable */
> +#define L_EDEADLK   35  /* Resource deadlock would occur */
> +#define L_ENAMETOOLONG  36  /* File name too long */
> +#define L_ENOLCK37  /* No record locks available */
> +#define L_ENOSYS38  /* Function not implemented */
> +#define L_ENOTEMPTY 39  /* Directory not empty */
> +#define L_ELOOP 40  /* Too many symbolic links encountered */
> +#define L_ENOMSG42  /* No message of desired type */
> +#define L_EIDRM 43  /* Identifier removed */
> +#define L_ECHRNG44  /* Channel number out of range */
> +#define L_EL2NSYNC  45  /* Level 2 not synchronized */
> +#define L_EL3HLT46  /* Level 3 halted */
> +#define L_EL3RST47  /* Level 3 reset */
> +#define L_ELNRNG48  /* Link number out of range */
> +#define L_EUNATCH   49  /* Protocol driver not attached */
> +#define L_ENOCSI50  /* No CSI structure available */
> +#define L_EL2HLT51  /* Level 2 halted */
> +#define L_EBADE 52  /* Invalid exchange */
> +#define L_EBADR 53  /* Invalid request descriptor */
> +#define L_EXFULL54  /* Exchange full */
> +#define L_ENOANO55  /* No anode */
> +#define L_EBADRQC   56  /* Invalid request code */
> +#define L_EBADSLT   57  /* Invalid slot */
> +#define L_EBFONT58 

[PATCH v8 09/17] hw/pci/pci_bridge: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of pci_bridge_ssvid_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci-bridge/i82801b11.c  | 14 ++
 hw/pci-bridge/pcie_root_port.c |  7 +--
 hw/pci-bridge/xio3130_downstream.c |  8 ++--
 hw/pci-bridge/xio3130_upstream.c   |  8 ++--
 hw/pci/pci_bridge.c| 21 ++---
 include/hw/pci/pci_bridge.h|  5 ++---
 6 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/hw/pci-bridge/i82801b11.c b/hw/pci-bridge/i82801b11.c
index f28181e210..f45dcdbacc 100644
--- a/hw/pci-bridge/i82801b11.c
+++ b/hw/pci-bridge/i82801b11.c
@@ -61,21 +61,11 @@ typedef struct I82801b11Bridge {
 
 static void i82801b11_bridge_realize(PCIDevice *d, Error **errp)
 {
-int rc;
-
 pci_bridge_initfn(d, TYPE_PCI_BUS);
 
-rc = pci_bridge_ssvid_init(d, I82801ba_SSVID_OFFSET,
-   I82801ba_SSVID_SVID, I82801ba_SSVID_SSID,
-   errp);
-if (rc < 0) {
-goto err_bridge;
-}
+pci_bridge_ssvid_init(d, I82801ba_SSVID_OFFSET,
+  I82801ba_SSVID_SVID, I82801ba_SSVID_SSID);
 pci_config_set_prog_interface(d->config, PCI_CLASS_BRIDGE_PCI_INF_SUB);
-return;
-
-err_bridge:
-pci_bridge_exitfn(d);
 }
 
 static const VMStateDescription i82801b11_bridge_dev_vmstate = {
diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index 460e48269d..a9d8c2adb4 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -74,12 +74,7 @@ static void rp_realize(PCIDevice *d, Error **errp)
 }
 pcie_port_init_reg(d);
 
-rc = pci_bridge_ssvid_init(d, rpc->ssvid_offset, dc->vendor_id,
-   rpc->ssid, errp);
-if (rc < 0) {
-error_append_hint(errp, "Can't init SSV ID, error %d\n", rc);
-goto err_bridge;
-}
+pci_bridge_ssvid_init(d, rpc->ssvid_offset, dc->vendor_id, rpc->ssid);
 
 if (rpc->interrupts_init) {
 rc = rpc->interrupts_init(d, errp);
diff --git a/hw/pci-bridge/xio3130_downstream.c 
b/hw/pci-bridge/xio3130_downstream.c
index 05e2b06c0c..eea3d3a2df 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -81,12 +81,8 @@ static void xio3130_downstream_realize(PCIDevice *d, Error 
**errp)
 goto err_bridge;
 }
 
-rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
-   XIO3130_SSVID_SVID, XIO3130_SSVID_SSID,
-   errp);
-if (rc < 0) {
-goto err_msi;
-}
+pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
+  XIO3130_SSVID_SVID, XIO3130_SSVID_SSID);
 
 rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_DOWNSTREAM,
p->port, errp);
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index 5ff46ef050..d954906d79 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -71,12 +71,8 @@ static void xio3130_upstream_realize(PCIDevice *d, Error 
**errp)
 goto err_bridge;
 }
 
-rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
-   XIO3130_SSVID_SVID, XIO3130_SSVID_SSID,
-   errp);
-if (rc < 0) {
-goto err_msi;
-}
+pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
+  XIO3130_SSVID_SVID, XIO3130_SSVID_SSID);
 
 rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_UPSTREAM,
p->port, errp);
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index da34c8ebcd..30032fed64 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -42,21 +42,15 @@
 #define PCI_SSVID_SVID  4
 #define PCI_SSVID_SSID  6
 
-int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
-  uint16_t svid, uint16_t ssid,
-  Error **errp)
+void pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
+   uint16_t svid, uint16_t ssid)
 {
-int pos;
+uint8_t pos;
 
-pos = pci_add_capability(dev, PCI_CAP_ID_SSVID, offset,
- PCI_SSVID_SIZEOF, errp);
-if (pos < 0) {
-return pos;
-}
+pos = pci_add_capability(dev, PCI_CAP_ID_SSVID, offset, PCI_SSVID_SIZEOF);
 
 pci_set_word(dev->config + pos + PCI_SSVID_SVID, svid);
 pci_set_word(dev->config + pos + PCI_SSVID_SSID, ssid);
-return pos;
 }
 
 /* Accessor function to get parent bridge device from pci bus. */
@@ -455,11 +449,8 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int 
cap_offset,
 .mem_pref_64 = cpu_to_le64(res_reserve.mem_pref_64)
 };
 
-int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
- 

Re: [PATCH] migration: check magic value for deciding the mapping of channels

2022-11-01 Thread manish.mishra

Sorry for the late patch on this. I mentioned I would send it last week itself, but
later realised it was a festival week in India, so it was mostly holidays.

Thanks

Manish Mishra

On 01/11/22 8:00 pm, manish.mishra wrote:

Current logic assumes that channel connections on the destination side are
always established in the same order as the source and the first one will
always be the default channel followed by the multifd or post-copy
preemption channel. This may not be always true, as even if a channel has a
connection established on the source side it can be in the pending state on
the destination side and a newer connection can be established first.
Basically causing out of order mapping of channels on the destination side.
Currently, all channels except post-copy preempt send a magic number, this
patch uses that magic number to decide the type of channel. This logic is
applicable only for precopy(multifd) live migration, as mentioned, the
post-copy preempt channel does not send any magic number. Also, this patch
uses MSG_PEEK to check the magic number of channels so that current
data/control stream management remains unaffected.

Signed-off-by: manish.mishra
---
  include/io/channel.h | 25 +
  io/channel-socket.c  | 27 +++
  io/channel.c | 39 +++
  migration/migration.c| 33 +
  migration/multifd.c  | 12 
  migration/multifd.h  |  2 +-
  migration/postcopy-ram.c |  5 +
  migration/postcopy-ram.h |  2 +-
  8 files changed, 119 insertions(+), 26 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..74177aeeea 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -115,6 +115,10 @@ struct QIOChannelClass {
  int **fds,
  size_t *nfds,
  Error **errp);
+ssize_t (*io_read_peek)(QIOChannel *ioc,
+void *buf,
+size_t nbytes,
+Error **errp);
  int (*io_close)(QIOChannel *ioc,
  Error **errp);
  GSource * (*io_create_watch)(QIOChannel *ioc,
@@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
size_t buflen,
Error **errp);
  
+/**

+ * qio_channel_read_peek_all:
+ * @ioc: the channel object
+ * @buf: the memory region to read in data
+ * @nbytes: the number of bytes to read
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read given @nbytes data from peek of channel into
+ * memory region @buf.
+ *
+ * The function will be blocked until read size is
+ * equal to requested size.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *  occurs without data, or -1 on error
+ */
+int qio_channel_read_peek_all(QIOChannel *ioc,
+  void* buf,
+  size_t nbytes,
+  Error **errp);
+
  /**
   * qio_channel_set_blocking:
   * @ioc: the channel object
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..b99f5dfda6 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
  }
  #endif /* WIN32 */
  
+static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,

+void *buf,
+size_t nbytes,
+Error **errp)
+{
+QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+ssize_t bytes = 0;
+
+retry:
+bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
+
+if (bytes < 0) {
+if (errno == EINTR) {
+goto retry;
+}
+if (errno == EAGAIN) {
+return QIO_CHANNEL_ERR_BLOCK;
+}
+
+error_setg_errno(errp, errno,
+ "Unable to read from peek of socket");
+return -1;
+}
+
+return bytes;
+}
  
  #ifdef QEMU_MSG_ZEROCOPY

  static int qio_channel_socket_flush(QIOChannel *ioc,
@@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass 
*klass,
  
  ioc_klass->io_writev = qio_channel_socket_writev;

  ioc_klass->io_readv = qio_channel_socket_readv;
+ioc_klass->io_read_peek = qio_channel_socket_read_peek;
  ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
  ioc_klass->io_close = qio_channel_socket_close;
  ioc_klass->io_shutdown = qio_channel_socket_shutdown;
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..a2d9b96f3f 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
  return qio_channel_writev_all(ioc, &iov, 1, errp);
  }
  
+int qio_channel_read_peek_all(QIOChannel *ioc,

+  void* buf,
+   

[PATCH v8 13/17] pci/slotid: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of slotid_cap_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/slotid_cap.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/pci/slotid_cap.c b/hw/pci/slotid_cap.c
index 36d021b4a6..5da8c82133 100644
--- a/hw/pci/slotid_cap.c
+++ b/hw/pci/slotid_cap.c
@@ -12,7 +12,7 @@ int slotid_cap_init(PCIDevice *d, int nslots,
 unsigned offset,
 Error **errp)
 {
-int cap;
+uint8_t cap;
 
 if (!chassis) {
 error_setg(errp, "Bridge chassis not specified. Each bridge is 
required"
@@ -24,11 +24,7 @@ int slotid_cap_init(PCIDevice *d, int nslots,
 return -EINVAL;
 }
 
-cap = pci_add_capability(d, PCI_CAP_ID_SLOTID, offset,
- SLOTID_CAP_LENGTH, errp);
-if (cap < 0) {
-return cap;
-}
+cap = pci_add_capability(d, PCI_CAP_ID_SLOTID, offset, SLOTID_CAP_LENGTH);
 /* We make each chassis unique, this way each bridge is First in Chassis */
 d->config[cap + PCI_SID_ESR] = PCI_SID_ESR_FIC |
 (nslots << SLOTID_NSLOTS_SHIFT);
-- 
2.38.1




Re: [PATCH v8 01/17] hw/vfio/pci: Ensure MSI and MSI-X do not overlap

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 14:57, Akihiko Odaki wrote:

pci_add_capability() checks whether capabilities overlap, and notifies
its caller so that it can properly handle the case. However, in
most cases, the capabilities never actually overlap, and the interface
incurs extra error handling code, which is often incorrect or
suboptimal. For such cases, pci_add_capability() can simply abort the
execution if the capabilities actually overlap since it should be a
programming error.

This change handles the other cases: hw/vfio/pci depends on the check to
decide whether the MSI and MSI-X capabilities overlap with another
capability. As these cases are quite exceptional and hw/vfio/pci knows much
about PCI capabilities, adding code specific to the cases to hw/vfio/pci still
results in less code in total than having error handling code everywhere.

Signed-off-by: Akihiko Odaki 
---
  hw/pci/pci.c | 34 ++
  hw/vfio/pci.c| 15 ++-
  include/hw/pci/pci.h |  3 +++
  3 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2f450f6a72..b53649d1fd 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2512,6 +2512,25 @@ static void pci_del_option_rom(PCIDevice *pdev)
  pdev->has_rom = false;
  }
  
+bool pci_check_capability_overlap(PCIDevice *pdev, uint8_t cap_id,

+  uint8_t offset, uint8_t size, Error **errp)
+{
+int i;
+
+for (i = offset; i < offset + size; i++) {
+if (pdev->used[i]) {
+error_setg(errp,
+   "%s:%02x:%02x.%x PCI capability %x at offset %x overlaps 
existing capability %x at offset %x",
+   pci_root_bus_path(pdev), pci_dev_bus_num(pdev),
+   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+   cap_id, offset, pci_find_capability_at_offset(pdev, i), 
i);
+return true;
+}
+}
+
+return false;
+}


I apologize for jumping at v8 :/

Per the Error API, a function taking an Error** as its last argument should
return TRUE on success, or return FALSE on error and set the *errp argument.


Your function returns 'true' on error. The confusion might come from its
name 'pci_check_capability_overlap'.

> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index b54b6ef88f..77b264c17e 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -390,6 +390,9 @@ void pci_register_vga(PCIDevice *pci_dev, 
MemoryRegion *mem,

>   void pci_unregister_vga(PCIDevice *pci_dev);
>   pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num);
>

Please document function prototype of public APIs.

> +bool pci_check_capability_overlap(PCIDevice *pdev, uint8_t cap_id,
> +  uint8_t offset, uint8_t size, 
Error **errp);

> +
>   int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
>  uint8_t offset, uint8_t size,
>  Error **errp);

Also, consider configuring scripts/git.orderfile :)

Regards,

Phil.



Re: [PATCH v5 0/6] ppc/e500: Add support for two types of flash, cleanup

2022-11-01 Thread Bernhard Beschow
Am 1. November 2022 10:41:51 UTC schrieb Bernhard Beschow :
>On Mon, Oct 31, 2022 at 12:54 PM Philippe Mathieu-Daudé 
>wrote:
>
>> This is a respin of Bernhard's v4 with Freescale eSDHC implemented
>> as an 'UNIMP' region. See v4 cover here:
>>
>> https://lore.kernel.org/qemu-devel/20221018210146.193159-1-shen...@gmail.com/
>>
>> Only tested with the ppce500 machine (no further regression testing).
>>
>> Since v4:
>> - Do not rename ESDHC_* definitions to USDHC_*
>> - Do not modify SDHCIState structure
>>
>
>Works beautifully, both for the buildroot load and for my proprietary load.
>So:
>Tested-by: Bernhard Beschow
>
>>
>> Bernhard Beschow (4):
>>   hw/block/pflash_cfi0{1, 2}: Error out if device length isn't a power
>> of two
>>   docs/system/ppc/ppce500: Use qemu-system-ppc64 across the board(s)
>>   hw/ppc/e500: Implement pflash handling
>>   hw/ppc/e500: Add Freescale eSDHC to e500plat
>>
>> Philippe Mathieu-Daudé (2):
>>   hw/sd/sdhci: MMIO region is implemented in 32-bit accesses
>>   hw/sd/sdhci: Map host controller interface in host endianess

Hi Phil,

Is there a chance to get this in for 7.2?

Best regards,
Bernhard
>>
>>  docs/system/ppc/ppce500.rst |  38 +--
>>  hw/block/pflash_cfi01.c |   8 ++-
>>  hw/block/pflash_cfi02.c |   5 ++
>>  hw/ppc/Kconfig  |   3 +
>>  hw/ppc/e500.c   | 127 +++-
>>  hw/ppc/e500.h   |   1 +
>>  hw/ppc/e500plat.c   |   1 +
>>  hw/sd/sdhci.c   |   6 +-
>>  8 files changed, 180 insertions(+), 9 deletions(-)
>>
>> --
>> 2.37.3
>>
>>




[PATCH v8 05/17] e1000e: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/net/e1000e.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index ac96f7665a..e433b8f9a5 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -377,17 +377,10 @@ e1000e_gen_dsn(uint8_t *mac)
(uint64_t)(mac[0])  << 56;
 }
 
-static int
+static void
 e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
 {
-Error *local_err = NULL;
-int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &local_err);
-
-if (local_err) {
-error_report_err(local_err);
-return ret;
-}
+pci_add_capability(pdev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF);
 
 pci_set_word(pdev->config + offset + PCI_PM_PMC,
  PCI_PM_CAP_VER_1_1 |
@@ -400,8 +393,6 @@ e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, 
uint16_t pmc)
 
 pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL,
  PCI_PM_CTRL_PME_STATUS);
-
-return ret;
 }
 
 static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address,
@@ -480,10 +471,7 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 trace_e1000e_msi_init_fail(ret);
 }
 
-if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset,
-  PCI_PM_CAP_DSI) < 0) {
-hw_error("Failed to initialize PM capability");
-}
+e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, PCI_PM_CAP_DSI);
 
 if (pcie_aer_init(pci_dev, PCI_ERR_VER, e1000e_aer_offset,
   PCI_ERR_SIZEOF, NULL) < 0) {
-- 
2.38.1




Re: [PATCH v8 17/17] pci: Remove legacy errp from pci_add_capability

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 14:57, Akihiko Odaki wrote:

Signed-off-by: Akihiko Odaki 
---
  hw/pci/pci.c | 20 +---
  include/hw/pci/pci.h | 12 ++--
  2 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index cce57f572c..41de7643af 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2532,14 +2532,11 @@ bool pci_check_capability_overlap(PCIDevice *pdev, 
uint8_t cap_id,
  }
  
  /*

- * On success, pci_add_capability_legacy() returns a positive value
- * that the offset of the pci capability.
- * On failure, it sets an error and returns a negative error
- * code.
+ * pci_add_capability() returns a positive value that the offset of the pci
+ * capability.


Simpler:

"Return: offset of the PCI capability."


   */
-int pci_add_capability_legacy(PCIDevice *pdev, uint8_t cap_id,
-  uint8_t offset, uint8_t size,
-  Error **errp)
+uint8_t pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
+   uint8_t offset, uint8_t size)
  {
  uint8_t *config;
  





[PATCH v9 04/17] ahci: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/ide/ich.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 1007a51fcb..3b478b01f8 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -106,7 +106,7 @@ static void pci_ich9_ahci_init(Object *obj)
 static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
 {
 struct AHCIPCIState *d;
-int sata_cap_offset;
+uint8_t sata_cap_offset;
 uint8_t *sata_cap;
 d = ICH9_AHCI(dev);
 int ret;
@@ -130,11 +130,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error 
**errp)
  &d->ahci.mem);
 
 sata_cap_offset = pci_add_capability(dev, PCI_CAP_ID_SATA,
-  ICH9_SATA_CAP_OFFSET, SATA_CAP_SIZE,
-  errp);
-if (sata_cap_offset < 0) {
-return;
-}
+  ICH9_SATA_CAP_OFFSET, SATA_CAP_SIZE);
 
 sata_cap = dev->config + sata_cap_offset;
 pci_set_word(sata_cap + SATA_CAP_REV, 0x10);
-- 
2.38.1




Re: [PATCH 0/4] hw: make TCO watchdog actually work by default for Q35

2022-11-01 Thread Igor Mammedov
On Mon, 31 Oct 2022 11:48:58 -0400
"Michael S. Tsirkin"  wrote:

> On Mon, Oct 31, 2022 at 01:50:24PM +, Daniel P. Berrangé wrote:
> > On Mon, Oct 31, 2022 at 01:19:30PM +, Daniel P. Berrangé wrote:  
> > > The TCO watchdog is unconditionally integrated into the Q35 machine
> > > type by default, but at the same time is unconditionally disabled
> > > from firing by a host config option that overrides guest OS attempts
> > > to enable it. People have to know to set a magic -global to make
> > > it non-broken  
> > 
> > Incidentally I found that originally the TCO watchdog was not
> > unconditionally enabled. Its exposure to the guest could be
> > turned on/off using
> > 
> >   -global ICH9-LPC.enable_tco=bool
> > 
> > This was implemented for machine type compat, but it also gave
> > apps a way to disable the watchdog functionality. Unfortunately
> > that ability was discarded in this series:
> > 
> >   
> > https://lore.kernel.org/all/1453564933-29638-1-git-send-email-ehabk...@redhat.com/
> > 
> > but the 'enable_tco' property still exists in QOM, but silently
> > ignored.
> > 
> > Seems we should either fix the impl of 'enable_tco', or remove the
> > QOM property entirely, so we don't pretend it can be toggled anymore.
> > 
> > With regards,
> > Daniel  
> 
> i am inclined to say you are right and the fix is to fix the impl.

Is there a need for users to disable the watchdog at all?
It has always been present since then and no one complained,
so perhaps we should ditch the property instead of fixing it
to keep it simple.

> 
> > -- 
> > |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange 
> > :|
> > |: https://libvirt.org -o-https://fstop138.berrange.com 
> > :|
> > |: https://entangle-photo.org-o-https://www.instagram.com/dberrange 
> > :|  
> 




Re: [PATCH v4 1/3] util/main-loop: Fix maximum number of wait objects for win32

2022-11-01 Thread Bin Meng
On Tue, Nov 1, 2022 at 8:03 PM Daniel P. Berrangé  wrote:
>
> On Tue, Nov 01, 2022 at 09:14:55AM +0800, Bin Meng wrote:
> > Hi Daniel,
> >
> > On Wed, Oct 26, 2022 at 12:41 AM Bin Meng  wrote:
> > >
> > > On Wed, Oct 19, 2022 at 6:20 PM Bin Meng  wrote:
> > > >
> > > > From: Bin Meng 
> > > >
> > > > The maximum number of wait objects for win32 should be
> > > > MAXIMUM_WAIT_OBJECTS, not MAXIMUM_WAIT_OBJECTS + 1.
> > > >
> > > > Signed-off-by: Bin Meng 
> > > > ---
> > > >
> > > > Changes in v4:
> > > > - make the out of bounds access protection explicit
> > > >
> > > > Changes in v3:
> > > > - move the check of adding the same HANDLE twice to a separete patch
> > > >
> > > > Changes in v2:
> > > > - fix the logic in qemu_add_wait_object() to avoid adding
> > > >   the same HANDLE twice
> > > >
> > > >  util/main-loop.c | 10 +-
> > > >  1 file changed, 5 insertions(+), 5 deletions(-)
> > > >
> > >
> > > Ping?
> >
> > Would you queue this series? Thanks!
>
> The main loop is not my area as maintainer - it would normally be
> Paolo IIRC.
>

Thanks, but Paolo has been silent since day 1 ...

Regards,
Bin



[PATCH v9 05/17] e1000e: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/net/e1000e.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index ac96f7665a..e433b8f9a5 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -377,17 +377,10 @@ e1000e_gen_dsn(uint8_t *mac)
(uint64_t)(mac[0])  << 56;
 }
 
-static int
+static void
 e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
 {
-Error *local_err = NULL;
-int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &local_err);
-
-if (local_err) {
-error_report_err(local_err);
-return ret;
-}
+pci_add_capability(pdev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF);
 
 pci_set_word(pdev->config + offset + PCI_PM_PMC,
  PCI_PM_CAP_VER_1_1 |
@@ -400,8 +393,6 @@ e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, 
uint16_t pmc)
 
 pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL,
  PCI_PM_CTRL_PME_STATUS);
-
-return ret;
 }
 
 static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address,
@@ -480,10 +471,7 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 trace_e1000e_msi_init_fail(ret);
 }
 
-if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset,
-  PCI_PM_CAP_DSI) < 0) {
-hw_error("Failed to initialize PM capability");
-}
+e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, PCI_PM_CAP_DSI);
 
 if (pcie_aer_init(pci_dev, PCI_ERR_VER, e1000e_aer_offset,
   PCI_ERR_SIZEOF, NULL) < 0) {
-- 
2.38.1




Re: [PATCH v5 0/6] ppc/e500: Add support for two types of flash, cleanup

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 17:01, Bernhard Beschow wrote:

Am 1. November 2022 10:41:51 UTC schrieb Bernhard Beschow :

On Mon, Oct 31, 2022 at 12:54 PM Philippe Mathieu-Daudé 
wrote:


This is a respin of Bernhard's v4 with Freescale eSDHC implemented
as an 'UNIMP' region. See v4 cover here:

https://lore.kernel.org/qemu-devel/20221018210146.193159-1-shen...@gmail.com/

Only tested with the ppce500 machine (no further regression testing).

Since v4:
- Do not rename ESDHC_* definitions to USDHC_*
- Do not modify SDHCIState structure



Works beautifully, both for the buildroot load and for my proprietary load.
So:
Tested-by: Bernhard Beschow



Bernhard Beschow (4):
   hw/block/pflash_cfi0{1, 2}: Error out if device length isn't a power
 of two
   docs/system/ppc/ppce500: Use qemu-system-ppc64 across the board(s)
   hw/ppc/e500: Implement pflash handling
   hw/ppc/e500: Add Freescale eSDHC to e500plat

Philippe Mathieu-Daudé (2):
   hw/sd/sdhci: MMIO region is implemented in 32-bit accesses
   hw/sd/sdhci: Map host controller interface in host endianess


Hi Phil,

Is there a chance to get this in for 7.2?


Well 1/ can you review patch #1 and 2/ we need to figure out what to do 
with patch #2 :) Can you point me to the CCSR datasheet?




[PATCH v9 03/17] hw/i386/amd_iommu: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/i386/amd_iommu.c | 21 -
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 725f69095b..8a88cbea0a 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1539,7 +1539,6 @@ static void amdvi_sysbus_reset(DeviceState *dev)
 
 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
 {
-int ret = 0;
 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
 MachineState *ms = MACHINE(qdev_get_machine());
 PCMachineState *pcms = PC_MACHINE(ms);
@@ -1553,23 +1552,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, 
Error **errp)
 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
 return;
 }
-ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
- AMDVI_CAPAB_SIZE, errp);
-if (ret < 0) {
-return;
-}
-s->capab_offset = ret;
+s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE);
 
-ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
-if (ret < 0) {
-return;
-}
-ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
-if (ret < 0) {
-return;
-}
+pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE);
+pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE);
 
 /* Pseudo address space under root PCI bus. */
 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
-- 
2.38.1




Re: [PATCH 09/16] hw/9pfs: Disable unsupported flags and features for Windows

2022-11-01 Thread Christian Schoenebeck
On Monday, October 24, 2022 6:57:52 AM CET Bin Meng wrote:
> From: Guohuai Shi 
> 
> Some flags and features are not supported on Windows, like mknod,
> readlink, file mode, etc. Update the codes for Windows.
> 
> Signed-off-by: Guohuai Shi 
> Signed-off-by: Bin Meng 
> ---
> 
>  hw/9pfs/9p-util.h |  6 +++-
>  hw/9pfs/9p.c  | 90 ++-
>  2 files changed, 86 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
> index 82b2d0c3e4..3d154e9103 100644
> --- a/hw/9pfs/9p-util.h
> +++ b/hw/9pfs/9p-util.h
> @@ -53,8 +53,10 @@ static inline uint64_t makedev_dotl(uint32_t dev_major, 
> uint32_t dev_minor)
>   */
>  static inline uint64_t host_dev_to_dotl_dev(dev_t dev)
>  {
> -#ifdef CONFIG_LINUX
> +#if defined(CONFIG_LINUX)
>  return dev;
> +#elif defined(CONFIG_WIN32)
> +return 0;

Really?

>  #else
>  return makedev_dotl(major(dev), minor(dev));
>  #endif
> @@ -260,7 +262,9 @@ static inline struct dirent *qemu_dirent_dup(struct 
> dirent *dent)
>  #if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP
>  int pthread_fchdir_np(int fd) __attribute__((weak_import));
>  #endif
> +#ifndef CONFIG_WIN32
>  int qemu_mknodat(P9_FILE_ID dirfd, const char *filename, mode_t mode,
>   dev_t dev);
> +#endif
>  
>  #endif
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index 6c4af86240..771aab34ac 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -39,6 +39,11 @@
>  #include "qemu/xxhash.h"
>  #include 
>  
> +#ifdef CONFIG_WIN32
> +#define UTIME_NOW   ((1l << 30) - 1l)
> +#define UTIME_OMIT  ((1l << 30) - 2l)
> +#endif
> +
>  int open_fd_hw;
>  int total_open_fd;
>  static int open_fd_rc;
> @@ -132,13 +137,17 @@ static int dotl_to_open_flags(int flags)
>  DotlOpenflagMap dotl_oflag_map[] = {
>  { P9_DOTL_CREATE, O_CREAT },
>  { P9_DOTL_EXCL, O_EXCL },
> +#ifndef CONFIG_WIN32
>  { P9_DOTL_NOCTTY , O_NOCTTY },
> +#endif
>  { P9_DOTL_TRUNC, O_TRUNC },
>  { P9_DOTL_APPEND, O_APPEND },
> +#ifndef CONFIG_WIN32
>  { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
>  { P9_DOTL_DSYNC, O_DSYNC },
>  { P9_DOTL_FASYNC, FASYNC },
> -#ifndef CONFIG_DARWIN
> +#endif
> +#ifdef CONFIG_LINUX

Better

   #if !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)

Otherwise it might automatically opt-out other future platforms
unintentionally.

>  { P9_DOTL_NOATIME, O_NOATIME },
>  /*
>   *  On Darwin, we could map to F_NOCACHE, which is
> @@ -151,8 +160,10 @@ static int dotl_to_open_flags(int flags)
>  #endif
>  { P9_DOTL_LARGEFILE, O_LARGEFILE },
>  { P9_DOTL_DIRECTORY, O_DIRECTORY },
> +#ifndef CONFIG_WIN32
>  { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
>  { P9_DOTL_SYNC, O_SYNC },
> +#endif
>  };
>  
>  for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
> @@ -179,8 +190,11 @@ static int get_dotl_openflags(V9fsState *s, int oflags)
>   * Filter the client open flags
>   */
>  flags = dotl_to_open_flags(oflags);
> -flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
> -#ifndef CONFIG_DARWIN
> +flags &= ~(O_CREAT);
> +#ifndef CONFIG_WIN32
> +flags &= ~(O_NOCTTY | O_ASYNC);
> +#endif
> +#ifdef CONFIG_LINUX

Same as above: better explicitly opt-out than the other way around.

>  /*
>   * Ignore direct disk access hint until the server supports it.
>   */
> @@ -986,9 +1000,11 @@ static int stat_to_qid(V9fsPDU *pdu, const struct stat 
> *stbuf, V9fsQID *qidp)
>  if (S_ISDIR(stbuf->st_mode)) {
>  qidp->type |= P9_QID_TYPE_DIR;
>  }
> +#ifndef CONFIG_WIN32
>  if (S_ISLNK(stbuf->st_mode)) {
>  qidp->type |= P9_QID_TYPE_SYMLINK;
>  }
> +#endif
>  
>  return 0;
>  }
> @@ -1097,6 +1113,7 @@ static mode_t v9mode_to_mode(uint32_t mode, V9fsString 
> *extension)
>  ret |= S_IFDIR;
>  }
>  
> +#ifndef CONFIG_WIN32
>  if (mode & P9_STAT_MODE_SYMLINK) {
>  ret |= S_IFLNK;
>  }
> @@ -1106,6 +1123,7 @@ static mode_t v9mode_to_mode(uint32_t mode, V9fsString 
> *extension)
>  if (mode & P9_STAT_MODE_NAMED_PIPE) {
>  ret |= S_IFIFO;
>  }
> +#endif
>  if (mode & P9_STAT_MODE_DEVICE) {
>  if (extension->size && extension->data[0] == 'c') {
>  ret |= S_IFCHR;
> @@ -1118,6 +1136,7 @@ static mode_t v9mode_to_mode(uint32_t mode, V9fsString 
> *extension)
>  ret |= S_IFREG;
>  }
>  
> +#ifndef CONFIG_WIN32
>  if (mode & P9_STAT_MODE_SETUID) {
>  ret |= S_ISUID;
>  }
> @@ -1127,6 +1146,7 @@ static mode_t v9mode_to_mode(uint32_t mode, V9fsString 
> *extension)
>  if (mode & P9_STAT_MODE_SETVTX) {
>  ret |= S_ISVTX;
>  }
> +#endif
>  
>  return ret;
>  }
> @@ -1182,6 +1202,7 @@ static uint32_t stat_to_v9mode(const struct stat *stbuf)
>  mode |= P9_STAT_MODE_DIR;
>  }
>  
> +#ifndef CONFIG_WIN32
>  if (S_ISLNK(stbuf->st_mode)) {
>  mode |= P9_STAT_MODE_SYMLINK;
>   

[PATCH v9 01/17] hw/vfio/pci: Ensure MSI and MSI-X do not overlap

2022-11-01 Thread Akihiko Odaki
pci_add_capability() checks whether capabilities overlap, and notifies
its caller so that it can properly handle the case. However, in
most cases, the capabilities never actually overlap, and the interface
incurs extra error handling code, which is often incorrect or
suboptimal. For such cases, pci_add_capability() can simply abort the
execution if the capabilities actually overlap since it should be a
programming error.

This change handles the other cases: hw/vfio/pci depends on the check to
decide whether the MSI and MSI-X capabilities overlap with another
capability. As these cases are quite exceptional and hw/vfio/pci knows much
about PCI capabilities, adding code specific to the cases to hw/vfio/pci still
results in less code in total than having error handling code everywhere.

Signed-off-by: Akihiko Odaki 
---
 include/hw/pci/pci.h |  7 +++
 hw/pci/pci.c | 33 +
 hw/vfio/pci.c| 15 ++-
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b54b6ef88f..f4e6612440 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -390,6 +390,13 @@ void pci_register_vga(PCIDevice *pci_dev, MemoryRegion 
*mem,
 void pci_unregister_vga(PCIDevice *pci_dev);
 pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num);
 
+/*
+ * If there is no overlap, pci_check_capability_overlap() returns true.
+ * Otherwise, it sets an error and returns false.
+ */
+bool pci_check_capability_overlap(PCIDevice *pdev, uint8_t cap_id,
+  uint8_t offset, uint8_t size, Error **errp);
+
 int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
uint8_t offset, uint8_t size,
Error **errp);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2f450f6a72..5531e30385 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2512,6 +2512,25 @@ static void pci_del_option_rom(PCIDevice *pdev)
 pdev->has_rom = false;
 }
 
+bool pci_check_capability_overlap(PCIDevice *pdev, uint8_t cap_id,
+  uint8_t offset, uint8_t size, Error **errp)
+{
+int i;
+
+for (i = offset; i < offset + size; i++) {
+if (pdev->used[i]) {
+error_setg(errp,
+   "%s:%02x:%02x.%x PCI capability %x at offset %x 
overlaps existing capability %x at offset %x",
+   pci_root_bus_path(pdev), pci_dev_bus_num(pdev),
+   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+   cap_id, offset, pci_find_capability_at_offset(pdev, i), 
i);
+return false;
+}
+}
+
+return true;
+}
+
 /*
  * On success, pci_add_capability() returns a positive value
  * that the offset of the pci capability.
@@ -2523,7 +2542,6 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
Error **errp)
 {
 uint8_t *config;
-int i, overlapping_cap;
 
 if (!offset) {
 offset = pci_find_space(pdev, size);
@@ -2534,17 +2552,8 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
  * depends on this check to verify that the device is not broken.
  * Should never trigger for emulated devices, but it's helpful
  * for debugging these. */
-for (i = offset; i < offset + size; i++) {
-overlapping_cap = pci_find_capability_at_offset(pdev, i);
-if (overlapping_cap) {
-error_setg(errp, "%s:%02x:%02x.%x "
-   "Attempt to add PCI capability %x at offset "
-   "%x overlaps existing capability %x at offset %x",
-   pci_root_bus_path(pdev), pci_dev_bus_num(pdev),
-   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
-   cap_id, offset, overlapping_cap, i);
-return -EINVAL;
-}
+if (!pci_check_capability_overlap(pdev, cap_id, offset, size, errp)) {
+return -EINVAL;
 }
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 939dcc3d4a..0ca6b5ff4b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1298,6 +1298,14 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, 
Error **errp)
 
 trace_vfio_msi_setup(vdev->vbasedev.name, pos);
 
+vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
+
+ret = pci_check_capability_overlap(&vdev->pdev, PCI_CAP_ID_MSI,
+   pos, vdev->msi_cap_size, errp);
+if (!ret) {
+return -EINVAL;
+}
+
 ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err);
 if (ret < 0) {
 if (ret == -ENOTSUP) {
@@ -1306,7 +1314,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, 
Error **errp)
 error_propagate_prepend(errp, err, "msi_init failed: ");
 return ret;
 }
-vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
 
 

[PATCH v8 03/17] hw/i386/amd_iommu: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/i386/amd_iommu.c | 21 -
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 725f69095b..8a88cbea0a 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1539,7 +1539,6 @@ static void amdvi_sysbus_reset(DeviceState *dev)
 
 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
 {
-int ret = 0;
 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
 MachineState *ms = MACHINE(qdev_get_machine());
 PCMachineState *pcms = PC_MACHINE(ms);
@@ -1553,23 +1552,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, 
Error **errp)
 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
 return;
 }
-ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
- AMDVI_CAPAB_SIZE, errp);
-if (ret < 0) {
-return;
-}
-s->capab_offset = ret;
+s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE);
 
-ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
-if (ret < 0) {
-return;
-}
-ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
-if (ret < 0) {
-return;
-}
+pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE);
+pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE);
 
 /* Pseudo address space under root PCI bus. */
 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
-- 
2.38.1




[PATCH v9 14/17] hw/pci-bridge/pcie_pci_bridge: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/pci-bridge/pcie_pci_bridge.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 99778e3e24..1b839465e7 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -35,7 +35,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 {
 PCIBridge *br = PCI_BRIDGE(d);
 PCIEPCIBridge *pcie_br = PCIE_PCI_BRIDGE_DEV(d);
-int rc, pos;
+int rc;
 
 pci_bridge_initfn(d, TYPE_PCI_BUS);
 
@@ -49,12 +49,8 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 
 pcie_cap_init(d, 0, PCI_EXP_TYPE_PCI_BRIDGE, 0);
 
-pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp);
-if (pos < 0) {
-goto pm_error;
-}
-d->exp.pm_cap = pos;
-pci_set_word(d->config + pos + PCI_PM_PMC, 0x3);
+d->exp.pm_cap = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF);
+pci_set_word(d->config + d->exp.pm_cap + PCI_PM_PMC, 0x3);
 
 pcie_cap_arifwd_init(d);
 pcie_cap_deverr_init(d);
@@ -85,7 +81,6 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 msi_error:
 pcie_aer_exit(d);
 aer_error:
-pm_error:
 pcie_cap_exit(d);
 shpc_cleanup(d, &pcie_br->shpc_bar);
 error:
-- 
2.38.1




Re: [PATCH 07/16] hw/9pfs: Implement Windows specific utilities functions for 9pfs

2022-11-01 Thread Christian Schoenebeck
On Monday, October 24, 2022 6:57:50 AM CET Bin Meng wrote:
> From: Guohuai Shi 
> 
> Windows POSIX API and MinGW library do not provide the NO_FOLLOW
> flag, and do not allow opening a directory by POSIX open(). This
> causes all xxx_at() functions cannot work directly. However, we
> can provide Windows handle based functions to emulate xxx_at()
> functions (e.g.: openat_win32, utimensat_win32, etc.).
> 
> Windows does not support extended attributes. 9pfs for Windows uses
> NTFS ADS (Alternate Data Streams) to emulate extended attributes.
> 
> Windows does not provide POSIX compatible readlink(), and symbolic
> link feature in 9pfs will be disabled on Windows.

Wouldn't it be more user friendly if the relevant error locations would use
something like error_report_once() and suggesting to enable mapped(-xattr) to
make 9p symlinks on guest working if desired by the user?

Probably this error case would need to be wrapped into a dedicated function,
otherwise I guess error_report_once() would fire several times by different
callers.

> Signed-off-by: Guohuai Shi 
> Signed-off-by: Bin Meng 
> ---
> 
>  hw/9pfs/9p-local.h  |   7 +
>  hw/9pfs/9p-util.h   |  40 +-
>  hw/9pfs/9p-local.c  |   4 -
>  hw/9pfs/9p-util-win32.c | 885 
>  4 files changed, 931 insertions(+), 5 deletions(-)
>  create mode 100644 hw/9pfs/9p-util-win32.c
> 
> diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h
> index c8404063e5..02fd894ba3 100644
> --- a/hw/9pfs/9p-local.h
> +++ b/hw/9pfs/9p-local.h
> @@ -15,6 +15,13 @@
>  
>  #include "9p-file-id.h"
>  
> +typedef struct {
> +P9_FILE_ID mountfd;
> +#ifdef CONFIG_WIN32
> +char *root_path;
> +#endif
> +} LocalData;
> +
>  P9_FILE_ID local_open_nofollow(FsContext *fs_ctx, const char *path, int 
> flags,
> mode_t mode);
>  P9_FILE_ID local_opendir_nofollow(FsContext *fs_ctx, const char *path);
> diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
> index 1e7dc76345..82b2d0c3e4 100644
> --- a/hw/9pfs/9p-util.h
> +++ b/hw/9pfs/9p-util.h
> @@ -90,26 +90,61 @@ static inline int errno_to_dotl(int err) {
>  return err;
>  }
>  
> -#ifdef CONFIG_DARWIN
> +#if defined(CONFIG_DARWIN)
>  #define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
> +#elif defined(CONFIG_WIN32)
> +#define qemu_fgetxattr fgetxattr_win32
>  #else
>  #define qemu_fgetxattr fgetxattr
>  #endif
>  
> +#ifdef CONFIG_WIN32
> +#define qemu_openat openat_win32
> +#define qemu_fstatatfstatat_win32
> +#define qemu_mkdiratmkdirat_win32
> +#define qemu_renameat   renameat_win32
> +#define qemu_utimensat  utimensat_win32
> +#define qemu_unlinkat   unlinkat_win32
> +#else
>  #define qemu_openat openat
>  #define qemu_fstatatfstatat
>  #define qemu_mkdiratmkdirat
>  #define qemu_renameat   renameat
>  #define qemu_utimensat  utimensat
>  #define qemu_unlinkat   unlinkat
> +#endif
> +
> +#ifdef CONFIG_WIN32
> +char *get_full_path_win32(P9_FILE_ID fd, const char *name);
> +ssize_t fgetxattr_win32(int fd, const char *name, void *value, size_t size);
> +P9_FILE_ID openat_win32(P9_FILE_ID dirfd, const char *pathname, int flags,
> +mode_t mode);
> +int fstatat_win32(P9_FILE_ID dirfd, const char *pathname,
> +  struct stat *statbuf, int flags);
> +int mkdirat_win32(P9_FILE_ID dirfd, const char *pathname, mode_t mode);
> +int renameat_win32(P9_FILE_ID olddirfd, const char *oldpath,
> +   P9_FILE_ID newdirfd, const char *newpath);
> +int utimensat_win32(P9_FILE_ID dirfd, const char *pathname,
> +const struct timespec times[2], int flags);
> +int unlinkat_win32(P9_FILE_ID dirfd, const char *pathname, int flags);
> +int statfs_win32(const char *root_path, struct statfs *stbuf);
> +P9_FILE_ID openat_dir(P9_FILE_ID dirfd, const char *name);
> +P9_FILE_ID openat_file(P9_FILE_ID dirfd, const char *name, int flags,
> +   mode_t mode);
> +#endif
>  
>  static inline void close_preserve_errno(P9_FILE_ID fd)
>  {
>  int serrno = errno;
> +#ifndef CONFIG_WIN32
>  close(fd);
> +#else
> +CloseHandle(fd);
> +#endif
>  errno = serrno;
>  }
>  
> +#ifndef CONFIG_WIN32
>  static inline P9_FILE_ID openat_dir(P9_FILE_ID dirfd, const char *name)
>  {
>  return qemu_openat(dirfd, name,
> @@ -157,6 +192,7 @@ again:
>  errno = serrno;
>  return fd;
>  }
> +#endif
>  
>  ssize_t fgetxattrat_nofollow(P9_FILE_ID dirfd, const char *path,
>   const char *name, void *value, size_t size);
> @@ -167,6 +203,7 @@ ssize_t flistxattrat_nofollow(P9_FILE_ID dirfd, const 
> char *filename,
>  ssize_t fremovexattrat_nofollow(P9_FILE_ID dirfd, const char *filename,
>  const char *name);
>  
> +#ifndef CONFIG_WIN32
>  /*
>   * Darwin has d_seekoff, which appears to function similarly to d_off.
>   * However, it does not appear to be supported on all file systems,
> @@ -181,6 +218,7 @@ static i

Re: [PULL 08/30] target/arm: Add ptw_idx to S1Translate

2022-11-01 Thread Philippe Mathieu-Daudé

On 1/11/22 11:10, Philippe Mathieu-Daudé wrote:

On 1/11/22 00:14, Philippe Mathieu-Daudé wrote:

On 25/10/22 18:39, Peter Maydell wrote:

From: Richard Henderson 

Hoist the computation of the mmu_idx for the ptw up to
get_phys_addr_with_struct and get_phys_addr_twostage.
This removes the duplicate check for stage2 disabled
from the middle of the walk, performing it only once.

Signed-off-by: Richard Henderson 
Reviewed-by: Alex Bennée 
Tested-by: Alex Bennée 
Message-id: 20221024051851.3074715-3-richard.hender...@linaro.org
Signed-off-by: Peter Maydell 
---
  target/arm/ptw.c | 71 
  1 file changed, 54 insertions(+), 17 deletions(-)


Since this commit I can not boot Trusted Firmware on the SBSA-ref 
machine.


Do we need to set in_ptw_idx in get_phys_addr_with_secure()?


I opened https://gitlab.com/qemu-project/qemu/-/issues/1293 to track.



[PATCH v8 08/17] msi: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of msi_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/msi.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index 058d1d1ef1..5283a08b5a 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -194,7 +194,6 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
 unsigned int vectors_order;
 uint16_t flags;
 uint8_t cap_size;
-int config_offset;
 
 if (!msi_nonbroken) {
 error_setg(errp, "MSI is not supported by interrupt controller");
@@ -221,13 +220,7 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
 }
 
 cap_size = msi_cap_sizeof(flags);
-config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset,
-cap_size, errp);
-if (config_offset < 0) {
-return config_offset;
-}
-
-dev->msi_cap = config_offset;
+dev->msi_cap = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, cap_size);
 dev->cap_present |= QEMU_PCI_CAP_MSI;
 
 pci_set_word(dev->config + msi_flags_off(dev), flags);
-- 
2.38.1




RE: [PATCH 07/16] hw/9pfs: Implement Windows specific utilities functions for 9pfs

2022-11-01 Thread Shi, Guohuai



> -Original Message-
> From: Shi, Guohuai
> Sent: Tuesday, November 1, 2022 23:13
> To: Christian Schoenebeck ; qemu-devel@nongnu.org
> Cc: Greg Kurz ; Meng, Bin 
> Subject: RE: [PATCH 07/16] hw/9pfs: Implement Windows specific utilities
> functions for 9pfs
> 
> 
> 
> > -Original Message-
> > From: Christian Schoenebeck 
> > Sent: Tuesday, November 1, 2022 22:28
> > To: qemu-devel@nongnu.org
> > Cc: Shi, Guohuai ; Greg Kurz
> > ; Meng, Bin 
> > Subject: Re: [PATCH 07/16] hw/9pfs: Implement Windows specific
> > utilities functions for 9pfs
> >
> > [Please note: This e-mail is from an EXTERNAL e-mail address]
> >
> > On Monday, October 24, 2022 6:57:50 AM CET Bin Meng wrote:
> > > From: Guohuai Shi 
> > >
> > > Windows POSIX API and MinGW library do not provide the NO_FOLLOW
> > > flag, and do not allow opening a directory by POSIX open(). This
> > > causes all
> > > xxx_at() functions cannot work directly. However, we can provide
> > > Windows handle based functions to emulate xxx_at() functions (e.g.:
> > > openat_win32, utimensat_win32, etc.).
> > >
> > > Windows does not support extended attributes. 9pfs for Windows uses
> > > NTFS ADS (Alternate Data Streams) to emulate extended attributes.
> > >
> > > Windows does not provide POSIX compatible readlink(), and symbolic
> > > link feature in 9pfs will be disabled on Windows.
> >
> > Wouldn't it be more user friendly if the relevant error locations
> > would use something like error_report_once() and suggesting to enable
> > mapped(-xattr) to make 9p symlinks on guest working if desired by the user?
> >
> > Probably this error case would need to wrapped into a dedicated
> > function, otherwise I guess error_report_once() would fire several
> > times by different callers.
> >
> 
> Windows (MinGW) not only lacks symlink support, but also lacks symlink
> definitions.
> Windows does not support the symlink flag S_IFLNK.
> 
> So even if I add symlink support via mapped-xattr, the MinGW library does not
> have symlink flags, and we get a build error.
> And these flags are defined by Windows header files.
> The impact of adding a new flag to a pre-defined structure (struct stat) is
> unknown.
> 
> So I think it is not a good idea to do that.

Because Windows does not support symlinks, using error_report_once() to report it 
to the user will be OK.
But mapped-xattr cannot work.

> 
> > > Signed-off-by: Guohuai Shi 
> > > Signed-off-by: Bin Meng 
> > > ---
> > >
> > >  hw/9pfs/9p-local.h  |   7 +
> > >  hw/9pfs/9p-util.h   |  40 +-
> > >  hw/9pfs/9p-local.c  |   4 -
> > >  hw/9pfs/9p-util-win32.c | 885
> > > 
> > >  4 files changed, 931 insertions(+), 5 deletions(-)  create mode
> > > 100644 hw/9pfs/9p-util-win32.c
> > >
> > > diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h index
> > > c8404063e5..02fd894ba3 100644
> > > --- a/hw/9pfs/9p-local.h
> > > +++ b/hw/9pfs/9p-local.h
> > > @@ -15,6 +15,13 @@
> > >
> > >  #include "9p-file-id.h"
> > >
> > > +typedef struct {
> > > +P9_FILE_ID mountfd;
> > > +#ifdef CONFIG_WIN32
> > > +char *root_path;
> > > +#endif
> > > +} LocalData;
> > > +
> > >  P9_FILE_ID local_open_nofollow(FsContext *fs_ctx, const char *path,
> > > int
> > flags,
> > > mode_t mode);  P9_FILE_ID
> > > local_opendir_nofollow(FsContext *fs_ctx, const char *path); diff
> > > --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index
> > > 1e7dc76345..82b2d0c3e4 100644
> > > --- a/hw/9pfs/9p-util.h
> > > +++ b/hw/9pfs/9p-util.h
> > > @@ -90,26 +90,61 @@ static inline int errno_to_dotl(int err) {
> > >  return err;
> > >  }
> > >
> > > -#ifdef CONFIG_DARWIN
> > > +#if defined(CONFIG_DARWIN)
> > >  #define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
> > > +#elif defined(CONFIG_WIN32)
> > > +#define qemu_fgetxattr fgetxattr_win32
> > >  #else
> > >  #define qemu_fgetxattr fgetxattr
> > >  #endif
> > >
> > > +#ifdef CONFIG_WIN32
> > > +#define qemu_openat openat_win32
> > > +#define qemu_fstatatfstatat_win32
> > > +#define qemu_mkdiratmkdirat_win32
> > > +#define qemu_renameat   renameat_win32
> > > +#define qemu_utimensat  utimensat_win32
> > > +#define qemu_unlinkat   unlinkat_win32
> > > +#else
> > >  #define qemu_openat openat
> > >  #define qemu_fstatatfstatat
> > >  #define qemu_mkdiratmkdirat
> > >  #define qemu_renameat   renameat
> > >  #define qemu_utimensat  utimensat
> > >  #define qemu_unlinkat   unlinkat
> > > +#endif
> > > +
> > > +#ifdef CONFIG_WIN32
> > > +char *get_full_path_win32(P9_FILE_ID fd, const char *name); ssize_t
> > > +fgetxattr_win32(int fd, const char *name, void *value, size_t
> > > +size); P9_FILE_ID openat_win32(P9_FILE_ID dirfd, const char *pathname,
> int flags,
> > > +mode_t mode); int fstatat_win32(P9_FILE_ID
> > > +dirfd, const char *pathname,
> > > +  struct stat *statbuf, int flags); int
> > > +mkdirat_win32(P9_FILE_ID dirfd, const char *pathname, 

Re: HMAT patches failure (was Re: [PULL 00/86] pci,pc,virtio: features, tests, fixes, cleanups)

2022-11-01 Thread Jonathan Cameron via
On Tue, 1 Nov 2022 06:32:05 -0400
"Michael S. Tsirkin"  wrote:

> On Mon, Oct 31, 2022 at 04:06:03PM -0400, Stefan Hajnoczi wrote:
> > Here is another CI failure:
> > 
> > qemu-system-i386: current -smp configuration requires kernel irqchip
> > and X2APIC API support.
> > Broken pipe
> > ../tests/qtest/libqtest.c:179: kill_qemu() tried to terminate QEMU
> > process but encountered exit status 1 (expected 0)
> > TAP parsing error: Too few tests run (expected 49, got 22)

Got a bit thrown by this which is unrelated to the HMAT series.  Given I 
bisected it...

   bios-tables-test: add test for number of cores > 255
seems to be the issue.  I'll take a look into why shortly.


> > (test program exited with status code -6)
> > ――
> > 6/202 qemu:qtest+qtest-i386 / qtest-i386/test-hmp OK 7.46s 9 subtests passed
> > ▶ 7/202 ERROR:../tests/qtest/bios-tables-test.c:533:test_acpi_asl:
> > assertion failed: (all_tables_match) ERROR
> > 7/202 qemu:qtest+qtest-aarch64 / qtest-aarch64/bios-tables-test ERROR
> > 108.34s killed by signal 6 SIGABRT  
> > >>> G_TEST_DBUS_DAEMON=/builds/qemu-project/qemu/tests/dbus-vmstate-daemon.sh
> > >>>  QTEST_QEMU_BINARY=./qemu-system-aarch64 MALLOC_PERTURB_=89 
> > >>> /builds/qemu-project/qemu/build/tests/qtest/bios-tables-test --tap -k  
> > ― ✀ 
> > ―
> > stderr:
> > acpi-test: Warning! APIC binary file mismatch. Actual
> > [aml:/tmp/aml-UKB6U1], Expected
> > [aml:tests/data/acpi/virt/APIC.acpihmatvirt].
> > See source file tests/qtest/bios-tables-test.c for instructions on how
> > to update expected files.
> > to see ASL diff between mismatched files install IASL, rebuild QEMU
> > from scratch and re-run tests with V=1 environment variable set**
> > ERROR:../tests/qtest/bios-tables-test.c:533:test_acpi_asl: assertion
> > failed: (all_tables_match)

Ah. I'd failed to notice you said to drop first patch. Now replicating.
Looks like the tables introduced for HMAT need updating to take into account
changes made earlier in your pull request (version numbers etc)


Jonathan

> > (test program exited with status code -6)
> > 
> > https://gitlab.com/qemu-project/qemu/-/jobs/3253817453  
> 
> 
> Hesham Jonathan pls take a look, if you post a fixup today
> or early tomorrow I can squash it
> and then this patchset can still be included in the release.
> 
> Thanks!
> 




[PATCH v8 16/17] virtio-pci: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/virtio/virtio-pci.c | 9 ++---
 include/hw/virtio/virtio-pci.h | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c37bdc77ea..b393ff01be 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1154,8 +1154,7 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
 PCIDevice *dev = &proxy->pci_dev;
 int offset;
 
-offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
-cap->cap_len, &error_abort);
+offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0, cap->cap_len);
 
 assert(cap->cap_len >= sizeof *cap);
 memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
@@ -1864,11 +1863,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 
 pcie_endpoint_cap_init(pci_dev, 0);
 
-pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
- PCI_PM_SIZEOF, errp);
-if (pos < 0) {
-return;
-}
+pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF);
 
 pci_dev->exp.pm_cap = pos;
 
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 2446dcd9ae..9f3736723c 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -141,7 +141,7 @@ struct VirtIOPCIProxy {
 uint32_t msix_bar_idx;
 uint32_t modern_io_bar_idx;
 uint32_t modern_mem_bar_idx;
-int config_cap;
+uint8_t config_cap;
 uint32_t flags;
 bool disable_modern;
 bool ignore_backend_features;
-- 
2.38.1




[PATCH v8 00/17] pci: Abort if pci_add_capability fails

2022-11-01 Thread Akihiko Odaki
pci_add_capability appears in most PCI devices. Its error handling required
lots of code, and led to inconsistent behaviors such as:
- passing error_abort
- passing error_fatal
- asserting the returned value
- propagating the error to the caller
- skipping the rest of the function
- just ignoring

The code generating errors in pci_add_capability had a comment which
says:
> Verify that capabilities don't overlap.  Note: device assignment
> depends on this check to verify that the device is not broken.
> Should never trigger for emulated devices, but it's helpful for
> debugging these.

Indeed vfio has some code that passes capability offsets and sizes from
a physical device, but it explicitly pays attention so that the
capabilities never overlap and the only exception are MSI and MSI-X
capabilities. Therefore, we can add code specific to the case, and
always assert that capabilities never overlap in the other cases,
resolving these inconsistencies.

v8:
- Return boolean with pci_check_capability_overlap() (Philippe Mathieu-Daudé)

v7:
- Perform checks in vfio_msi_setup() and vfio_msix_setup() (Alex Williamson)

v6:
- Error in case of MSI/MSI-X capability overlap (Alex Williamson)

v5:
- Fix capability ID specification in vfio_msi_early_setup (Alex Williamson)
- Use range_covers_byte() (Alex Williamson)
- warn_report() in case of MSI/MSI-X capability overlap (Alex Williamson)

v4:
- Fix typos in messages (Markus Armbruster)
- hw/vfio/pci: Ensure MSI and MSI-X do not overlap (Alex Williamson)

v3:
- Correct patch split between virtio-pci and pci (Markus Armbruster)
- Add messages for individual patches (Markus Armbruster)
- Acked-by: Jonathan Cameron 

Akihiko Odaki (17):
  hw/vfio/pci: Ensure MSI and MSI-X do not overlap
  pci: Allow to omit errp for pci_add_capability
  hw/i386/amd_iommu: Omit errp for pci_add_capability
  ahci: Omit errp for pci_add_capability
  e1000e: Omit errp for pci_add_capability
  eepro100: Omit errp for pci_add_capability
  hw/nvme: Omit errp for pci_add_capability
  msi: Omit errp for pci_add_capability
  hw/pci/pci_bridge: Omit errp for pci_add_capability
  pcie: Omit errp for pci_add_capability
  pci/shpc: Omit errp for pci_add_capability
  msix: Omit errp for pci_add_capability
  pci/slotid: Omit errp for pci_add_capability
  hw/pci-bridge/pcie_pci_bridge: Omit errp for pci_add_capability
  hw/vfio/pci: Omit errp for pci_add_capability
  virtio-pci: Omit errp for pci_add_capability
  pci: Remove legacy errp from pci_add_capability

 docs/pcie_sriov.txt|  4 +--
 hw/display/bochs-display.c |  4 +--
 hw/i386/amd_iommu.c| 21 +++-
 hw/ide/ich.c   |  8 ++---
 hw/net/e1000e.c| 22 +++--
 hw/net/eepro100.c  |  7 +---
 hw/nvme/ctrl.c | 14 ++--
 hw/pci-bridge/cxl_downstream.c |  9 ++
 hw/pci-bridge/cxl_upstream.c   |  8 ++---
 hw/pci-bridge/i82801b11.c  | 14 ++--
 hw/pci-bridge/pci_bridge_dev.c |  2 +-
 hw/pci-bridge/pcie_pci_bridge.c| 19 +++
 hw/pci-bridge/pcie_root_port.c | 16 ++---
 hw/pci-bridge/xio3130_downstream.c | 15 ++---
 hw/pci-bridge/xio3130_upstream.c   | 15 ++---
 hw/pci-host/designware.c   |  3 +-
 hw/pci-host/xilinx-pcie.c  |  4 +--
 hw/pci/msi.c   |  9 +-
 hw/pci/msix.c  |  8 ++---
 hw/pci/pci.c   | 48 +--
 hw/pci/pci_bridge.c| 21 
 hw/pci/pcie.c  | 52 --
 hw/pci/shpc.c  | 23 -
 hw/pci/slotid_cap.c|  8 ++---
 hw/usb/hcd-xhci-pci.c  |  3 +-
 hw/vfio/pci-quirks.c   | 15 ++---
 hw/vfio/pci.c  | 29 +++--
 hw/virtio/virtio-pci.c | 12 ++-
 include/hw/pci/pci.h   |  8 +++--
 include/hw/pci/pci_bridge.h|  5 ++-
 include/hw/pci/pcie.h  | 11 +++
 include/hw/pci/shpc.h  |  3 +-
 include/hw/virtio/virtio-pci.h |  2 +-
 33 files changed, 133 insertions(+), 309 deletions(-)

-- 
2.38.1




UI layer threading and locking strategy; memory_region_snapshot_and_clear_dirty() races

2022-11-01 Thread Peter Maydell
Hi; I'm trying to find out what the UI layer's threading and
locking strategy is, at least as far as it applies to display
device models.

Specifically:
 * is the device's GraphicHwOps::gfx_update method always called
   from one specific thread, or might it be called from any thread?
 * is that method called with any locks guaranteed held? (eg the
   iothread lock)
 * is the caller of the gfx_update method OK if an implementation
   of the method drops the iothread lock temporarily while it is
   executing? (my guess would be "no")
 * for a gfx_update_async = true device, what are the requirements
   on calling graphic_hw_update_done()? Does the caller need to hold
   any particular lock? Does the call need to be done from any
   particular thread?

The background to this is that I'm looking again at the race
condition involving the memory_region_snapshot_and_clear_dirty()
function, as described here:
 
https://lore.kernel.org/qemu-devel/CAFEAcA9odnPo2LPip295Uztri7JfoVnQbkJ=wn+k8dqneb_...@mail.gmail.com/T/#u

Having worked through what is going on, as far as I can see:
 (1) in order to be sure that we have the right data to match
 the snapshotted dirty bitmap state, we must wait for all TCG
 vCPUs to leave their current TB
 (2) a vCPU might block waiting for the iothread lock mid-TB
 (3) therefore we cannot wait for the TCG vCPUs without dropping
 the iothread lock one way or another
 (4) but none of the callers expect that and various things break

My tentative idea for a fix is a bit of an upheaval:
 * have the display devices set gfx_update_async = true
 * instead of doing everything synchronously in their gfx_update
   method, they do the initial setup and call an 'async' version
   of memory_region_snapshot_and_clear_dirty()
 * that async version of the function will do what it does today,
   but without trying to wait for TCG vCPUs
 * instead the caller arranges (via call_rcu(), probably) a
   callback that will happen once all the TCG CPUs have finished
   executing their current TB
 * that callback does the actual copy-from-guest-ram-to-display
   and then calls graphic_hw_update_done()

This seems like an awful pain in the neck but I couldn't see
anything better :-(

Paolo: what (if any) guarantee does call_rcu() make about
which thread the callback function gets executed on, and what
locks are/are not held when it's called?

(I haven't looked at the migration code's use of
memory_global_after_dirty_log_sync() but I suspect it's
similarly broken.)

thanks
-- PMM



[PATCH v8 12/17] msix: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of msix_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/msix.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 1e381a9813..28af83403b 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -311,7 +311,7 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
   uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
   Error **errp)
 {
-int cap;
+uint8_t cap;
 unsigned table_size, pba_size;
 uint8_t *config;
 
@@ -340,11 +340,7 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
 return -EINVAL;
 }
 
-cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
-  cap_pos, MSIX_CAP_LENGTH, errp);
-if (cap < 0) {
-return cap;
-}
+cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH);
 
 dev->msix_cap = cap;
 dev->cap_present |= QEMU_PCI_CAP_MSIX;
-- 
2.38.1




Re: [PULL 00/14] qemu-macppc queue 20221031

2022-11-01 Thread Stefan Hajnoczi
Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.2 for any 
user-visible changes.


signature.asc
Description: PGP signature


Re: [PATCH 15/16] tests/qtest: virtio-9p-test: Adapt the case for win32

2022-11-01 Thread Christian Schoenebeck
On Monday, October 24, 2022 6:57:58 AM CET Bin Meng wrote:
> From: Guohuai Shi 
> 
> Windows does not provide the getuid() API. Let's create a local
> one and return a fixed value 0 as the uid for testing.
> 
> Signed-off-by: Guohuai Shi 
> Signed-off-by: Xuzhou Cheng 
> Signed-off-by: Bin Meng 
> ---
> 
>  tests/qtest/virtio-9p-test.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 25305a4cf7..e81e3e3709 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -34,6 +34,13 @@ static uint32_t genfid(void)
>  return fid_generator++;
>  }
>  
> +#ifdef CONFIG_WIN32
> +static uint32_t getuid(void)
> +{
> +return 0;
> +}
> +#endif
> +

Due to recent 9p tests restructuring changes, same would be needed for new
tests/qtest/libqos/virtio-9p-client.c source file, as it's also calling 
getuid().

>  /**
>   * Splits the @a in string by @a delim into individual (non empty) strings
>   * and outputs them to @a out. The output array @a out is NULL terminated.
> 






[PATCH v9 15/17] hw/vfio/pci: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
The code generating errors in pci_add_capability has a comment which
says:
> Verify that capabilities don't overlap.  Note: device assignment
> depends on this check to verify that the device is not broken.
> Should never trigger for emulated devices, but it's helpful for
> debugging these.

Indeed vfio has some code that passes capability offsets and sizes from
a physical device, but it explicitly pays attention so that the
capabilities never overlap. Therefore, in pci_add_capability(), we can
always assert that capabilities never overlap, and that is what happens
when omitting errp.

Signed-off-by: Akihiko Odaki 
---
 hw/vfio/pci-quirks.c | 15 +++
 hw/vfio/pci.c| 14 +-
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index f0147a050a..e94fd273ea 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1530,7 +1530,7 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
 static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
 {
 PCIDevice *pdev = &vdev->pdev;
-int ret, pos = 0xC8;
+int pos = 0xC8;
 
 if (vdev->nv_gpudirect_clique == 0xFF) {
 return 0;
@@ -1547,11 +1547,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice 
*vdev, Error **errp)
 return -EINVAL;
 }
 
-ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
-if (ret < 0) {
-error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
-return ret;
-}
+pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8);
 
 memset(vdev->emulated_config_bits + pos, 0xFF, 8);
 pos += PCI_CAP_FLAGS;
@@ -1718,12 +1714,7 @@ static int vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, 
Error **errp)
 return -EFAULT;
 }
 
-ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
- VMD_SHADOW_CAP_LEN, errp);
-if (ret < 0) {
-error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
-return ret;
-}
+pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos, VMD_SHADOW_CAP_LEN);
 
 memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
 pos += PCI_CAP_FLAGS;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0ca6b5ff4b..458729eae3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1839,7 +1839,7 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, 
int pos,
 vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
 }
 
-static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
+static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, uint8_t pos, uint8_t size,
Error **errp)
 {
 uint16_t flags;
@@ -1956,11 +1956,7 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int 
pos, uint8_t size,
1, PCI_EXP_FLAGS_VERS);
 }
 
-pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size,
- errp);
-if (pos < 0) {
-return pos;
-}
+pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size);
 
 vdev->pdev.exp.exp_cap = pos;
 
@@ -2058,14 +2054,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, 
uint8_t pos, Error **errp)
 case PCI_CAP_ID_PM:
 vfio_check_pm_reset(vdev, pos);
 vdev->pm_cap = pos;
-ret = pci_add_capability(pdev, cap_id, pos, size, errp);
+pci_add_capability(pdev, cap_id, pos, size);
 break;
 case PCI_CAP_ID_AF:
 vfio_check_af_flr(vdev, pos);
-ret = pci_add_capability(pdev, cap_id, pos, size, errp);
+pci_add_capability(pdev, cap_id, pos, size);
 break;
 default:
-ret = pci_add_capability(pdev, cap_id, pos, size, errp);
+pci_add_capability(pdev, cap_id, pos, size);
 break;
 }
 
-- 
2.38.1




Re: [PATCH] migration: check magic value for deciding the mapping of channels

2022-11-01 Thread manish.mishra



On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:

On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:

Current logic assumes that channel connections on the destination side are
always established in the same order as the source and the first one will
always be the default channel followed by the multifd or post-copy
preemption channel. This may not be always true, as even if a channel has a
connection established on the source side it can be in the pending state on
the destination side and a newer connection can be established first.
Basically causing out of order mapping of channels on the destination side.
Currently, all channels except post-copy preempt send a magic number, this
patch uses that magic number to decide the type of channel. This logic is
applicable only for precopy(multifd) live migration, as mentioned, the
post-copy preempt channel does not send any magic number. Also, this patch
uses MSG_PEEK to check the magic number of channels so that current
data/control stream management remains unaffected.

Signed-off-by: manish.mishra 
---
  include/io/channel.h | 25 +
  io/channel-socket.c  | 27 +++
  io/channel.c | 39 +++
  migration/migration.c| 33 +
  migration/multifd.c  | 12 
  migration/multifd.h  |  2 +-
  migration/postcopy-ram.c |  5 +
  migration/postcopy-ram.h |  2 +-
  8 files changed, 119 insertions(+), 26 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..74177aeeea 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -115,6 +115,10 @@ struct QIOChannelClass {
  int **fds,
  size_t *nfds,
  Error **errp);
+ssize_t (*io_read_peek)(QIOChannel *ioc,
+void *buf,
+size_t nbytes,
+Error **errp);
  int (*io_close)(QIOChannel *ioc,
  Error **errp);
  GSource * (*io_create_watch)(QIOChannel *ioc,
@@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
size_t buflen,
Error **errp);
  
+/**

+ * qio_channel_read_peek_all:
+ * @ioc: the channel object
+ * @buf: the memory region to read in data
+ * @nbytes: the number of bytes to read
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read given @nbytes data from peek of channel into
+ * memory region @buf.
+ *
+ * The function will be blocked until read size is
+ * equal to requested size.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *  occurs without data, or -1 on error
+ */
+int qio_channel_read_peek_all(QIOChannel *ioc,
+  void* buf,
+  size_t nbytes,
+  Error **errp);
+
  /**
   * qio_channel_set_blocking:
   * @ioc: the channel object
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..b99f5dfda6 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
  }
  #endif /* WIN32 */
  
+static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,

+void *buf,
+size_t nbytes,
+Error **errp)
+{
+QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+ssize_t bytes = 0;
+
+retry:
+bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
+
+if (bytes < 0) {
+if (errno == EINTR) {
+goto retry;
+}
+if (errno == EAGAIN) {
+return QIO_CHANNEL_ERR_BLOCK;
+}
+
+error_setg_errno(errp, errno,
+ "Unable to read from peek of socket");
+return -1;
+}
+
+return bytes;
+}
  
  #ifdef QEMU_MSG_ZEROCOPY

  static int qio_channel_socket_flush(QIOChannel *ioc,
@@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass 
*klass,
  
  ioc_klass->io_writev = qio_channel_socket_writev;

  ioc_klass->io_readv = qio_channel_socket_readv;
+ioc_klass->io_read_peek = qio_channel_socket_read_peek;
  ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
  ioc_klass->io_close = qio_channel_socket_close;
  ioc_klass->io_shutdown = qio_channel_socket_shutdown;
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..a2d9b96f3f 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
  return qio_channel_writev_all(ioc, &iov, 1, errp);
  }
  
+int qio_channel_read_peek_all(QIOChannel *ioc,

+  void* buf,
+  size_t nbytes,
+  Error **errp)
+{
+   QIOChannelClass *klass = QIO

[PATCH v9 11/17] pci/shpc: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. A caller of shpc_init(), which calls
pci_add_capability() in turn, is expected to ensure that will not
happen.

Signed-off-by: Akihiko Odaki 
---
 include/hw/pci/shpc.h   |  3 +--
 hw/pci-bridge/pci_bridge_dev.c  |  2 +-
 hw/pci-bridge/pcie_pci_bridge.c |  2 +-
 hw/pci/shpc.c   | 23 ++-
 4 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h
index d5683b7399..18ab16ec9f 100644
--- a/include/hw/pci/shpc.h
+++ b/include/hw/pci/shpc.h
@@ -38,8 +38,7 @@ struct SHPCDevice {
 
 void shpc_reset(PCIDevice *d);
 int shpc_bar_size(PCIDevice *dev);
-int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar,
-  unsigned off, Error **errp);
+int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar, unsigned 
off);
 void shpc_cleanup(PCIDevice *dev, MemoryRegion *bar);
 void shpc_free(PCIDevice *dev);
 void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len);
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 657a06ddbe..4b6d1876eb 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -66,7 +66,7 @@ static void pci_bridge_dev_realize(PCIDevice *dev, Error 
**errp)
 dev->config[PCI_INTERRUPT_PIN] = 0x1;
 memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
shpc_bar_size(dev));
-err = shpc_init(dev, &br->sec_bus, &bridge_dev->bar, 0, errp);
+err = shpc_init(dev, &br->sec_bus, &bridge_dev->bar, 0);
 if (err) {
 goto shpc_error;
 }
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index df5dfdd139..99778e3e24 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -42,7 +42,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error 
**errp)
 d->config[PCI_INTERRUPT_PIN] = 0x1;
 memory_region_init(&pcie_br->shpc_bar, OBJECT(d), "shpc-bar",
shpc_bar_size(d));
-rc = shpc_init(d, &br->sec_bus, &pcie_br->shpc_bar, 0, errp);
+rc = shpc_init(d, &br->sec_bus, &pcie_br->shpc_bar, 0);
 if (rc) {
 goto error;
 }
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index e71f3a7483..5b3228c793 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -440,16 +440,11 @@ static void shpc_cap_update_dword(PCIDevice *d)
 }
 
 /* Add SHPC capability to the config space for the device. */
-static int shpc_cap_add_config(PCIDevice *d, Error **errp)
+static void shpc_cap_add_config(PCIDevice *d)
 {
 uint8_t *config;
-int config_offset;
-config_offset = pci_add_capability(d, PCI_CAP_ID_SHPC,
-   0, SHPC_CAP_LENGTH,
-   errp);
-if (config_offset < 0) {
-return config_offset;
-}
+uint8_t config_offset;
+config_offset = pci_add_capability(d, PCI_CAP_ID_SHPC, 0, SHPC_CAP_LENGTH);
 config = d->config + config_offset;
 
 pci_set_byte(config + SHPC_CAP_DWORD_SELECT, 0);
@@ -459,7 +454,6 @@ static int shpc_cap_add_config(PCIDevice *d, Error **errp)
 /* Make dword select and data writable. */
 pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff);
 pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0xffffffff);
-return 0;
 }
 
 static uint64_t shpc_mmio_read(void *opaque, hwaddr addr,
@@ -584,18 +578,13 @@ void shpc_device_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 }
 
 /* Initialize the SHPC structure in bridge's BAR. */
-int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar,
-  unsigned offset, Error **errp)
+int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, unsigned 
offset)
 {
-int i, ret;
+int i;
 int nslots = SHPC_MAX_SLOTS; /* TODO: qdev property? */
 SHPCDevice *shpc = d->shpc = g_malloc0(sizeof(*d->shpc));
 shpc->sec_bus = sec_bus;
-ret = shpc_cap_add_config(d, errp);
-if (ret) {
-g_free(d->shpc);
-return ret;
-}
+shpc_cap_add_config(d);
 if (nslots < SHPC_MIN_SLOTS) {
 return 0;
 }
-- 
2.38.1




[PATCH v8 04/17] ahci: Omit errp for pci_add_capability

2022-11-01 Thread Akihiko Odaki
Omitting errp for pci_add_capability() causes it to abort if
capabilities overlap. This behavior is appropriate here because all of
the capabilities set in this device are defined in the program and
their overlap should not happen unless there is a programming error.

Signed-off-by: Akihiko Odaki 
---
 hw/ide/ich.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 1007a51fcb..3b478b01f8 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -106,7 +106,7 @@ static void pci_ich9_ahci_init(Object *obj)
 static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
 {
 struct AHCIPCIState *d;
-int sata_cap_offset;
+uint8_t sata_cap_offset;
 uint8_t *sata_cap;
 d = ICH9_AHCI(dev);
 int ret;
@@ -130,11 +130,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error 
**errp)
  &d->ahci.mem);
 
 sata_cap_offset = pci_add_capability(dev, PCI_CAP_ID_SATA,
-  ICH9_SATA_CAP_OFFSET, SATA_CAP_SIZE,
-  errp);
-if (sata_cap_offset < 0) {
-return;
-}
+  ICH9_SATA_CAP_OFFSET, SATA_CAP_SIZE);
 
 sata_cap = dev->config + sata_cap_offset;
 pci_set_word(sata_cap + SATA_CAP_REV, 0x10);
-- 
2.38.1




  1   2   >