Re: [PATCH 1/1] hw/ide/core: terminate in-flight DMA on IDE bus reset

2023-09-26 Thread Fiona Ebner
Am 25.09.23 um 21:53 schrieb John Snow:
> On Thu, Sep 21, 2023 at 12:07 PM Simon Rowe  wrote:
>>
>> When an IDE controller is reset, its internal state is being cleared
>> before any outstanding I/O is cancelled. If a response to DMA is
>> received in this window, the aio callback will incorrectly continue
>> with the next part of the transfer (now using sector 0 from
>> the cleared controller state).
> 
> Eugh, yikes. It feels like we should fix the cancellation ... 
Please note that there already is a patch for that on the list:
https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg01011.html

Best Regards,
Fiona




[PATCH 1/1] target/loongarch: Clean up local variable shadowing

2023-09-26 Thread Song Gao
Fix:

  [1839/2601] Compiling C object 
libqemu-loongarch64-softmmu.fa.p/hw_loongarch_virt.c.o
  ../hw/loongarch/virt.c: In function 'loongarch_irq_init':
  ../hw/loongarch/virt.c:665:14: warning: declaration of 'i' shadows a previous 
local [-Wshadow=compatible-local]
   for (int i = 0; i < num; i++) {
^
  ../hw/loongarch/virt.c:582:19: note: shadowed declaration is here
   int cpu, pin, i, start, num;

Signed-off-by: Song Gao 
---
 hw/loongarch/virt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 2629128aed..b0a004f860 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -662,7 +662,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams)
 sysbus_mmio_get_region(d, 2));
 
 /* Connect pch_pic irqs to extioi */
-for (int i = 0; i < num; i++) {
+for (i = 0; i < num; i++) {
 qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i));
 }
 
-- 
2.39.1




Re: [RFC] Proposal of QEMU PCI Endpoint test environment

2023-09-26 Thread Christoph Hellwig
On Thu, Sep 21, 2023 at 02:41:54PM +0530, Kishon Vijay Abraham I wrote:
> > PCI Endpoint function driver is implemented using the PCIe Endpoint
> > framework, but it requires physical boards for testing, and it is difficult
> > to test sufficiently. In order to find bugs and hardware-dependent
> > implementations early, continuous testing is required. Since it is
> > difficult to automate tests that require hardware, this RFC proposes a
> > virtual environment for testing PCI endpoint function drivers.
> 
> This would be quite useful and thank you for attempting it! I would like to
> compare other mechanisms available in-addition to QEMU before going with the
> QEMU approach.

Well, the point of PCIe endpoint subsystem in vhost or similar is that
you can use one and the same endpoint implementation.  So you can debug
it using qemu and then use it with a physical port, which would be really
amazing.




Re: [PATCH] m68k: Silence -Wshadow=local warnings in the m68k code

2023-09-26 Thread Laurent Vivier

Le 25/09/2023 à 20:56, Thomas Huth a écrit :

Rename the innermost variables to make the code compile
without warnings when using -Wshadow=local.

Signed-off-by: Thomas Huth 
---
  hw/m68k/bootinfo.h  | 10 --
  disas/m68k.c|  8 
  target/m68k/translate.c |  8 
  3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/hw/m68k/bootinfo.h b/hw/m68k/bootinfo.h
index a3d37e3c80..d077d03559 100644
--- a/hw/m68k/bootinfo.h
+++ b/hw/m68k/bootinfo.h
@@ -44,15 +44,14 @@
  
  #define BOOTINFOSTR(base, id, string) \

  do { \
-int i; \
  stw_p(base, id); \
  base += 2; \
  stw_p(base, \
   (sizeof(struct bi_record) + strlen(string) + \
1 /* null termination */ + 3 /* padding */) & ~3); \
  base += 2; \
-for (i = 0; string[i]; i++) { \
-stb_p(base++, string[i]); \
+for (int _i = 0; string[_i]; _i++) { \
+stb_p(base++, string[_i]); \
  } \
  stb_p(base++, 0); \
  base = QEMU_ALIGN_PTR_UP(base, 4); \
@@ -60,7 +59,6 @@
  
  #define BOOTINFODATA(base, id, data, len) \

  do { \
-int i; \
  stw_p(base, id); \
  base += 2; \
  stw_p(base, \
@@ -69,8 +67,8 @@
  base += 2; \
  stw_p(base, len); \
  base += 2; \
-for (i = 0; i < len; ++i) { \
-stb_p(base++, data[i]); \
+for (int _i = 0; _i < len; ++_i) { \
+stb_p(base++, data[_i]); \
  } \
  base = QEMU_ALIGN_PTR_UP(base, 4); \
  } while (0)
diff --git a/disas/m68k.c b/disas/m68k.c
index aefaecfbd6..a384b4cb64 100644
--- a/disas/m68k.c
+++ b/disas/m68k.c
@@ -1632,10 +1632,10 @@ print_insn_arg (const char *d,
  case '2':
  case '3':
{
-   int val = fetch_arg (buffer, place, 5, info);
+   int val2 = fetch_arg (buffer, place, 5, info);
  const char *name = 0;
  
-	switch (val)

+   switch (val2)
  {
  case 2: name = "%tt0"; break;
  case 3: name = "%tt1"; break;
@@ -1655,12 +1655,12 @@ print_insn_arg (const char *d,
  int break_reg = ((buffer[3] >> 2) & 7);
  
  	  (*info->fprintf_func)

-   (info->stream, val == 0x1c ? "%%bad%d" : "%%bac%d",
+   (info->stream, val2 == 0x1c ? "%%bad%d" : "%%bac%d",
 break_reg);
}
break;
  default:
-   (*info->fprintf_func) (info->stream, "", val);
+   (*info->fprintf_func) (info->stream, "", val2);
  }
if (name)
  (*info->fprintf_func) (info->stream, "%s", name);


"reg" would be a better name than "val2".


diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 9e224fe796..b28d7f7d4b 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -824,14 +824,14 @@ static TCGv gen_ea_mode(CPUM68KState *env, DisasContext 
*s, int mode, int reg0,
  reg = get_areg(s, reg0);
  result = gen_ldst(s, opsize, reg, val, what, index);
  if (what == EA_STORE || !addrp) {
-TCGv tmp = tcg_temp_new();
+TCGv tmp2 = tcg_temp_new();
  if (reg0 == 7 && opsize == OS_BYTE &&
  m68k_feature(s->env, M68K_FEATURE_M68K)) {
-tcg_gen_addi_i32(tmp, reg, 2);
+tcg_gen_addi_i32(tmp2, reg, 2);
  } else {
-tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize));
+tcg_gen_addi_i32(tmp2, reg, opsize_bytes(opsize));
  }
-delay_set_areg(s, reg0, tmp, true);
+delay_set_areg(s, reg0, tmp2, true);
  }
  return result;
  case 4: /* Indirect predecrememnt.  */


"inc" would be a better name than "tmp2".

Otherwise:

Reviewed-by: Laurent Vivier 


Thanks,
Laurent




Re: [PATCH v2 10/20] q800: add easc bool machine class property to switch between ASC and EASC

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

This determines whether the Apple Sound Chip (ASC) is set to enhanced mode
(default) or to original mode. The real Q800 hardware used an EASC chip however
a lot of older software only works with the older ASC chip.

Adding this as a machine parameter allows QEMU to be used as a developer aid
for testing and migrating code from ASC to EASC.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Philippe Mathieu-Daudé 
---
  hw/m68k/q800.c | 30 +-
  include/hw/m68k/q800.h |  1 +
  2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index ae07aa20ff..5ae7c37760 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -484,7 +484,8 @@ static void q800_machine_init(MachineState *machine)
  /* Apple Sound Chip */
  
  object_initialize_child(OBJECT(machine), "asc", &m->asc, TYPE_ASC);

-qdev_prop_set_uint8(DEVICE(&m->asc), "asctype", ASC_TYPE_EASC);
+qdev_prop_set_uint8(DEVICE(&m->asc), "asctype", m->easc ? ASC_TYPE_EASC
+: ASC_TYPE_ASC);
  sysbus = SYS_BUS_DEVICE(&m->asc);
  sysbus_realize_and_unref(sysbus, &error_fatal);
  memory_region_add_subregion(&m->macio, ASC_BASE - IO_BASE,
@@ -674,6 +675,28 @@ static void q800_machine_init(MachineState *machine)
  }
  }
  
+static bool q800_get_easc(Object *obj, Error **errp)

+{
+Q800MachineState *ms = Q800_MACHINE(obj);
+
+return ms->easc;
+}
+
+static void q800_set_easc(Object *obj, bool value, Error **errp)
+{
+Q800MachineState *ms = Q800_MACHINE(obj);
+
+ms->easc = value;
+}
+
+static void q800_init(Object *obj)
+{
+Q800MachineState *ms = Q800_MACHINE(obj);
+
+/* Default to EASC */
+ms->easc = true;
+}
+
  static GlobalProperty hw_compat_q800[] = {
  { "scsi-hd", "quirk_mode_page_vendor_specific_apple", "on" },
  { "scsi-hd", "vendor", " SEAGATE" },
@@ -706,11 +729,16 @@ static void q800_machine_class_init(ObjectClass *oc, void 
*data)
  mc->block_default_type = IF_SCSI;
  mc->default_ram_id = "m68k_mac.ram";
  compat_props_add(mc->compat_props, hw_compat_q800, hw_compat_q800_len);
+
+object_class_property_add_bool(oc, "easc", q800_get_easc, q800_set_easc);
+object_class_property_set_description(oc, "easc",
+"Set to off to use ASC rather than EASC");
  }
  
  static const TypeInfo q800_machine_typeinfo = {

  .name   = MACHINE_TYPE_NAME("q800"),
  .parent = TYPE_MACHINE,
+.instance_init = q800_init,
  .instance_size = sizeof(Q800MachineState),
  .class_init = q800_machine_class_init,
  };
diff --git a/include/hw/m68k/q800.h b/include/hw/m68k/q800.h
index 790cf433f3..fbaacd88bd 100644
--- a/include/hw/m68k/q800.h
+++ b/include/hw/m68k/q800.h
@@ -47,6 +47,7 @@
  struct Q800MachineState {
  MachineState parent_obj;
  
+bool easc;

  M68kCPU cpu;
  MemoryRegion rom;
  GLUEState glue;


Reviewed-by: Laurent Vivier 




Re: [PATCH v2 11/20] swim: add trace events for IWM and ISM registers

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

Signed-off-by: Mark Cave-Ayland 
---
  hw/block/swim.c   | 14 ++
  hw/block/trace-events |  7 +++
  2 files changed, 21 insertions(+)

diff --git a/hw/block/swim.c b/hw/block/swim.c
index 333da08ce0..7df36ea139 100644
--- a/hw/block/swim.c
+++ b/hw/block/swim.c
@@ -19,6 +19,7 @@
  #include "hw/block/block.h"
  #include "hw/block/swim.h"
  #include "hw/qdev-properties.h"
+#include "trace.h"
  
  /* IWM registers */
  
@@ -125,6 +126,13 @@

  #define SWIM_HEDSEL  0x20
  #define SWIM_MOTON   0x80
  
+static const char *swim_reg_names[] = {

+"WRITE_DATA", "WRITE_MARK", "WRITE_CRC", "WRITE_PARAMETER",
+"WRITE_PHASE", "WRITE_SETUP", "WRITE_MODE0", "WRITE_MODE1",
+"READ_DATA", "READ_MARK", "READ_ERROR", "READ_PARAMETER",
+"READ_PHASE", "READ_SETUP", "READ_STATUS", "READ_HANDSHAKE"
+};
+
  static void fd_recalibrate(FDrive *drive)
  {
  }
@@ -267,6 +275,7 @@ static void iwmctrl_write(void *opaque, hwaddr reg, 
uint64_t value,
  reg >>= REG_SHIFT;
  
  swimctrl->regs[reg >> 1] = reg & 1;

+trace_swim_iwmctrl_write((reg >> 1), size, (reg & 1));
  
  if (swimctrl->regs[IWM_Q6] &&

  swimctrl->regs[IWM_Q7]) {
@@ -297,6 +306,7 @@ static void iwmctrl_write(void *opaque, hwaddr reg, 
uint64_t value,
  if (value == 0x57) {
  swimctrl->mode = SWIM_MODE_SWIM;
  swimctrl->iwm_switch = 0;
+trace_swim_iwm_switch();
  }
  break;
  }
@@ -312,6 +322,7 @@ static uint64_t iwmctrl_read(void *opaque, hwaddr reg, 
unsigned size)
  
  swimctrl->regs[reg >> 1] = reg & 1;
  
+trace_swim_iwmctrl_read((reg >> 1), size, (reg & 1));

  return 0;
  }
  
@@ -327,6 +338,8 @@ static void swimctrl_write(void *opaque, hwaddr reg, uint64_t value,
  
  reg >>= REG_SHIFT;
  
+trace_swim_swimctrl_write(reg, swim_reg_names[reg], size, value);

+
  switch (reg) {
  case SWIM_WRITE_PHASE:
  swimctrl->swim_phase = value;
@@ -376,6 +389,7 @@ static uint64_t swimctrl_read(void *opaque, hwaddr reg, 
unsigned size)
  break;
  }
  
+trace_swim_swimctrl_read(reg, swim_reg_names[reg], size, value);

  return value;
  }
  
diff --git a/hw/block/trace-events b/hw/block/trace-events

index 34be8b9135..c041ec45e3 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -90,3 +90,10 @@ m25p80_read_data(void *s, uint32_t pos, uint8_t v) "[%p] Read data 
0x%"PRIx32"=0
  m25p80_read_sfdp(void *s, uint32_t addr, uint8_t v) "[%p] Read SFDP 
0x%"PRIx32"=0x%"PRIx8
  m25p80_binding(void *s) "[%p] Binding to IF_MTD drive"
  m25p80_binding_no_bdrv(void *s) "[%p] No BDRV - binding to RAM"
+
+# swim.c
+swim_swimctrl_read(int reg, const char *name, unsigned size, uint64_t value) 
"reg=%d [%s] size=%u value=0x%"PRIx64
+swim_swimctrl_write(int reg, const char *name, unsigned size, uint64_t value) 
"reg=%d [%s] size=%u value=0x%"PRIx64
+swim_iwmctrl_read(int reg, unsigned size, uint64_t value) "reg=%d size=%u 
value=0x%"PRIx64
+swim_iwmctrl_write(int reg, unsigned size, uint64_t value) "reg=%d size=%u 
value=0x%"PRIx64
+swim_iwm_switch(void) "switch from IWM to SWIM mode"


Reviewed-by: Laurent Vivier 




[PATCH 1/1] tcg/loongarch64: Fix build error

2023-09-26 Thread Song Gao
From: gaosong 

Fix:

  In file included from ../tcg/tcg.c:735:
  /home1/gaosong/bugfix/qemu/tcg/loongarch64/tcg-target.c.inc: In function 
‘tcg_out_vec_op’:
  /home1/gaosong/bugfix/qemu/tcg/loongarch64/tcg-target.c.inc:1855:9: error: a 
label can only be part of a statement and a declaration is not a statement
   TCGCond cond = args[3];
   ^~~

Signed-off-by: gaosong 
---
 tcg/loongarch64/tcg-target.c.inc | 68 
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index b701df50db..8f7091002b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1852,43 +1852,45 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_vnor_v(s, a0, a1, a1);
 break;
 case INDEX_op_cmp_vec:
-TCGCond cond = args[3];
-if (const_args[2]) {
-/*
- * cmp_vec dest, src, value
- * Try vseqi/vslei/vslti
- */
-int64_t value = sextract64(a2, 0, 8 << vece);
-if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
- cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
-tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], 
\
- a0, a1, value));
-break;
-} else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
-(0x00 <= value && value <= 0x1f)) {
-tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], 
\
- a0, a1, value));
-break;
-}
+{
+TCGCond cond = args[3];
+if (const_args[2]) {
+/*
+ * cmp_vec dest, src, value
+ * Try vseqi/vslei/vslti
+ */
+int64_t value = sextract64(a2, 0, 8 << vece);
+if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
+ cond == TCG_COND_LT) && (-0x10 <= value && value <= 
0x0f)) {
+tcg_out32(s, 
encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
+ a0, a1, value));
+break;
+} else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
+(0x00 <= value && value <= 0x1f)) {
+tcg_out32(s, 
encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
+ a0, a1, value));
+break;
+}
 
-/*
- * Fallback to:
- * dupi_vec temp, a2
- * cmp_vec a0, a1, temp, cond
- */
-tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
-a2 = temp_vec;
-}
+/*
+ * Fallback to:
+ * dupi_vec temp, a2
+ * cmp_vec a0, a1, temp, cond
+ */
+tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
+a2 = temp_vec;
+}
 
-insn = cmp_vec_insn[cond][vece];
-if (insn == 0) {
-TCGArg t;
-t = a1, a1 = a2, a2 = t;
-cond = tcg_swap_cond(cond);
 insn = cmp_vec_insn[cond][vece];
-tcg_debug_assert(insn != 0);
+if (insn == 0) {
+TCGArg t;
+t = a1, a1 = a2, a2 = t;
+cond = tcg_swap_cond(cond);
+insn = cmp_vec_insn[cond][vece];
+tcg_debug_assert(insn != 0);
+}
+tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
-tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 break;
 case INDEX_op_add_vec:
 tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
-- 
2.39.3




Re: [PATCH 0/4] ui/console: multihead: fix crash, simplify logic

2023-09-26 Thread Marc-André Lureau
Hi Laszlo

On Mon, Sep 25, 2023 at 7:36 PM Laszlo Ersek  wrote:
> Has this been queued by someone? Both Gerd and Marc-André are "odd
> fixers", so I'm not sure who should be sending a PR with these patches
> (and I don't see a pending PULL at
> 
> with these patch subjects included).

I have the series in my "ui" branch. I was waiting for a few more
patches to be accumulated. But if someone else takes this first, I'll
drop them.


-- 
Marc-André Lureau



Re: [PATCH v2 14/20] mac_via: work around underflow in TimeDBRA timing loop in SETUPTIMEK

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

The MacOS toolbox ROM calculates the number of branches that can be executed
per millisecond as part of its timer calibration. Since modern hosts are
considerably quicker than original hardware, the negative counter reaches zero
before the calibration completes leading to division by zero later in
CALCULATESLOD.

Instead of trying to fudge the timing loop (which won't work for 
TimeDBRA/TimeSCCDB
anyhow), use the pattern of access to the VIA1 registers to detect when 
SETUPTIMEK
has finished executing and write some well-known good timer values to TimeDBRA
and TimeSCCDB taken from real hardware with a suitable scaling factor.

Signed-off-by: Mark Cave-Ayland 
---
  hw/misc/mac_via.c | 115 ++
  hw/misc/trace-events  |   1 +
  include/hw/misc/mac_via.h |   3 +
  3 files changed, 119 insertions(+)

diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index baeb73eeb3..766a32a95d 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -16,6 +16,7 @@
   */
  
  #include "qemu/osdep.h"

+#include "exec/address-spaces.h"
  #include "migration/vmstate.h"
  #include "hw/sysbus.h"
  #include "hw/irq.h"
@@ -871,6 +872,112 @@ static void via1_auxmode_update(MOS6522Q800VIA1State *v1s)
  }
  }
  
+/*

+ * Addresses and real values for TimeDBRA/TimeSCCB to allow timer calibration
+ * to succeed (NOTE: both values have been multiplied by 3 to cope with the
+ * speed of QEMU execution on a modern host
+ */
+#define MACOS_TIMEDBRA0xd00
+#define MACOS_TIMESCCB0xd02
+
+#define MACOS_TIMEDBRA_VALUE  (0x2a00 * 3)
+#define MACOS_TIMESCCB_VALUE  (0x079d * 3)
+
+static bool via1_is_toolbox_timer_calibrated(void)
+{
+/*
+ * Indicate whether the MacOS toolbox has been calibrated by checking
+ * for the value of our magic constants
+ */
+uint16_t timedbra = lduw_be_phys(&address_space_memory, MACOS_TIMEDBRA);
+uint16_t timesccdb = lduw_be_phys(&address_space_memory, MACOS_TIMESCCB);
+
+return (timedbra == MACOS_TIMEDBRA_VALUE &&
+timesccdb == MACOS_TIMESCCB_VALUE);
+}
+
+static void via1_timer_calibration_hack(MOS6522Q800VIA1State *v1s, int addr,
+uint64_t val, int size)
+{
+/*
+ * Work around timer calibration to ensure we that we have non-zero and
+ * known good values for TIMEDRBA and TIMESCCDB.
+ *
+ * This works by attempting to detect the reset and calibration sequence
+ * of writes to VIA1
+ */
+int old_timer_hack_state = v1s->timer_hack_state;
+
+switch (v1s->timer_hack_state) {
+case 0:
+if (addr == VIA_REG_PCR && val == 0x22) {
+/* VIA_REG_PCR: configure VIA1 edge triggering */
+v1s->timer_hack_state = 1;
+}
+break;
+case 1:
+if (addr == VIA_REG_T2CL && val == 0xc) {
+/* VIA_REG_T2CL: low byte of 1ms counter */
+if (!via1_is_toolbox_timer_calibrated()) {
+v1s->timer_hack_state = 2;
+} else {
+v1s->timer_hack_state = 0;
+}
+}
+break;
+case 2:
+if (addr == VIA_REG_T2CH && val == 0x3) {
+/*
+ * VIA_REG_T2CH: high byte of 1ms counter (very likely at the
+ * start of SETUPTIMEK)
+ */
+if (!via1_is_toolbox_timer_calibrated()) {
+v1s->timer_hack_state = 3;
+} else {
+v1s->timer_hack_state = 0;
+}
+}
+break;
+case 3:
+if (addr == VIA_REG_IER && val == 0x20) {
+/*
+ * VIA_REG_IER: update at end of SETUPTIMEK
+ *
+ * Timer calibration has finished: unfortunately the values in
+ * TIMEDBRA (0xd00) and TIMESCCDB (0xd02) are so far out they
+ * cause divide by zero errors.
+ *
+ * Update them with values obtained from a real Q800 but with
+ * a x3 scaling factor which seems to work well
+ */
+stw_be_phys(&address_space_memory, MACOS_TIMEDBRA,
+MACOS_TIMEDBRA_VALUE);
+stw_be_phys(&address_space_memory, MACOS_TIMESCCB,
+MACOS_TIMESCCB_VALUE);
+
+v1s->timer_hack_state = 4;
+}
+break;
+case 4:
+/*
+ * This is the normal post-calibration timer state: we should
+ * generally remain here unless we detect the A/UX calibration
+ * loop, or a write to VIA_REG_PCR suggesting a reset
+ */
+if (addr == VIA_REG_PCR && val == 0x22) {
+/* Looks like there has been a reset? */
+v1s->timer_hack_state = 1;
+}
+break;
+default:
+g_assert_not_reached();
+}
+
+if (old_timer_hack_state != v1s->timer_hack_state) {
+trace_via1_timer_hack_state(v1s->timer_hack_state);
+}
+}
+
  stati

Re: [PATCH v2 15/20] mac_via: workaround NetBSD ADB bus enumeration issue

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

NetBSD assumes it can send its first ADB command after sending the ADB_BUSRESET
command in ADB_STATE_NEW without changing the state back to ADB_STATE_IDLE
first as detailed in the ADB protocol.

Add a workaround to detect this condition at the start of ADB enumeration
and send the next command written to SR after an ADB_BUSRESET onto the bus
regardless, even if we don't detect a state transition to ADB_STATE_NEW.

Signed-off-by: Mark Cave-Ayland 
---
  hw/misc/mac_via.c| 34 ++
  hw/misc/trace-events |  1 +
  2 files changed, 35 insertions(+)

diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index 766a32a95d..208216aed3 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -1001,6 +1001,8 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr 
addr, uint64_t val,
  {
  MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque);
  MOS6522State *ms = MOS6522(v1s);
+int oldstate, state;
+int oldsr = ms->sr;
  
  addr = (addr >> 9) & 0xf;
  
@@ -1016,6 +1018,38 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr addr, uint64_t val,
  
  v1s->last_b = ms->b;

  break;
+
+case VIA_REG_SR:
+{
+/*
+ * NetBSD assumes it can send its first ADB command after sending
+ * the ADB_BUSRESET command in ADB_STATE_NEW without changing the
+ * state back to ADB_STATE_IDLE first as detailed in the ADB
+ * protocol.
+ *
+ * Add a workaround to detect this condition at the start of ADB
+ * enumeration and send the next command written to SR after a
+ * ADB_BUSRESET onto the bus regardless, even if we don't detect a
+ * state transition to ADB_STATE_NEW.
+ *
+ * Note that in my tests the NetBSD state machine takes one ADB
+ * operation to recover which means the probe for an ADB device at
+ * address 1 always fails. However since the first device is at
+ * address 2 then this will work fine, without having to come up
+ * with a more complicated and invasive solution.
+ */
+oldstate = (v1s->last_b & VIA1B_vADB_StateMask) >>
+   VIA1B_vADB_StateShift;
+state = (ms->b & VIA1B_vADB_StateMask) >> VIA1B_vADB_StateShift;
+
+if (oldstate == ADB_STATE_NEW && state == ADB_STATE_NEW &&
+(ms->acr & VIA1ACR_vShiftOut) &&
+oldsr == 0 /* ADB_BUSRESET */) {
+trace_via1_adb_netbsd_enum_hack();
+adb_via_send(v1s, state, ms->sr);
+}
+}
+break;
  }
  }
  
diff --git a/hw/misc/trace-events b/hw/misc/trace-events

index 0c9762fdf6..db8bb2d28a 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -271,6 +271,7 @@ via1_rtc_cmd_pram_sect_write(int sector, int offset, int addr, 
int value) "secto
  via1_adb_send(const char *state, uint8_t data, const char *vadbint) "state %s 
data=0x%02x vADBInt=%s"
  via1_adb_receive(const char *state, uint8_t data, const char *vadbint, int status, int 
index, int size) "state %s data=0x%02x vADBInt=%s status=0x%x index=%d size=%d"
  via1_adb_poll(uint8_t data, const char *vadbint, int status, int index, int size) 
"data=0x%02x vADBInt=%s status=0x%x index=%d size=%d"
+via1_adb_netbsd_enum_hack(void) "using NetBSD enum hack"
  via1_auxmode(int mode) "setting auxmode to %d"
  via1_timer_hack_state(int state) "setting timer_hack_state to %d"
  


Did you ask NetBSD to fix their code?

Reviewed-by: Laurent Vivier 




Re: [PATCH v2 16/20] mac_via: implement ADB_STATE_IDLE state if shift register in input mode

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

NetBSD switches directly to IDLE state without switching the shift register to
input mode. Duplicate the existing ADB_STATE_IDLE logic in input mode from when
the shift register is in output mode which allows the ADB autopoll handler to
handle the response.

Signed-off-by: Mark Cave-Ayland 
---
  hw/misc/mac_via.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index 208216aed3..398e8d1967 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -702,6 +702,12 @@ static void adb_via_send(MOS6522Q800VIA1State *v1s, int 
state, uint8_t data)
  break;
  
  case ADB_STATE_IDLE:

+ms->b |= VIA1B_vADBInt;
+adb_autopoll_unblock(adb_bus);
+
+trace_via1_adb_send("IDLE", data,
+(ms->b & VIA1B_vADBInt) ? "+" : "-");
+
  return;
  }
  

Reviewed-by: Laurent Vivier 




Re: [PATCH v2 17/20] mac_via: always clear ADB interrupt when switching to A/UX mode

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

When the NetBSD kernel initialises it can leave the ADB interrupt asserted
depending upon where in the ADB poll cycle the MacOS ADB interrupt handler
is when the NetBSD kernel disables interrupts.

The NetBSD ADB driver uses the ADB interrupt state to determine if the ADB
is busy and refuses to send ADB commands unless it is clear. To ensure that
this doesn't happen, always clear the ADB interrupt when switching to A/UX
mode to ensure that the bus enumeration always occurs.

Signed-off-by: Mark Cave-Ayland 
---
  hw/misc/mac_via.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index 398e8d1967..5d1adf5863 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -875,6 +875,15 @@ static void via1_auxmode_update(MOS6522Q800VIA1State *v1s)
  if (irq != oldirq) {
  trace_via1_auxmode(irq);
  qemu_set_irq(v1s->auxmode_irq, irq);
+
+/*
+ * Clear the ADB interrupt. MacOS can leave VIA1B_vADBInt asserted
+ * (low) if a poll sequence doesn't complete before NetBSD disables
+ * interrupts upon boot. Fortunately NetBSD switches to the so-called
+ * "A/UX" interrupt mode after it initialises, so we can use this as
+ * a convenient place to clear the ADB interrupt for now.
+ */
+s->b |= VIA1B_vADBInt;
  }
  }
  


Reviewed-by: Laurent Vivier 




Re: [PATCH v2 18/20] q800: add ESCC alias at 0xc000

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

Tests on real Q800 hardware show that the ESCC is addressable at multiple 
locations
within the ESCC memory region - at least 0xc000, 0xc020 (as expected by the 
MacOS
toolbox ROM) and 0xc040.

All released NetBSD kernels before 10 use the 0xc000 address which causes a 
fatal
error when running the MacOS booter. Add a single memory region alias at 0xc000
to enable NetBSD kernels to start booting under QEMU.

Signed-off-by: Mark Cave-Ayland 
---
  hw/m68k/q800.c | 6 ++
  include/hw/m68k/q800.h | 1 +
  2 files changed, 7 insertions(+)

diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index 5ae7c37760..b5b2cabc33 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -451,6 +451,12 @@ static void q800_machine_init(MachineState *machine)
  memory_region_add_subregion(&m->macio, SCC_BASE - IO_BASE,
  sysbus_mmio_get_region(sysbus, 0));
  
+/* Create alias for NetBSD */

+memory_region_init_alias(&m->escc_alias, OBJECT(machine), "escc-alias",
+ sysbus_mmio_get_region(sysbus, 0), 0, 0x8);
+memory_region_add_subregion(&m->macio, SCC_BASE - IO_BASE - 0x20,
+&m->escc_alias);
+
  /* SCSI */
  
  object_initialize_child(OBJECT(machine), "esp", &m->esp,

diff --git a/include/hw/m68k/q800.h b/include/hw/m68k/q800.h
index fbaacd88bd..348eaf4703 100644
--- a/include/hw/m68k/q800.h
+++ b/include/hw/m68k/q800.h
@@ -67,6 +67,7 @@ struct Q800MachineState {
  MemoryRegion macio;
  MemoryRegion macio_alias;
  MemoryRegion machine_id;
+MemoryRegion escc_alias;
  };
  
  #define TYPE_Q800_MACHINE MACHINE_TYPE_NAME("q800")


Reviewed-by: Laurent Vivier 




Re: [PATCH v2 00/12] VIRTIO-IOMMU/VFIO: Don't assume 64b IOVA space

2023-09-26 Thread YangHang Liu
The original issue I found: after starting a VM which has two ice PFs
and a virtio-iommu device, qemu-kvm and VM guest dmesg throw lots of
duplicate VFIO_MAP_DMA errors.

After testing with Eric's build, the original issue is gone and the
Tier1 regression test against ice PF and virtio iommu device gets PASS
as well.

Tested-by: Yanghang Liu 



On Wed, Sep 13, 2023 at 4:06 PM Eric Auger  wrote:
>
> On x86, when assigning VFIO-PCI devices protected with virtio-iommu
> we encounter the case where the guest tries to map IOVAs beyond 48b
> whereas the physical VTD IOMMU only supports 48b. This ends up with
> VFIO_MAP_DMA failures at qemu level because at kernel level,
> vfio_iommu_iova_dma_valid() check returns false on vfio_map_do_map().
>
> This is due to the fact the virtio-iommu currently unconditionally
> exposes an IOVA range of 64b through its config input range fields.
>
> This series removes this assumption by retrieving the usable IOVA
> regions through the VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE UAPI when
> a VFIO device is attached. This info is communicated to the
> virtio-iommu memory region, transformed into the inverse info, i.e.
> the host reserved IOVA regions. Then the latter are combined with the
> reserved IOVA regions set through the virtio-iommu reserved-regions
> property. That way, the guest virtio-iommu driver, unchanged, is
> able to probe the whole set of reserved regions and prevent any IOVA
> belonging to those ranges from being used, achieving the original goal.
>
> Best Regards
>
> Eric
>
> This series can be found at:
> https://github.com/eauger/qemu/tree/virtio-iommu_geometry_v2
>
> History:
> v1 -> v2:
> - Remove "[PATCH 12/13] virtio-iommu: Resize memory region according
>   to the max iova info" which causes way too much trouble: trigger
>   a coredump in vhost, causes duplication of IOMMU notifiers causing
>   EEXIST vfio_dma_map errors, ... This looks like a bad usage of the
>   memory API so I prefer removing this from this series. So I was
>   also obliged to remove the vfio_find_hostwin() check in the case
>   of an IOMMU MR.
> - Let range_inverse_array() take low/high args instead of hardcoding
>   0, UINT64_MAX which both complexifies the algo and the tests.
> - Move range function description in header.
> - Check that if set_iova_ranges is called several times, new resv
>   regions are included in previous ones
>
> Eric Auger (12):
>   memory: Let ReservedRegion use Range
>   memory: Introduce memory_region_iommu_set_iova_ranges
>   vfio: Collect container iova range info
>   virtio-iommu: Rename reserved_regions into prop_resv_regions
>   virtio-iommu: Introduce per IOMMUDevice reserved regions
>   range: Introduce range_inverse_array()
>   virtio-iommu: Implement set_iova_ranges() callback
>   range: Make range_compare() public
>   util/reserved-region: Add new ReservedRegion helpers
>   virtio-iommu: Consolidate host reserved regions and property set ones
>   test: Add some tests for range and resv-mem helpers
>   vfio: Remove 64-bit IOVA address space assumption
>
>  include/exec/memory.h|  30 +++-
>  include/hw/vfio/vfio-common.h|   2 +
>  include/hw/virtio/virtio-iommu.h |   7 +-
>  include/qemu/range.h |  14 ++
>  include/qemu/reserved-region.h   |  32 
>  hw/core/qdev-properties-system.c |   9 +-
>  hw/vfio/common.c |  70 +++--
>  hw/virtio/virtio-iommu-pci.c |   8 +-
>  hw/virtio/virtio-iommu.c | 110 --
>  softmmu/memory.c |  15 ++
>  tests/unit/test-resv-mem.c   | 251 +++
>  util/range.c |  51 ++-
>  util/reserved-region.c   |  94 
>  hw/virtio/trace-events   |   1 +
>  tests/unit/meson.build   |   1 +
>  util/meson.build |   1 +
>  16 files changed, 655 insertions(+), 41 deletions(-)
>  create mode 100644 include/qemu/reserved-region.h
>  create mode 100644 tests/unit/test-resv-mem.c
>  create mode 100644 util/reserved-region.c
>
> --
> 2.41.0
>
>




Re: [PATCH v2 20/20] mac_via: extend timer calibration hack to work with A/UX

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

The A/UX timer calibration loop runs continuously until 2 consecutive iterations
differ by at least 0x492 timer ticks. Modern hosts execute the timer calibration
loop so fast that this situation never occurs causing a hang on boot.

Use a similar method to Shoebill which is to randomly add 0x500 to the T2
counter value during calibration to enable it to eventually succeed.

Signed-off-by: Mark Cave-Ayland 
---
  hw/misc/mac_via.c | 56 +++
  1 file changed, 56 insertions(+)

diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index 5d1adf5863..2ce389e435 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -983,6 +983,44 @@ static void 
via1_timer_calibration_hack(MOS6522Q800VIA1State *v1s, int addr,
  /* Looks like there has been a reset? */
  v1s->timer_hack_state = 1;
  }
+
+if (addr == VIA_REG_T2CL && val == 0xf0) {
+/* VIA_REG_T2CL: low byte of counter (A/UX) */
+v1s->timer_hack_state = 5;
+}
+break;
+case 5:
+if (addr == VIA_REG_T2CH && val == 0x3c) {
+/*
+ * VIA_REG_T2CH: high byte of counter (A/UX). We are now extremely
+ * likely to be in the A/UX timer calibration routine, so move to
+ * the next state where we enable the calibration hack.
+ */
+v1s->timer_hack_state = 6;
+} else if ((addr == VIA_REG_IER && val == 0x20) ||
+   addr == VIA_REG_T2CH) {
+/* We're doing something else with the timer, not calibration */
+v1s->timer_hack_state = 0;
+}
+break;
+case 6:
+if ((addr == VIA_REG_IER && val == 0x20) || addr == VIA_REG_T2CH) {
+/* End of A/UX timer calibration routine, or another write */
+v1s->timer_hack_state = 7;
+} else {
+v1s->timer_hack_state = 0;
+}
+break;
+case 7:
+/*
+ * This is the normal post-calibration timer state once both the
+ * MacOS toolbox and A/UX have been calibrated, until we see a write
+ * to VIA_REG_PCR to suggest a reset
+ */
+if (addr == VIA_REG_PCR && val == 0x22) {
+/* Looks like there has been a reset? */
+v1s->timer_hack_state = 1;
+}
  break;
  default:
  g_assert_not_reached();
@@ -998,6 +1036,7 @@ static uint64_t mos6522_q800_via1_read(void *opaque, 
hwaddr addr, unsigned size)
  MOS6522Q800VIA1State *s = MOS6522_Q800_VIA1(opaque);
  MOS6522State *ms = MOS6522(s);
  uint64_t ret;
+int64_t now;
  
  addr = (addr >> 9) & 0xf;

  ret = mos6522_read(ms, addr, size);
@@ -1007,6 +1046,23 @@ static uint64_t mos6522_q800_via1_read(void *opaque, 
hwaddr addr, unsigned size)
  /* Quadra 800 Id */
  ret = (ret & ~VIA1A_CPUID_MASK) | VIA1A_CPUID_Q800;
  break;
+case VIA_REG_T2CH:
+if (s->timer_hack_state == 6) {
+/*
+ * The A/UX timer calibration loop runs continuously until 2
+ * consecutive iterations differ by at least 0x492 timer ticks.
+ * Modern hosts execute the timer calibration loop so fast that
+ * this situation never occurs causing a hang on boot. Use a
+ * similar method to Shoebill which is to randomly add 0x500 to
+ * the T2 counter value during calibration to enable it to
+ * eventually succeed.
+ */
+now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+if (now & 1) {
+ret += 0x5;
+}
+}
+break;
  }
  return ret;
  }


Reviewed-by: Laurent Vivier 




Re: [PATCH v2 19/20] q800: add alias for MacOS toolbox ROM at 0x40000000

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

According to the Apple Quadra 800 Developer Note document, the Quadra 800 ROM
consists of 2 ROM code sections based at offsets 0x0 and 0x80. A/UX attempts
to access the toolbox ROM at the lower offset during startup, so provide a
memory alias to allow the access to succeed.

Signed-off-by: Mark Cave-Ayland 
---
  hw/m68k/q800.c | 5 +
  include/hw/m68k/q800.h | 1 +
  2 files changed, 6 insertions(+)

diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index b5b2cabc33..87665c6407 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -657,6 +657,11 @@ static void q800_machine_init(MachineState *machine)
  filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
  memory_region_add_subregion(get_system_memory(), MACROM_ADDR, 
&m->rom);
  
+memory_region_init_alias(&m->rom_alias, NULL, "m68k_mac.rom-alias",

+ &m->rom, 0, MACROM_SIZE);
+memory_region_add_subregion(get_system_memory(), 0x4000,
+&m->rom_alias);
+
  /* Load MacROM binary */
  if (filename) {
  bios_size = load_image_targphys(filename, MACROM_ADDR, 
MACROM_SIZE);
diff --git a/include/hw/m68k/q800.h b/include/hw/m68k/q800.h
index 348eaf4703..a9661f65f6 100644
--- a/include/hw/m68k/q800.h
+++ b/include/hw/m68k/q800.h
@@ -50,6 +50,7 @@ struct Q800MachineState {
  bool easc;
  M68kCPU cpu;
  MemoryRegion rom;
+MemoryRegion rom_alias;
  GLUEState glue;
  MOS6522Q800VIA1State via1;
  MOS6522Q800VIA2State via2;


Reviewed-by: Laurent Vivier 




Re: [PATCH v2 12/20] swim: split into separate IWM and ISM register blocks

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

The swim chip provides an implementation of both Apple's IWM and ISM floppy disk
controllers. Split the existing implementation into separate register banks for
each controller, whilst also switching the IWM registers from 16-bit to 8-bit
as implemented in real hardware.

Signed-off-by: Mark Cave-Ayland 
---
  hw/block/swim.c | 85 -
  hw/block/trace-events   |  4 +-
  include/hw/block/swim.h | 15 +++-
  3 files changed, 58 insertions(+), 46 deletions(-)

diff --git a/hw/block/swim.c b/hw/block/swim.c
index 7df36ea139..735b335883 100644
--- a/hw/block/swim.c
+++ b/hw/block/swim.c
@@ -126,7 +126,14 @@
  #define SWIM_HEDSEL  0x20
  #define SWIM_MOTON   0x80
  
-static const char *swim_reg_names[] = {

+static const char *iwm_reg_names[] = {
+"PH0L", "PH0H", "PH1L", "PH1H",
+"PH2L", "PH2H", "PH3L", "PH3H",
+"MTROFF", "MTRON", "INTDRIVE", "EXTDRIVE",
+"Q6L", "Q6H", "Q7L", "Q7H"
+};
+
+static const char *ism_reg_names[] = {
  "WRITE_DATA", "WRITE_MARK", "WRITE_CRC", "WRITE_PARAMETER",
  "WRITE_PHASE", "WRITE_SETUP", "WRITE_MODE0", "WRITE_MODE1",
  "READ_DATA", "READ_MARK", "READ_ERROR", "READ_PARAMETER",
@@ -274,12 +281,11 @@ static void iwmctrl_write(void *opaque, hwaddr reg, 
uint64_t value,
  
  reg >>= REG_SHIFT;
  
-swimctrl->regs[reg >> 1] = reg & 1;

-trace_swim_iwmctrl_write((reg >> 1), size, (reg & 1));
+swimctrl->iwmregs[reg] = value;
+trace_swim_iwmctrl_write(reg, iwm_reg_names[reg], size, value);
  
-if (swimctrl->regs[IWM_Q6] &&

-swimctrl->regs[IWM_Q7]) {
-if (swimctrl->regs[IWM_MTR]) {
+if (swimctrl->iwmregs[IWM_Q7H]) {
+if (swimctrl->iwmregs[IWM_MTRON]) {
  /* data register */
  swimctrl->iwm_data = value;
  } else {
@@ -307,6 +313,12 @@ static void iwmctrl_write(void *opaque, hwaddr reg, 
uint64_t value,
  swimctrl->mode = SWIM_MODE_SWIM;
  swimctrl->iwm_switch = 0;
  trace_swim_iwm_switch();
+
+/* Switch to ISM registers */
+memory_region_del_subregion(&swimctrl->swim,
+&swimctrl->iwm);
+memory_region_add_subregion(&swimctrl->swim, 0x0,
+&swimctrl->ism);
  }
  break;
  }
@@ -317,28 +329,30 @@ static void iwmctrl_write(void *opaque, hwaddr reg, 
uint64_t value,
  static uint64_t iwmctrl_read(void *opaque, hwaddr reg, unsigned size)
  {
  SWIMCtrl *swimctrl = opaque;
+uint16_t value;


Why not uint8_t as iwmregs is uint8_t?


  
  reg >>= REG_SHIFT;
  
-swimctrl->regs[reg >> 1] = reg & 1;

+value = swimctrl->iwmregs[reg];
+trace_swim_iwmctrl_read(reg, iwm_reg_names[reg], size, value);
  
-trace_swim_iwmctrl_read((reg >> 1), size, (reg & 1));

-return 0;
+return value;
  }
  
-static void swimctrl_write(void *opaque, hwaddr reg, uint64_t value,

-   unsigned size)
+static const MemoryRegionOps swimctrl_iwm_ops = {
+.write = iwmctrl_write,
+.read = iwmctrl_read,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void ismctrl_write(void *opaque, hwaddr reg, uint64_t value,
+  unsigned size)
  {
  SWIMCtrl *swimctrl = opaque;
  
-if (swimctrl->mode == SWIM_MODE_IWM) {

-iwmctrl_write(opaque, reg, value, size);
-return;
-}
-
  reg >>= REG_SHIFT;
  
-trace_swim_swimctrl_write(reg, swim_reg_names[reg], size, value);

+trace_swim_swimctrl_write(reg, ism_reg_names[reg], size, value);
  
  switch (reg) {

  case SWIM_WRITE_PHASE:
@@ -359,15 +373,11 @@ static void swimctrl_write(void *opaque, hwaddr reg, 
uint64_t value,
  }
  }
  
-static uint64_t swimctrl_read(void *opaque, hwaddr reg, unsigned size)

+static uint64_t ismctrl_read(void *opaque, hwaddr reg, unsigned size)
  {
  SWIMCtrl *swimctrl = opaque;
  uint32_t value = 0;
  
-if (swimctrl->mode == SWIM_MODE_IWM) {

-return iwmctrl_read(opaque, reg, size);
-}
-
  reg >>= REG_SHIFT;
  
  switch (reg) {

@@ -389,14 +399,14 @@ static uint64_t swimctrl_read(void *opaque, hwaddr reg, 
unsigned size)
  break;
  }
  
-trace_swim_swimctrl_read(reg, swim_reg_names[reg], size, value);

+trace_swim_swimctrl_read(reg, ism_reg_names[reg], size, value);
  return value;
  }
  
-static const MemoryRegionOps swimctrl_mem_ops = {

-.write = swimctrl_write,
-.read = swimctrl_read,
-.endianness = DEVICE_NATIVE_ENDIAN,
+static const MemoryRegionOps swimctrl_ism_ops = {
+.write = ismctrl_write,
+.read = ismctrl_read,
+.endianness = DEVICE_BIG_ENDIAN,
  };
  
  static void sysbus_swim_reset(DeviceState *d)

@@ -407,13 +417,13 @@ static void sysbus_swim_reset(DeviceState 

Re: [PATCH v2 13/20] swim: update IWM/ISM register block decoding

2023-09-26 Thread Laurent Vivier

Le 09/09/2023 à 11:48, Mark Cave-Ayland a écrit :

Update the IWM/ISM register block decoding to match the description given in the
"SWIM Chip Users Reference". This allows us to validate the device response to
the guest OS which currently only does just enough to indicate that the floppy
drive is unavailable.

Signed-off-by: Mark Cave-Ayland 
---
  hw/block/swim.c | 212 +---
  hw/block/trace-events   |   7 +-
  include/hw/block/swim.h |   8 +-
  3 files changed, 143 insertions(+), 84 deletions(-)

diff --git a/hw/block/swim.c b/hw/block/swim.c
index 735b335883..fd65c59f8a 100644
--- a/hw/block/swim.c
+++ b/hw/block/swim.c
@@ -21,24 +21,28 @@
  #include "hw/qdev-properties.h"
  #include "trace.h"
  
+

+/* IWM latch bits */
+
+#define IWMLB_PHASE00
+#define IWMLB_PHASE11
+#define IWMLB_PHASE22
+#define IWMLB_PHASE33
+#define IWMLB_MOTORON   4
+#define IWMLB_DRIVESEL  5
+#define IWMLB_L66
+#define IWMLB_L77
+
  /* IWM registers */
  
-#define IWM_PH0L0

-#define IWM_PH0H1
-#define IWM_PH1L2
-#define IWM_PH1H3
-#define IWM_PH2L4
-#define IWM_PH2H5
-#define IWM_PH3L6
-#define IWM_PH3H7
-#define IWM_MTROFF  8
-#define IWM_MTRON   9
-#define IWM_INTDRIVE10
-#define IWM_EXTDRIVE11
-#define IWM_Q6L 12
-#define IWM_Q6H 13
-#define IWM_Q7L 14
-#define IWM_Q7H 15
+#define IWM_READALLONES 0
+#define IWM_READDATA1
+#define IWM_READSTATUS0 2
+#define IWM_READSTATUS1 3
+#define IWM_READWHANDSHAKE0 4
+#define IWM_READWHANDSHAKE1 5
+#define IWM_WRITESETMODE6
+#define IWM_WRITEDATA   7
  
  /* SWIM registers */
  
@@ -62,8 +66,9 @@
  
  #define REG_SHIFT   9
  
-#define SWIM_MODE_IWM  0

-#define SWIM_MODE_SWIM 1
+#define SWIM_MODE_STATUS_BIT6
+#define SWIM_MODE_IWM   0
+#define SWIM_MODE_ISM   1
  
  /* bits in phase register */
  
@@ -127,10 +132,8 @@

  #define SWIM_MOTON   0x80
  
  static const char *iwm_reg_names[] = {

-"PH0L", "PH0H", "PH1L", "PH1H",
-"PH2L", "PH2H", "PH3L", "PH3H",
-"MTROFF", "MTRON", "INTDRIVE", "EXTDRIVE",
-"Q6L", "Q6H", "Q7L", "Q7H"
+"READALLONES", "READDATA", "READSTATUS0", "READSTATUS1",
+"READWHANDSHAKE0", "READWHANDSHAKE1", "WRITESETMODE", "WRITEDATA"
  };
  
  static const char *ism_reg_names[] = {

@@ -274,68 +277,99 @@ static const TypeInfo swim_bus_info = {
  .instance_size = sizeof(SWIMBus),
  };
  
-static void iwmctrl_write(void *opaque, hwaddr reg, uint64_t value,

+static void iwmctrl_write(void *opaque, hwaddr addr, uint64_t value,
unsigned size)
  {
  SWIMCtrl *swimctrl = opaque;
+uint8_t latch, reg, ism_bit;
  
-reg >>= REG_SHIFT;

+addr >>= REG_SHIFT;
+
+/* A3-A1 select a latch, A0 specifies the value */
+latch = (addr >> 1) & 7;
+if (addr & 1) {
+swimctrl->iwm_latches |= (1 << latch);
+} else {
+swimctrl->iwm_latches &= ~(1 << latch);
+}
+
+reg = (swimctrl->iwm_latches & 0xc0) >> 5 |
+  (swimctrl->iwm_latches & 0x10) >> 4;
  
  swimctrl->iwmregs[reg] = value;

  trace_swim_iwmctrl_write(reg, iwm_reg_names[reg], size, value);
  
-if (swimctrl->iwmregs[IWM_Q7H]) {

-if (swimctrl->iwmregs[IWM_MTRON]) {
-/* data register */
-swimctrl->iwm_data = value;
-} else {
-/* mode register */
-swimctrl->iwm_mode = value;
-/* detect sequence to switch from IWM mode to SWIM mode */
-switch (swimctrl->iwm_switch) {
-case 0:
-if (value == 0x57) {
-swimctrl->iwm_switch++;
-}
-break;
-case 1:
-if (value == 0x17) {
-swimctrl->iwm_switch++;
-}
-break;
-case 2:
-if (value == 0x57) {
-swimctrl->iwm_switch++;
-}
-break;
-case 3:
-if (value == 0x57) {
-swimctrl->mode = SWIM_MODE_SWIM;
-swimctrl->iwm_switch = 0;
-trace_swim_iwm_switch();
-
-/* Switch to ISM registers */
-memory_region_del_subregion(&swimctrl->swim,
-&swimctrl->iwm);
-memory_region_add_subregion(&swimctrl->swim, 0x0,
-&swimctrl->ism);
-}
-break;
+switch (reg) {
+case IWM_WRITESETMODE:
+/* detect sequence to switch from IWM mode 

[PATCH] analyze-migration: ignore RAM_SAVE_FLAG_MULTIFD_FLUSH

2023-09-26 Thread marcandre . lureau
From: Marc-André Lureau 

Traceback (most recent call last):
  File "scripts/analyze-migration.py", line 605, in 
dump.read(dump_memory = args.memory)
  File "scripts/analyze-migration.py", line 542, in read
section.read()
  File "scripts/analyze-migration.py", line 214, in read
raise Exception("Unknown RAM flags: %x" % flags)
Exception: Unknown RAM flags: 200

See commit 77c259a4cb ("multifd: Create property 
multifd-flush-after-each-section")

Signed-off-by: Marc-André Lureau 
---
 scripts/analyze-migration.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index b82a1b0c58..082424558b 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -111,6 +111,8 @@ class RamSection(object):
 RAM_SAVE_FLAG_CONTINUE = 0x20
 RAM_SAVE_FLAG_XBZRLE   = 0x40
 RAM_SAVE_FLAG_HOOK = 0x80
+RAM_SAVE_FLAG_COMPRESS_PAGE = 0x100
+RAM_SAVE_FLAG_MULTIFD_FLUSH = 0x200
 
 def __init__(self, file, version_id, ramargs, section_key):
 if version_id != 4:
@@ -205,6 +207,8 @@ def read(self):
 raise Exception("XBZRLE RAM compression is not supported yet")
 elif flags & self.RAM_SAVE_FLAG_HOOK:
 raise Exception("RAM hooks don't make sense with files")
+if flags & self.RAM_SAVE_FLAG_MULTIFD_FLUSH:
+continue
 
 # End of RAM section
 if flags & self.RAM_SAVE_FLAG_EOS:
-- 
2.41.0




Re: [PATCH] MAINTAINERS: Add entry for rdma migration

2023-09-26 Thread Zhijian Li (Fujitsu)


On 25/09/2023 21:34, Peter Xu wrote:
> It's not obvious to many that RDMA migration is in Odd Fixes stage for a
> long time.  Add an explicit sub entry for it (besides migration, which
> already covers the rdma files) to be clear on that, meanwhile add Zhijian
> as Reviewer, so Zhijian can see the patches and review when he still has
> the bandwidth.


Feel free to add my Acked tag. thanks.

Acked-by: Li Zhijian 


> 
> Cc: Daniel P. Berrangé 
> Cc: Juan Quintela 
> Cc: Markus Armbruster 
> Cc: Zhijian Li (Fujitsu) 
> Cc: Fabiano Rosas 
> Signed-off-by: Peter Xu 
> ---
>   MAINTAINERS | 5 +
>   1 file changed, 5 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 355b1960ce..f6b21da753 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3217,6 +3217,11 @@ F: qapi/migration.json
>   F: tests/migration/
>   F: util/userfaultfd.c
>   
> +RDMA Migration
> +R: Li Zhijian 
> +S: Odd Fixes
> +F: migration/rdma*
> +
>   Migration dirty limit and dirty page rate
>   M: Hyman Huang 
>   S: Maintained

Re: [PATCH 12/12] io/channel-socket: qio_channel_socket_flush(): improve msg validation

2023-09-26 Thread Maksim Davydov

Could you add a comment into the commit message why ee_data must be
bigger than ee_info?

On 9/25/23 22:40, Vladimir Sementsov-Ogievskiy wrote:

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  io/channel-socket.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/io/channel-socket.c b/io/channel-socket.c
index 02ffb51e99..3a899b0608 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -782,6 +782,11 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
   "Error not from zero copy");
  return -1;
  }
+if (serr->ee_data < serr->ee_info) {
+error_setg_errno(errp, serr->ee_origin,
+ "Wrong notification bounds");
+return -1;
+}
  
  /* No errors, count successfully finished sendmsg()*/

  sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;


--
Best regards,
Maksim Davydov




Re: [PATCH v2 0/3] migration-qtest: zero the first byte of each page on start

2023-09-26 Thread Daniil Tatianin
ping :)



Re: [PATCH 52/52] migration/rdma: Fix how we show device details on open

2023-09-26 Thread Markus Armbruster
"Zhijian Li (Fujitsu)"  writes:

> On 18/09/2023 22:42, Markus Armbruster wrote:
>> qemu_rdma_dump_id() dumps RDMA device details to stdout.
>> 
>> rdma_start_outgoing_migration() calls it via qemu_rdma_source_init()
>> and qemu_rdma_resolve_host() to show source device details.
>> rdma_start_incoming_migration() arranges its call via
>> rdma_accept_incoming_migration() and qemu_rdma_accept() to show
>> destination device details.
>> 
>> Two issues:
>> 
>> 1. rdma_start_outgoing_migration() can run in HMP context.  The
>> information should arguably go to the monitor, not stdout.
>> 
>> 2. ibv_query_port() failure is reported as error.  Its callers remain
>> unaware of this failure (qemu_rdma_dump_id() can't fail), so
>> reporting this to the user as an error is problematic.
>> 
>> Use qemu_printf() instead of printf() and error_report().
>> 
>> Signed-off-by: Markus Armbruster 
>> ---
>>   migration/rdma.c | 20 +++-
>>   1 file changed, 11 insertions(+), 9 deletions(-)
>> 
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 9e9904984e..8c84fbab7a 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -30,6 +30,7 @@
>>   #include "qemu/sockets.h"
>>   #include "qemu/bitmap.h"
>>   #include "qemu/coroutine.h"
>> +#include "qemu/qemu-print.h"
>>   #include "exec/memory.h"
>>   #include 
>>   #include 
>> @@ -742,24 +743,25 @@ static void qemu_rdma_dump_id(const char *who, struct 
>> ibv_context *verbs)
>>   struct ibv_port_attr port;
>>   
>>   if (ibv_query_port(verbs, 1, &port)) {
>> -error_report("Failed to query port information");
>> +qemu_printf("%s RDMA Device opened, but can't query port 
>> information",
>> +who);
>
>
> '\n' newline is missing ?

Yes.

>>   return;
>>   }
>>   
>> -printf("%s RDMA Device opened: kernel name %s "
>> -   "uverbs device name %s, "
>> -   "infiniband_verbs class device path %s, "
>> -   "infiniband class device path %s, "
>> -   "transport: (%d) %s\n",
>> +qemu_printf("%s RDMA Device opened: kernel name %s "
>> +"uverbs device name %s, "
>> +"infiniband_verbs class device path %s, "
>> +"infiniband class device path %s, "
>> +"transport: (%d) %s\n",
>>   who,
>>   verbs->device->name,
>>   verbs->device->dev_name,
>>   verbs->device->dev_path,
>>   verbs->device->ibdev_path,
>>   port.link_layer,
>> -(port.link_layer == IBV_LINK_LAYER_INFINIBAND) ? 
>> "Infiniband" :
>> - ((port.link_layer == IBV_LINK_LAYER_ETHERNET)
>> -? "Ethernet" : "Unknown"));
>> +port.link_layer == IBV_LINK_LAYER_INFINIBAND ? "Infiniband"
>> +: port.link_layer == IBV_LINK_LAYER_ETHERNET ? "Ethernet"
>> +: "Unknown");
>
>
> Most of the time, these messages are not needed, so i would prefer to put it 
> to the trace instead.

Makes sense.

>>   }
>>   
>>   /*

Thanks!




Re: [PATCH 39/52] migration/rdma: Convert qemu_rdma_write_one() to Error

2023-09-26 Thread Markus Armbruster
"Zhijian Li (Fujitsu)"  writes:

> On 26/09/2023 13:50, Li Zhijian wrote:
>> 
>> 
>> On 18/09/2023 22:41, Markus Armbruster wrote:
>>> Functions that use an Error **errp parameter to return errors should
>>> not also report them to the user, because reporting is the caller's
>>> job.  When the caller does, the error is reported twice.  When it
>>> doesn't (because it recovered from the error), there is no error to
>>> report, i.e. the report is bogus.
>>>
>>> qemu_rdma_write_flush() violates this principle: it calls
>>> error_report() via qemu_rdma_write_one().  I elected not to
>>> investigate how callers handle the error, i.e. precise impact is not
>>> known.
>>>
>>> Clean this up by converting qemu_rdma_write_one() to Error.
>>>
>>> Signed-off-by: Markus Armbruster
>>> ---
>>>   migration/rdma.c | 25 +++--
>>>   1 file changed, 11 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/migration/rdma.c b/migration/rdma.c
>>> index c3c33fe242..9b8cbadfcd 100644
>>> --- a/migration/rdma.c
>>> +++ b/migration/rdma.c
>>> @@ -2019,9 +2019,8 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, 
>>> RDMAControlHeader *head,
>>>    */
>>>   static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>>>  int current_index, uint64_t current_addr,
>>> -   uint64_t length)
>>> +   uint64_t length, Error **errp)
>>>   {
>>> -    Error *err = NULL;
>>>   struct ibv_sge sge;
>>>   struct ibv_send_wr send_wr = { 0 };
>>>   struct ibv_send_wr *bad_wr;
>> 
>> [...]
>> 
>>>   }
>>> @@ -2219,7 +2216,7 @@ retry:
>>>   goto retry;
>>>   } else if (ret > 0) {
>>> -    perror("rdma migration: post rdma write failed");
>>> +    error_setg(errp, "rdma migration: post rdma write failed");
>> 
>> This reminds me: did you mean to use error_setg_errno() here instead?
>> 
>
> Answer it myself:
> ibv_post_send(3) says:
>
> RETURN VALUE
> ibv_post_send() returns 0 on success, or the value of errno on 
> failure (which indicates the failure reason).

I read this as "assign error code to errno and return it."  But...

> the global error is not defined here.

... your assertion made me check the source code, and it looks like it
does *not* assign to errno, at least not reliably.  Which means perror()
prints garbage.

I'll delete the perror() in a separate patch.

>>>   return -1;
>>>   }




Re: [RFC] Proposal of QEMU PCI Endpoint test environment

2023-09-26 Thread Shunsuke Mie



On 2023/09/21 18:11, Kishon Vijay Abraham I wrote:

+Vaishnav

Hi Shunsuke,

On 8/18/2023 7:16 PM, Shunsuke Mie wrote:

Hi all,

We are proposing to add a new test system to Linux for PCIe Endpoint that
can be run on QEMU without real hardware. At present, we have partially
confirmed that pci-epf-test is working, but it is not yet complete.
However, we would appreciate your comments on the architecture design.

# Background
The background is as follows.

A PCI Endpoint function driver is implemented using the PCIe Endpoint
framework, but it requires physical boards for testing, and it is difficult
to test sufficiently. In order to find bugs and hardware-dependent
implementations early, continuous testing is required. Since it is
difficult to automate tests that require hardware, this RFC proposes a
virtual environment for testing PCI endpoint function drivers.


This would be quite useful and thank you for attempting it! I would 
like to compare other mechanisms available in-addition to QEMU before 
going with the QEMU approach.


I got it. I'll make a table to compare some methods that includes 
greybus to realize this emulation environment.



Best,

Shunsuke

Though I don't understand this fully, Looking at 
https://osseu2023.sched.com/event/1OGk8/emulating-devices-in-linux-using-greybus-subsystem-vaishnav-mohandas-achath-texas-instruments, 
Vaishnav seems to solve the same problem using greybus for multiple 
types of devices.


Vaishnav, we'd wait for your OSS presentation but do you have any 
initial thoughts on how greybus could be used to test PCIe endpoint 
drivers?


Thanks,
Kishon



# Architecture
The overview of the architecture is as follows.

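   Guest 1                        Guest 2
+-------------------------+    +----------------------------+
| Linux kernel            |    | Linux kernel               |
|                         |    |                            |
| PCI EP function driver  |    |                            |
| (e.g. pci-epf-test)     |    |                            |
|-------------------------|    | PCI Device Driver          |
| (2) QEMU EPC Driver     |    | (e.g. pci_endpoint_test)   |
+-------------------------+    +----------------------------+
+-------------------------+    +----------------------------+
| QEMU                    |    | QEMU                       |
|-------------------------|    |----------------------------|
| (1) QEMU PCI EPC Device *----* (3) QEMU EPF Bridge Device |
+-------------------------+    +----------------------------+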

At present, it is designed to work only with guests on the same host, and
communication is done through Unix domain sockets.

The three parts shown in the figure were introduced this time.

(1) QEMU PCI Endpoint Controller (EPC) Device
A PCI Endpoint Controller implemented as a QEMU PCI device.
(2) QEMU PCI Endpoint Controller (EPC) Driver
A Linux kernel driver that drives device (1). It registers an EPC device
with the Linux kernel and handles each operation for the EPC device.
(3) QEMU PCI Endpoint Function (EPF) Bridge Device
A QEMU PCI device that cooperates with (1) and performs accesses to the PCI
configuration space, BARs and memory space to communicate between the guests,
and generates interrupts to guest 1.

The projects are:
(1), (3) https://github.com/ShunsukeMie/qemu/tree/epf-bridge/v1 


files: hw/misc/{qemu-epc.{c,h}, epf-bridge.c}
(2) https://github.com/ShunsukeMie/linux-virtio-rdma/tree/qemu-epc 


files: drivers/pci/controller/pcie-qemu-ep.c

# Protocol

PCI and PCIe have a layered structure that includes the Physical, Data Link
and Transaction layers. The communication between the bridge (3) and the
controller (1) mimics the Transaction layer. Specifically, a protocol is
implemented for checking the communication protocol version and for
exchanging fds for communication, in addition to the interaction equivalent
to PCIe Transaction Layer Packets (Read and Write of I/O, Memory,
Configuration space and Message). In my mind, we need to discuss the
communication more.

We are also planning to post the patch set after the code is organized and
the protocol discussion has matured.

Best regards,
Shunsuke




[PATCH v3 1/7] Update ACPI GED framework to support vcpu hot-(un)plug

2023-09-26 Thread xianglai li
ACPI GED shall be used to convey to the guest kernel about any cpu hot-(un)plug
events. Therefore, existing ACPI GED framework inside QEMU needs to be enhanced
to support CPU hot-(un)plug state and events.

Co-authored-by: "Salil Mehta" 
Co-authored-by: "Salil Mehta" 
Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 hw/acpi/acpi-cpu-hotplug-stub.c|  6 +
 hw/acpi/cpu.c  |  7 --
 hw/acpi/generic_event_device.c | 33 ++
 include/hw/acpi/cpu_hotplug.h  | 10 
 include/hw/acpi/generic_event_device.h |  5 
 5 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c
index 3fc4b14c26..2aec90d968 100644
--- a/hw/acpi/acpi-cpu-hotplug-stub.c
+++ b/hw/acpi/acpi-cpu-hotplug-stub.c
@@ -24,6 +24,12 @@ void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, 
ACPIOSTInfoList ***list)
 return;
 }
 
+void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
+ CPUHotplugState *state, hwaddr base_addr)
+{
+return;
+}
+
 void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev,
   CPUHotplugState *cpu_st, DeviceState *dev, Error **errp)
 {
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 011d2c6c2d..5bad983928 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -7,13 +7,6 @@
 #include "trace.h"
 #include "sysemu/numa.h"
 
-#define ACPI_CPU_HOTPLUG_REG_LEN 12
-#define ACPI_CPU_SELECTOR_OFFSET_WR 0
-#define ACPI_CPU_FLAGS_OFFSET_RW 4
-#define ACPI_CPU_CMD_OFFSET_WR 5
-#define ACPI_CPU_CMD_DATA_OFFSET_RW 8
-#define ACPI_CPU_CMD_DATA2_OFFSET_R 0
-
 #define OVMF_CPUHP_SMI_CMD 4
 
 enum {
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index a3d31631fe..c5a70957b4 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/cpu.h"
 #include "hw/acpi/generic_event_device.h"
 #include "hw/irq.h"
 #include "hw/mem/pc-dimm.h"
@@ -25,6 +26,7 @@ static const uint32_t ged_supported_events[] = {
 ACPI_GED_MEM_HOTPLUG_EVT,
 ACPI_GED_PWR_DOWN_EVT,
 ACPI_GED_NVDIMM_HOTPLUG_EVT,
+ACPI_GED_CPU_HOTPLUG_EVT,
 };
 
 /*
@@ -117,6 +119,10 @@ void build_ged_aml(Aml *table, const char *name, 
HotplugHandler *hotplug_dev,
aml_notify(aml_name("\\_SB.NVDR"),
   aml_int(0x80)));
 break;
+case ACPI_GED_CPU_HOTPLUG_EVT:
+aml_append(if_ctx, aml_call0(ACPI_CPU_CONTAINER "."
+ ACPI_CPU_SCAN_METHOD));
+break;
 default:
 /*
  * Please make sure all the events in ged_supported_events[]
@@ -234,6 +240,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler 
*hotplug_dev,
 } else {
 acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp);
 }
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
 } else {
 error_setg(errp, "virt: device plug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -248,6 +256,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
!(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM {
 acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp);
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
 } else {
 error_setg(errp, "acpi: device unplug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -261,6 +271,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
 
 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
 acpi_memory_unplug_cb(&s->memhp_state, dev, errp);
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
 } else {
 error_setg(errp, "acpi: device unplug for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -272,6 +284,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, 
ACPIOSTInfoList ***list)
 AcpiGedState *s = ACPI_GED(adev);
 
 acpi_memory_ospm_status(&s->memhp_state, list);
+acpi_cpu_ospm_status(&s->cpuhp_state

[PATCH v3 5/7] Add basic CPU hot-(un)plug support for Loongarch

2023-09-26 Thread xianglai li
Add CPU hot-(un)plug related hook functions and
turn on the CPU hot-(un)plug custom switch.

Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 .../devices/loongarch64-softmmu/default.mak   |   1 +
 hw/loongarch/virt.c   | 210 ++
 2 files changed, 211 insertions(+)

diff --git a/configs/devices/loongarch64-softmmu/default.mak 
b/configs/devices/loongarch64-softmmu/default.mak
index 928bc117ef..e596706fab 100644
--- a/configs/devices/loongarch64-softmmu/default.mak
+++ b/configs/devices/loongarch64-softmmu/default.mak
@@ -1,3 +1,4 @@
 # Default configuration for loongarch64-softmmu
 
 CONFIG_LOONGARCH_VIRT=y
+CONFIG_ACPI_CPU_HOTPLUG=y
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index fb06b4ab4e..c704f3117f 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -999,11 +999,93 @@ static void virt_get_cpu_topo_by_cpu_index(const 
MachineState *ms,
 cpu_topo->thread_id = cpu_index % ms->smp.threads;
 }
 
+/* find cpu slot in machine->possible_cpus by arch_id */
+static CPUArchId *loongarch_find_cpu_slot(MachineState *ms, int arch_id)
+{
+int n;
+for (n = 0; n < ms->possible_cpus->len; n++) {
+if (ms->possible_cpus->cpus[n].arch_id == arch_id) {
+return &ms->possible_cpus->cpus[n];
+}
+}
+
+return NULL;
+}
+
+static void loongarch_cpu_pre_plug(HotplugHandler *hotplug_dev,
+DeviceState *dev, Error **errp)
+{
+MachineState *ms = MACHINE(OBJECT(hotplug_dev));
+MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
+LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+CPUState *cs = CPU(dev);
+CPUArchId *cpu_slot;
+Error *local_err = NULL;
+LoongArchCPUTopo cpu_topo;
+int arch_id;
+
+if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
+error_setg(&local_err, "CPU hotplug not supported for this machine");
+goto out;
+}
+
+/* sanity check the cpu */
+if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
+error_setg(&local_err, "Invalid CPU type, expected cpu type: '%s'",
+   ms->cpu_type);
+goto out;
+}
+
+if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) {
+error_setg(&local_err,
+   "Invalid thread-id %u specified, must be in range 1:%u",
+   cpu->thread_id, ms->smp.threads - 1);
+goto out;
+}
+
+if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) {
+error_setg(&local_err,
+   "Invalid core-id %u specified, must be in range 1:%u",
+   cpu->core_id, ms->smp.cores);
+goto out;
+}
+
+if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) {
+error_setg(&local_err,
+   "Invalid socket-id %u specified, must be in range 1:%u",
+   cpu->socket_id, ms->smp.sockets - 1);
+goto out;
+}
+
+cpu_topo.socket_id = cpu->socket_id;
+cpu_topo.core_id = cpu->core_id;
+cpu_topo.thread_id = cpu->thread_id;
+arch_id = virt_get_arch_id_from_cpu_topo(ms, &cpu_topo);
+
+cpu_slot = loongarch_find_cpu_slot(ms, arch_id);
+if (CPU(cpu_slot->cpu)) {
+error_setg(&local_err,
+   "cpu(id%d=%d:%d:%d) with arch-id %" PRIu64 " exists",
+   cs->cpu_index, cpu->socket_id, cpu->core_id,
+   cpu->thread_id, cpu_slot->arch_id);
+goto out;
+}
+cpu->arch_id = arch_id;
+
+numa_cpu_pre_plug(cpu_slot, dev, &local_err);
+
+return ;
+out:
+error_propagate(errp, local_err);
+}
+
 static void virt_machine_device_pre_plug(HotplugHandler *hotplug_dev,
 DeviceState *dev, Error **errp)
 {
 if (memhp_type_supported(dev)) {
 virt_mem_pre_plug(hotplug_dev, dev, errp);
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) {
+loongarch_cpu_pre_plug(hotplug_dev, dev, errp);
 }
 }
 
@@ -1017,11 +1099,45 @@ static void virt_mem_unplug_request(HotplugHandler 
*hotplug_dev,
errp);
 }
 
+static void loongarch_cpu_unplug_request(HotplugHandler *hotplug_dev,
+DeviceState *dev, Error **errp)
+{
+MachineState *machine = MACHINE(OBJECT(hotplug_dev));
+LoongArchMachineState *lsms = LOONGARCH_MACHINE(machine);
+Error *local_err = NULL;
+HotplugHandlerClass *hhc;
+LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+CPUState *cs = CPU(dev);
+
+if (!lsms->acpi_ged) {
+error_setg(&local_err, "CPU hot unplug not supported without ACPI");
+goto ou

[PATCH v3 0/7] *** Adds CPU hot-plug support to Loongarch ***

2023-09-26 Thread xianglai li
Hello everyone. Following the implementation of ARM CPU
Hot-Plug, we add GED-based CPU Hot-Plug support to Loongarch.

The first 2 patches are changes to the QEMU common code,
including adding GED support for CPU Hot-Plug, updating
the ACPI table creation process.

For the modification of the public part of the code, we refer to the
arm-related patch, and the link address of the corresponding patch is
as follows:
https://lore.kernel.org/all/20200613213629.21984-1-salil.me...@huawei.com/

We also refer to shentey's patch link as follows:
https://patchew.org/QEMU/20230908084234.17642-1-shen...@gmail.com/

Out of respect for the work of "Salil Mehta" and shentey, we will rebase
onto their patches in the final version; the first 2 patches are included
here only to ensure that the loongarch cpu hotplug can work properly.

The last 5 patches are Loongarch architecture-related,
and the modifications include the definition of the hook
function related to the CPU Hot-(UN)Plug, the allocation
and release of CPU resources when CPU Hot-(UN)Plug,
the creation process of updating the ACPI table,
and finally the custom switch for the CPU Hot-Plug.

V3:
- The call to the function qdev_disconnect_gpio_out_named was removed
- Calling the function cpu_address_space_destroy is necessary, and most
  architectures need to call it as well, so we would prefer to call it
  in the common path rather than in each cpu architecture itself.
  Because that approach is not yet settled, we want to solve this problem
  in a separate patch; here we roll back that change.
- Referring to shentey's patch, loongarch no longer creates the
  TYPE_ACPI_GED_LOONGARCH device class, but instead passes build_madt_cpu_fn
  directly as an argument to the build_cpus_aml function

V2:
- Fix formatting and spelling errors
- Split large patches into smaller patches
  - Split the original patch
<> into
<>
<>
<>.
  - Split the original patch
<> into
<>
<>
- Added loongarch cpu topology calculation method.
- Change the position of the cpu topology patch.
- Change unreasonable variable and function names.

xianglai li (7):
  Update ACPI GED framework to support vcpu hot-(un)plug
  Update CPUs AML with cpu-(ctrl)dev change
  Added CPU topology support for Loongarch
  Optimize loongarch_irq_init function implementation
  Add basic CPU hot-(un)plug support for Loongarch
  Add support of *unrealize* for Loongarch cpu
  Update the ACPI table for the Loongarch CPU

 .../devices/loongarch64-softmmu/default.mak   |   1 +
 docs/system/loongarch/virt.rst|  31 ++
 hw/acpi/acpi-cpu-hotplug-stub.c   |  15 +
 hw/acpi/cpu.c |  27 +-
 hw/acpi/generic_event_device.c|  33 ++
 hw/i386/acpi-build.c  |   3 +-
 hw/loongarch/acpi-build.c |  34 +-
 hw/loongarch/virt.c   | 409 +++---
 include/hw/acpi/cpu.h |   5 +-
 include/hw/acpi/cpu_hotplug.h |  10 +
 include/hw/acpi/generic_event_device.h|   5 +
 include/hw/loongarch/virt.h   |   6 +-
 target/loongarch/cpu.c|  33 +-
 target/loongarch/cpu.h|  13 +-
 14 files changed, 542 insertions(+), 83 deletions(-)

-- 
2.39.1




[PATCH v3 3/7] Added CPU topology support for Loongarch

2023-09-26 Thread xianglai li
1.Add topological relationships for Loongarch VCPU
and initialize topology member variables.
2.Add a description of the calculation method of
the arch_id and the topological relationship of the CPU.

Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 docs/system/loongarch/virt.rst |  31 ++
 hw/loongarch/virt.c| 101 ++---
 target/loongarch/cpu.c |  13 -
 target/loongarch/cpu.h |  12 +++-
 4 files changed, 134 insertions(+), 23 deletions(-)

diff --git a/docs/system/loongarch/virt.rst b/docs/system/loongarch/virt.rst
index c37268b404..eaba9e2fd7 100644
--- a/docs/system/loongarch/virt.rst
+++ b/docs/system/loongarch/virt.rst
@@ -28,6 +28,37 @@ The ``qemu-system-loongarch64`` provides emulation for virt
 machine. You can specify the machine type ``virt`` and
 cpu type ``la464``.
 
+CPU Topology
+------------
+
+The ``LA464`` type CPUs have the concepts of Socket, Core and Thread.
+
+For example:
+
+``-smp 1,maxcpus=M,sockets=S,cores=C,threads=T``
+
+The above parameters indicate that the machine has a maximum of ``M`` vCPUs and
+``S`` sockets, each socket has ``C`` cores, each core has ``T`` threads,
+and each thread corresponds to a vCPU.
+
+Then ``M``, ``S``, ``C`` and ``T`` have the following relationship:
+
+``M = S * C * T``
+
+In the CPU topology relationship, when we know the ``socket_id``, ``core_id``
+and ``thread_id`` of the CPU, we can calculate its ``arch_id``:
+
+``arch_id = (socket_id * C * T) + (core_id * T) + thread_id``
+
+Similarly, when we know the ``arch_id`` of the CPU,
+we can also get its ``socket_id`` ``core_id`` and ``thread_id``:
+
+``socket_id = arch_id / (C * T)``
+
+``core_id = (arch_id / T) % C``
+
+``thread_id = arch_id % T``
+
 Boot options
 
 
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 2629128aed..b8474e7b94 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -624,11 +624,11 @@ static void loongarch_irq_init(LoongArchMachineState 
*lams)
 sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi),
 1));
 /*
-* extioi iocsr memory region
-* only one extioi is added on loongarch virt machine
-* external device interrupt can only be routed to cpu 0-3
-*/
-   if (cpu < EXTIOI_CPUS)
+ * extioi iocsr memory region
+ * only one extioi is added on loongarch virt machine
+ * external device interrupt can only be routed to cpu 0-3
+ */
+if (cpu < EXTIOI_CPUS)
 memory_region_add_subregion(&env->system_iocsr, APIC_BASE,
 sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi),
 cpu));
@@ -789,9 +789,7 @@ static void loongarch_init(MachineState *machine)
 NodeInfo *numa_info = machine->numa_state->nodes;
 int i;
 hwaddr fdt_base;
-const CPUArchIdList *possible_cpus;
 MachineClass *mc = MACHINE_GET_CLASS(machine);
-CPUState *cpu;
 char *ramName = NULL;
 
 if (!cpu_model) {
@@ -803,16 +801,41 @@ static void loongarch_init(MachineState *machine)
 exit(1);
 }
 create_fdt(lams);
-/* Init CPUs */
 
-possible_cpus = mc->possible_cpu_arch_ids(machine);
-for (i = 0; i < possible_cpus->len; i++) {
-cpu = cpu_create(machine->cpu_type);
-cpu->cpu_index = i;
-machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
-lacpu = LOONGARCH_CPU(cpu);
-lacpu->phy_id = machine->possible_cpus->cpus[i].arch_id;
+/* Init CPUs */
+mc->possible_cpu_arch_ids(machine);
+for (i = 0; i < machine->smp.cpus; i++) {
+Object *cpuobj;
+cpuobj = object_new(machine->cpu_type);
+lacpu = LOONGARCH_CPU(cpuobj);
+
+lacpu->arch_id = machine->possible_cpus->cpus[i].arch_id;
+object_property_set_int(cpuobj, "socket-id",
+
machine->possible_cpus->cpus[i].props.socket_id,
+NULL);
+object_property_set_int(cpuobj, "core-id",
+machine->possible_cpus->cpus[i].props.core_id,
+NULL);
+object_property_set_int(cpuobj, "thread-id",
+
machine->possible_cpus->cpus[i].props.thread_id,
+NULL);
+/*
+ * The CPU in place at the time of machine startup will also enter
+ * the CPU hot-plug process when it is created, but at this time,
+ * the GED device has not been created, resulting in exit in the CPU
+ * hot-plug proc

[PATCH v3 7/7] Update the ACPI table for the Loongarch CPU

2023-09-26 Thread xianglai li
Add new types of GED devices for Loongarch machines,
add CPU hot-(un)plug event response and address spaces,
and update the ACPI table.

Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 hw/acpi/acpi-cpu-hotplug-stub.c |  9 +
 hw/loongarch/acpi-build.c   | 34 -
 hw/loongarch/virt.c |  3 ++-
 include/hw/loongarch/virt.h |  1 +
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c
index 2aec90d968..b3ac7a1e31 100644
--- a/hw/acpi/acpi-cpu-hotplug-stub.c
+++ b/hw/acpi/acpi-cpu-hotplug-stub.c
@@ -19,6 +19,15 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, 
Object *owner,
 return;
 }
 
+void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
+build_madt_cpu_fn build_madt_cpu, hwaddr mmap_io_base,
+const char *res_root,
+const char *event_handler_method,
+AmlRegionSpace rs)
+{
+return;
+}
+
 void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list)
 {
 return;
diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
index ae292fc543..cf89e5c1cb 100644
--- a/hw/loongarch/acpi-build.c
+++ b/hw/loongarch/acpi-build.c
@@ -46,6 +46,23 @@
 #define ACPI_BUILD_DPRINTF(fmt, ...)
 #endif
 
+static void virt_madt_cpu_entry(int uid,
+const CPUArchIdList *apic_ids,
+GArray *entry, bool force_enabled)
+{
+uint32_t apic_id = apic_ids->cpus[uid].arch_id;
+/* Flags – Local APIC Flags */
+uint32_t flags = apic_ids->cpus[uid].cpu != NULL || force_enabled ?
+ 1 /* Enabled */ : 0;
+
+/* Rev 1.0b, Table 5-13 Processor Local APIC Structure */
+build_append_int_noprefix(entry, 0, 1);   /* Type */
+build_append_int_noprefix(entry, 8, 1);   /* Length */
+build_append_int_noprefix(entry, uid, 1); /* ACPI Processor ID */
+build_append_int_noprefix(entry, apic_id, 1); /* APIC ID */
+build_append_int_noprefix(entry, flags, 4); /* Flags */
+}
+
 /* build FADT */
 static void init_common_fadt_data(AcpiFadtData *data)
 {
@@ -121,15 +138,18 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
LoongArchMachineState *lams)
 build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */
 
 for (i = 0; i < arch_ids->len; i++) {
+uint32_t flags;
+
 /* Processor Core Interrupt Controller Structure */
 arch_id = arch_ids->cpus[i].arch_id;
+flags = arch_ids->cpus[i].cpu ? 1 : 0;
 
 build_append_int_noprefix(table_data, 17, 1);/* Type */
 build_append_int_noprefix(table_data, 15, 1);/* Length */
 build_append_int_noprefix(table_data, 1, 1); /* Version */
 build_append_int_noprefix(table_data, i, 4); /* ACPI Processor ID 
*/
 build_append_int_noprefix(table_data, arch_id, 4); /* Core ID */
-build_append_int_noprefix(table_data, 1, 4); /* Flags */
+build_append_int_noprefix(table_data, flags, 4);   /* Flags */
 }
 
 /* Extend I/O Interrupt Controller Structure */
@@ -292,6 +312,18 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine)
  AML_SYSTEM_MEMORY,
  VIRT_GED_MEM_ADDR);
 }
+
+if (event & ACPI_GED_CPU_HOTPLUG_EVT) {
+CPUHotplugFeatures opts = {
+.acpi_1_compatible = false,
+.has_legacy_cphp = false
+};
+
+build_cpus_aml(dsdt, machine, opts, virt_madt_cpu_entry,
+   VIRT_GED_CPUHP_ADDR, "\\_SB", "\\_GPE._E01",
+   AML_SYSTEM_MEMORY);
+
+}
 acpi_dsdt_add_power_button(dsdt);
 }
 
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index c704f3117f..22b287eb39 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -449,7 +449,7 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic, 
LoongArchMachineState
 {
 DeviceState *dev;
 MachineState *ms = MACHINE(lams);
-uint32_t event = ACPI_GED_PWR_DOWN_EVT;
+uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT;
 
 if (ms->ram_slots) {
 event |= ACPI_GED_MEM_HOTPLUG_EVT;
@@ -463,6 +463,7 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic, 
LoongArchMachineState
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, VIRT_GED_MEM_ADDR);
 /* ged regs used for reset and power down */
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, VIRT_GED_REG_ADDR);
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, VIRT_GED_CP

[PATCH v3 6/7] Add support of *unrealize* for Loongarch cpu

2023-09-26 Thread xianglai li
Add the unrealize function to the Loongarch CPU for cpu hot-(un)plug

Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 target/loongarch/cpu.c | 20 
 target/loongarch/cpu.h |  1 +
 2 files changed, 21 insertions(+)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 40b856554f..92fb23704f 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -576,6 +576,22 @@ static void loongarch_cpu_realizefn(DeviceState *dev, 
Error **errp)
 lacc->parent_realize(dev, errp);
 }
 
+static void loongarch_cpu_unrealizefn(DeviceState *dev)
+{
+LoongArchCPUClass *mcc = LOONGARCH_CPU_GET_CLASS(dev);
+
+#ifndef CONFIG_USER_ONLY
+LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+CPULoongArchState *env = &cpu->env;
+
+cpu_remove_sync(CPU(dev));
+address_space_destroy(&env->address_space_iocsr);
+memory_region_del_subregion(&env->system_iocsr, &env->iocsr_mem);
+#endif
+
+mcc->parent_unrealize(dev);
+}
+
 #ifndef CONFIG_USER_ONLY
 static void loongarch_qemu_write(void *opaque, hwaddr addr,
  uint64_t val, unsigned size)
@@ -756,6 +772,9 @@ static void loongarch_cpu_class_init(ObjectClass *c, void 
*data)
 device_class_set_props(dc, loongarch_cpu_properties);
 device_class_set_parent_realize(dc, loongarch_cpu_realizefn,
 &lacc->parent_realize);
+device_class_set_parent_unrealize(dc, loongarch_cpu_unrealizefn,
+  &lacc->parent_unrealize);
+
 resettable_class_set_parent_phases(rc, NULL, loongarch_cpu_reset_hold, 
NULL,
&lacc->parent_phases);
 
@@ -777,6 +796,7 @@ static void loongarch_cpu_class_init(ObjectClass *c, void 
*data)
 #ifdef CONFIG_TCG
 cc->tcg_ops = &loongarch_tcg_ops;
 #endif
+dc->user_creatable = true;
 }
 
 static gchar *loongarch32_gdb_arch_name(CPUState *cs)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 838492f014..ec4a9ff166 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -414,6 +414,7 @@ struct LoongArchCPUClass {
 /*< public >*/
 
 DeviceRealize parent_realize;
+DeviceUnrealize parent_unrealize;
 ResettablePhases parent_phases;
 };
 
-- 
2.39.1




[PATCH v3 4/7] Optimize loongarch_irq_init function implementation

2023-09-26 Thread xianglai li
Optimize loongarch_irq_init function implementation
and abstract the function loongarch_cpu_irq_init from it.

Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 hw/loongarch/virt.c | 105 
 include/hw/loongarch/virt.h |   5 +-
 2 files changed, 62 insertions(+), 48 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index b8474e7b94..fb06b4ab4e 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -46,6 +46,8 @@
 #include "hw/block/flash.h"
 #include "qemu/error-report.h"
 
+static LoongArchCPU *loongarch_cpu_irq_init(MachineState *machine,
+LoongArchCPU *cpu, Error **errp);
 
 static void virt_flash_create(LoongArchMachineState *lams)
 {
@@ -573,16 +575,16 @@ static void loongarch_devices_init(DeviceState *pch_pic, 
LoongArchMachineState *
 static void loongarch_irq_init(LoongArchMachineState *lams)
 {
 MachineState *ms = MACHINE(lams);
-DeviceState *pch_pic, *pch_msi, *cpudev;
-DeviceState *ipi, *extioi;
+DeviceState *pch_pic, *pch_msi;
+DeviceState *extioi;
 SysBusDevice *d;
 LoongArchCPU *lacpu;
-CPULoongArchState *env;
 CPUState *cpu_state;
-int cpu, pin, i, start, num;
+int cpu, i, start, num;
 
 extioi = qdev_new(TYPE_LOONGARCH_EXTIOI);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal);
+lams->extioi = extioi;
 
 /*
  * The connection of interrupts:
@@ -607,44 +609,8 @@ static void loongarch_irq_init(LoongArchMachineState *lams)
  */
 for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
 cpu_state = qemu_get_cpu(cpu);
-cpudev = DEVICE(cpu_state);
 lacpu = LOONGARCH_CPU(cpu_state);
-env = &(lacpu->env);
-
-ipi = qdev_new(TYPE_LOONGARCH_IPI);
-sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal);
-
-/* connect ipi irq to cpu irq */
-qdev_connect_gpio_out(ipi, 0, qdev_get_gpio_in(cpudev, IRQ_IPI));
-/* IPI iocsr memory region */
-memory_region_add_subregion(&env->system_iocsr, SMP_IPI_MAILBOX,
-sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi),
-0));
-memory_region_add_subregion(&env->system_iocsr, MAIL_SEND_ADDR,
-sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi),
-1));
-/*
- * extioi iocsr memory region
- * only one extioi is added on loongarch virt machine
- * external device interrupt can only be routed to cpu 0-3
- */
-if (cpu < EXTIOI_CPUS)
-memory_region_add_subregion(&env->system_iocsr, APIC_BASE,
-sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi),
-cpu));
-env->ipistate = ipi;
-}
-
-/*
- * connect ext irq to the cpu irq
- * cpu_pin[9:2] <= intc_pin[7:0]
- */
-for (cpu = 0; cpu < MIN(ms->smp.cpus, EXTIOI_CPUS); cpu++) {
-cpudev = DEVICE(qemu_get_cpu(cpu));
-for (pin = 0; pin < LS3A_INTC_IP; pin++) {
-qdev_connect_gpio_out(extioi, (cpu * 8 + pin),
-  qdev_get_gpio_in(cpudev, pin + 2));
-}
+loongarch_cpu_irq_init(ms, lacpu, &error_fatal);
 }
 
 pch_pic = qdev_new(TYPE_LOONGARCH_PCH_PIC);
@@ -927,11 +893,7 @@ static void loongarch_init(MachineState *machine)
 }
 }
 fdt_add_flash_node(lams);
-/* register reset function */
-for (i = 0; i < machine->smp.cpus; i++) {
-lacpu = LOONGARCH_CPU(qemu_get_cpu(i));
-qemu_register_reset(reset_load_elf, lacpu);
-}
+
 /* Initialize the IO interrupt subsystem */
 loongarch_irq_init(lams);
 fdt_add_irqchip_node(lams);
@@ -1091,6 +1053,57 @@ static void virt_mem_plug(HotplugHandler *hotplug_dev,
  dev, &error_abort);
 }
 
+static LoongArchCPU *loongarch_cpu_irq_init(MachineState *machine,
+LoongArchCPU *cpu, Error **errp)
+{
+LoongArchMachineState *lsms = LOONGARCH_MACHINE(machine);
+CPUState *cs = CPU(cpu);
+unsigned int cpu_index = cs->cpu_index;
+DeviceState *cpudev = DEVICE(cpu);
+DeviceState *extioi = lsms->extioi;
+CPULoongArchState *env = &cpu->env;
+DeviceState *ipi;
+int pin;
+
+qemu_register_reset(reset_load_elf, cpu);
+
+ipi = qdev_new(TYPE_LOONGARCH_IPI);
+sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), errp);
+
+/* connect ipi irq to cpu irq */
+qdev_connect_gpio_out(ipi, 0, qdev_get_gpio_in(cpudev, IRQ_IPI));
+/* IPI iocsr m

[PATCH v3 2/7] Update CPUs AML with cpu-(ctrl)dev change

2023-09-26 Thread xianglai li
CPUs Control device(\\_SB.PCI0) register interface for the x86 arch
is based on PCI and is IO port based and hence existing cpus AML code
assumes _CRS objects would evaluate to a system resource which describes
IO Port address.
But on Loongarch arch CPUs control device(\\_SB.PRES) register interface
is memory-mapped hence _CRS object should evaluate to system resource
which describes memory-mapped base address.

This cpus AML code change updates the existing interface of the build cpus AML
function to accept both IO/MEMORY type regions and update the _CRS object
correspondingly.

Co-authored-by: "Bernhard Beschow" 
Co-authored-by: "Salil Mehta" 
Co-authored-by: "Salil Mehta" 
Cc: "Bernhard Beschow" 
Cc: "Salil Mehta" 
Cc: "Salil Mehta" 
Cc: Xiaojuan Yang 
Cc: Song Gao 
Cc: "Michael S. Tsirkin" 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: "Philippe Mathieu-Daudé" 
Cc: Yanan Wang 
Cc: "Daniel P. Berrangé" 
Cc: Peter Xu 
Cc: David Hildenbrand 
Cc: Bibo Mao 
Signed-off-by: xianglai li 
---
 hw/acpi/cpu.c | 20 +++-
 hw/i386/acpi-build.c  |  3 ++-
 include/hw/acpi/cpu.h |  5 +++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 5bad983928..0afa04832e 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -6,6 +6,7 @@
 #include "qapi/qapi-events-acpi.h"
 #include "trace.h"
 #include "sysemu/numa.h"
+#include "hw/acpi/cpu_hotplug.h"
 
 #define OVMF_CPUHP_SMI_CMD 4
 
@@ -332,9 +333,10 @@ const VMStateDescription vmstate_cpu_hotplug = {
 #define CPU_FW_EJECT_EVENT "CEJF"
 
 void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
-build_madt_cpu_fn build_madt_cpu, hwaddr io_base,
+build_madt_cpu_fn build_madt_cpu, hwaddr mmap_io_base,
 const char *res_root,
-const char *event_handler_method)
+const char *event_handler_method,
+AmlRegionSpace rs)
 {
 Aml *ifctx;
 Aml *field;
@@ -359,14 +361,22 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0));
 
 crs = aml_resource_template();
-aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1,
+if (rs == AML_SYSTEM_IO) {
+aml_append(crs, aml_io(AML_DECODE16, mmap_io_base, mmap_io_base, 1,
ACPI_CPU_HOTPLUG_REG_LEN));
+} else {
+aml_append(crs, aml_memory32_fixed(mmap_io_base,
+   ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE));
+}
+
 aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs));
 
+g_assert(rs == AML_SYSTEM_IO || rs == AML_SYSTEM_MEMORY);
 /* declare CPU hotplug MMIO region with related access fields */
 aml_append(cpu_ctrl_dev,
-aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base),
- ACPI_CPU_HOTPLUG_REG_LEN));
+aml_operation_region("PRST", rs,
+ aml_int(mmap_io_base),
+ ACPI_CPU_HOTPLUG_REG_LEN));
 
 field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK,
   AML_WRITE_AS_ZEROS);
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 863a939210..7016205d15 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1550,7 +1550,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 .fw_unplugs_cpu = pm->smi_on_cpu_unplug,
 };
 build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry,
-   pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02");
+   pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02",
+   AML_SYSTEM_IO);
 }
 
 if (pcms->memhp_io_base && nr_mem) {
diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h
index bc901660fb..601f644e57 100644
--- a/include/hw/acpi/cpu.h
+++ b/include/hw/acpi/cpu.h
@@ -60,9 +60,10 @@ typedef void (*build_madt_cpu_fn)(int uid, const 
CPUArchIdList *apic_ids,
   GArray *entry, bool force_enabled);
 
 void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
-build_madt_cpu_fn build_madt_cpu, hwaddr io_base,
+build_madt_cpu_fn build_madt_cpu, hwaddr mmap_io_base,
 const char *res_root,
-const char *event_handler_method);
+const char *event_handler_method,
+AmlRegionSpace rs);
 
 void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list);
 
-- 
2.39.1




[PATCH v2 1/2] migration: Fix rdma migration failed

2023-09-26 Thread Li Zhijian
Migration over RDMA has failed since
commit: 294e5a4034 ("multifd: Only flush once each full round of memory")
with errors:
qemu-system-x86_64: rdma: Too many requests in this message 
(3638950032).Bailing.

Migration with RDMA is different from TCP. RDMA has its own control
messages, and all traffic between RDMA_CONTROL_REGISTER_REQUEST and
RDMA_CONTROL_REGISTER_FINISHED must not be disturbed.

find_dirty_block() is called between RDMA_CONTROL_REGISTER_REQUEST
and RDMA_CONTROL_REGISTER_FINISHED, and it sends extra traffic
(RAM_SAVE_FLAG_MULTIFD_FLUSH) to the destination, which causes migration
to fail even though multifd is disabled.

This change makes migrate_multifd_flush_after_each_section() return true
when multifd is disabled, which also means RAM_SAVE_FLAG_MULTIFD_FLUSH
will no longer be sent to the destination when multifd is disabled.

Fixes: 294e5a4034 ("multifd: Only flush once each full round of memory")
CC: Fabiano Rosas 
Signed-off-by: Li Zhijian 
---

V2: put that check at the entry of migrate_multifd_flush_after_each_section() # 
Peter
---
 migration/options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/options.c b/migration/options.c
index 1d1e1321b0..327bcf2fbe 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -368,7 +368,7 @@ bool migrate_multifd_flush_after_each_section(void)
 {
 MigrationState *s = migrate_get_current();
 
-return s->multifd_flush_after_each_section;
+return !migrate_multifd() || s->multifd_flush_after_each_section;
 }
 
 bool migrate_postcopy(void)
-- 
2.31.1




[PATCH v2 2/2] migration/rdma: zero out head.repeat to make the error more clear

2023-09-26 Thread Li Zhijian
Previously, we got a confusing error that complained about
RDMAControlHeader.repeat:
qemu-system-x86_64: rdma: Too many requests in this message 
(3638950032).Bailing.

Actually, it is caused by an unexpected RDMAControlHeader.type.
After this patch, the error becomes:
qemu-system-x86_64: Unknown control message QEMU FILE

Reviewed-by: Fabiano Rosas 
Reviewed-by: Peter Xu 
Signed-off-by: Li Zhijian 

---
V2: add reviewed-by tags
---
 migration/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index a2a3db35b1..3073d9953c 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2812,7 +2812,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
 size_t remaining = iov[i].iov_len;
 uint8_t * data = (void *)iov[i].iov_base;
 while (remaining) {
-RDMAControlHeader head;
+RDMAControlHeader head = {};
 
 len = MIN(remaining, RDMA_SEND_INCREMENT);
 remaining -= len;
-- 
2.31.1




[PATCH RFC V2 00/37] Support of Virtual CPU Hotplug for ARMv8 Arch

2023-09-26 Thread Salil Mehta via
[ *REPEAT: Sent patches got held at internal server yesterday* ]

PROLOGUE


To assist in review and set the right expectations from this RFC, please first
read below sections *APPENDED AT THE END* of this cover letter,

1. Important *DISCLAIMER* [Section (X)]
2. Work presented at KVMForum Conference (slides available) [Section (V)F]
3. Organization of patches [Section (XI)]
4. References [Section (XII)]
5. Detailed TODO list of the leftover work or work-in-progress [Section (IX)]

NOTE: There has been an interest shown by other organizations in adapting
this series for their architecture. I am planning to split this RFC into
architecture *agnostic* and *specific* patch-sets in subsequent releases. ARM
specific patch-set will continue as RFC V3 and architecture agnostic patch-set
will be floated without RFC tag and can be consumed in this Qemu cycle if
MAINTAINERs ack it.

[Please check section (XI)B for details of architecture agnostic patches]


SECTIONS [I - XIII] are as follows :

(I) Key Changes (RFC V1 -> RFC V2)
==

RFC V1: 
https://lore.kernel.org/qemu-devel/20200613213629.21984-1-salil.me...@huawei.com/

1. ACPI MADT Table GIC CPU Interface can now be presented [6] as ACPI
   *online-capable* or *enabled* to the Guest OS at the boot time. This means
   associated CPUs can have ACPI _STA as *enabled* or *disabled* even after boot
   See, UEFI ACPI 6.5 Spec, Section 05, Table 5.37 GICC CPU Interface Flags[20]
2. SMCC/HVC Hypercall exit handling in userspace/Qemu for PSCI CPU_{ON,OFF}
   request. This is required to {dis}allow online'ing a vCPU.
3. Always presenting unplugged vCPUs in CPUs ACPI AML code as ACPI _STA.PRESENT 
   to the Guest OS. Toggling ACPI _STA.Enabled to give an effect of the
   hot{un}plug.
4. Live Migration works (some issues are still there)
5. TCG/HVF/qtest does not support Hotplug and falls back to default.
6. Code for TCG support does exist in this release (it is a work-in-progress)
7. ACPI _OSC method can now be used by OSPM to negotiate Qemu VM platform
   hotplug capability (_OSC Query support still pending)
8. Misc. Bug fixes

(II) Summary
 ===

This patch-set introduces the virtual CPU hotplug support for ARMv8 architecture
in QEMU. Idea is to be able to hotplug and hot-unplug the vCPUs while guest VM
is running and no reboot is required. This does *not* make any assumption of
the physical CPU hotplug availability within the host system but rather tries to
solve the problem at virtualizer/QEMU layer. Introduces ACPI CPU hotplug hooks
and event handling to interface with the guest kernel, code to initialize, plug
and unplug CPUs. No changes are required within the host kernel/KVM except the
support of hypercall exit handling in the user-space/Qemu which has recently
been added to the kernel. Its corresponding Guest kernel changes have been
posted on the mailing-list [3] [4] by James Morse.

(III) Motivation
  ==

This allows scaling the guest VM compute capacity on-demand which would be
useful for the following example scenarios,

1. Vertical Pod Autoscaling [9][10] in the cloud: Part of the orchestration
   framework which could adjust resource requests (CPU and Mem requests) for
   the containers in a pod, based on usage.
2. Pay-as-you-grow Business Model: Infrastructure provider could allocate and
   restrict the total number of compute resources available to the guest VM
   according to the SLA (Service Level Agreement). VM owner could request for
   more compute to be hot-plugged for some cost.

For example, Kata Container VM starts with a minimum amount of resources (i.e.
hotplug everything approach). why?

1. Allowing faster *boot time* and
2. Reduction in *memory footprint*

Kata Container VM can boot with just 1 vCPU and then later more vCPUs can be
hot-plugged as per requirement.

(IV) Terminology
 ===

(*) Possible CPUs: Total vCPUs which could ever exist in VM. This includes
any cold booted CPUs plus any CPUs which could be later
hot-plugged.
- Qemu parameter(-smp maxcpus=N)
(*) Present CPUs:   Possible CPUs which are ACPI 'present'. These might or might
not be ACPI 'enabled'. 
- Present vCPUs = Possible vCPUs (Always on ARM Arch)
(*) Enabled CPUs:   Possible CPUs which are ACPI ‘present’ and 'enabled' and can
now be ‘onlined’ (PSCI) for use by Guest Kernel. All cold
booted vCPUs are ACPI 'enabled' at boot. Later, using
device_add more vCPUs can be hotplugged and be made ACPI
'enabled'.
- Qemu parameter(-smp cpus=N). Can be used to specify some
  cold booted vCPUs during VM init. Some can be added using
  '-device' option.

(V) Constraints Due To ARMv8 CPU Architecture [+] Other Impediments
===

[PATCH RFC V2 01/37] arm/virt, target/arm: Add new ARMCPU {socket, cluster, core, thread}-id property

2023-09-26 Thread Salil Mehta via
This shall be used to store user specified topology{socket,cluster,core,thread}
and shall be converted to a unique 'vcpu-id' which is used as slot-index during
hot(un)plug of vCPU.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c| 63 
 target/arm/cpu.c |  4 +++
 target/arm/cpu.h |  4 +++
 3 files changed, 71 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7d9dbc2663..57fe97c242 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -221,6 +221,11 @@ static const char *valid_cpus[] = {
 ARM_CPU_TYPE_NAME("max"),
 };
 
+static int virt_get_socket_id(const MachineState *ms, int cpu_index);
+static int virt_get_cluster_id(const MachineState *ms, int cpu_index);
+static int virt_get_core_id(const MachineState *ms, int cpu_index);
+static int virt_get_thread_id(const MachineState *ms, int cpu_index);
+
 static bool cpu_type_valid(const char *cpu)
 {
 int i;
@@ -2168,6 +2173,14 @@ static void machvirt_init(MachineState *machine)
   &error_fatal);
 
 aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
+object_property_set_int(cpuobj, "socket-id",
+virt_get_socket_id(machine, n), NULL);
+object_property_set_int(cpuobj, "cluster-id",
+virt_get_cluster_id(machine, n), NULL);
+object_property_set_int(cpuobj, "core-id",
+virt_get_core_id(machine, n), NULL);
+object_property_set_int(cpuobj, "thread-id",
+virt_get_thread_id(machine, n), NULL);
 
 if (!vms->secure) {
 object_property_set_bool(cpuobj, "has_el3", false, NULL);
@@ -2652,10 +2665,59 @@ static int64_t virt_get_default_cpu_node_id(const 
MachineState *ms, int idx)
 return socket_id % ms->numa_state->num_nodes;
 }
 
+static int virt_get_socket_id(const MachineState *ms, int cpu_index)
+{
+assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+return ms->possible_cpus->cpus[cpu_index].props.socket_id;
+}
+
+static int virt_get_cluster_id(const MachineState *ms, int cpu_index)
+{
+assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+return ms->possible_cpus->cpus[cpu_index].props.cluster_id;
+}
+
+static int virt_get_core_id(const MachineState *ms, int cpu_index)
+{
+assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+return ms->possible_cpus->cpus[cpu_index].props.core_id;
+}
+
+static int virt_get_thread_id(const MachineState *ms, int cpu_index)
+{
+assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+return ms->possible_cpus->cpus[cpu_index].props.thread_id;
+}
+
+static int
+virt_get_cpu_id_from_cpu_topo(const MachineState *ms, DeviceState *dev)
+{
+int cpu_id, sock_vcpu_num, clus_vcpu_num, core_vcpu_num;
+ARMCPU *cpu = ARM_CPU(dev);
+
+/* calculate total logical cpus across socket/cluster/core */
+sock_vcpu_num = cpu->socket_id * (ms->smp.threads * ms->smp.cores *
+ms->smp.clusters);
+clus_vcpu_num = cpu->cluster_id * (ms->smp.threads * ms->smp.cores);
+core_vcpu_num = cpu->core_id * ms->smp.threads;
+
+/* get vcpu-id(logical cpu index) for this vcpu from this topology */
+cpu_id = (sock_vcpu_num + clus_vcpu_num + core_vcpu_num) + cpu->thread_id;
+
+assert(cpu_id >= 0 && cpu_id < ms->possible_cpus->len);
+
+return cpu_id;
+}
+
 static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
 {
 int n;
 unsigned int max_cpus = ms->smp.max_cpus;
+unsigned int smp_threads = ms->smp.threads;
 VirtMachineState *vms = VIRT_MACHINE(ms);
 MachineClass *mc = MACHINE_GET_CLASS(vms);
 
@@ -2669,6 +2731,7 @@ static const CPUArchIdList 
*virt_possible_cpu_arch_ids(MachineState *ms)
 ms->possible_cpus->len = max_cpus;
 for (n = 0; n < ms->possible_cpus->len; n++) {
 ms->possible_cpus->cpus[n].type = ms->cpu_type;
+ms->possible_cpus->cpus[n].vcpus_count = smp_threads;
 ms->possible_cpus->cpus[n].arch_id =
 virt_cpu_mp_affinity(vms, n);
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 93c28d50e5..1376350416 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2277,6 +2277,10 @@ static Property arm_cpu_properties[] = {
 DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
 mp_affinity, ARM64_AFFINITY_INVALID),
 DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
+DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0),
+DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0),
+DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0),
+DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0),
 DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1),
 DEFINE_PROP_END_OF_LIST()
 };
diff --git a/target/arm/cpu.

[PATCH RFC V2 02/37] cpus-common: Add common CPU utility for possible vCPUs

2023-09-26 Thread Salil Mehta via
Adds various utility functions which might be required to fetch or check the
state of the possible vCPUs. This also introduces the concept of *disabled* vCPUs,
which are part of the *possible* vCPUs but are not part of the *present* vCPUs.
This state shall be used during machine init time to check the presence of
vcpus.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 cpus-common.c | 31 +
 include/hw/core/cpu.h | 53 +++
 2 files changed, 84 insertions(+)

diff --git a/cpus-common.c b/cpus-common.c
index 45c745ecf6..24c04199a1 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -24,6 +24,7 @@
 #include "sysemu/cpus.h"
 #include "qemu/lockable.h"
 #include "trace/trace-root.h"
+#include "hw/boards.h"
 
 QemuMutex qemu_cpu_list_lock;
 static QemuCond exclusive_cond;
@@ -107,6 +108,36 @@ void cpu_list_remove(CPUState *cpu)
 cpu_list_generation_id++;
 }
 
+CPUState *qemu_get_possible_cpu(int index)
+{
+MachineState *ms = MACHINE(qdev_get_machine());
+const CPUArchIdList *possible_cpus = ms->possible_cpus;
+
+assert((index >= 0) && (index < possible_cpus->len));
+
+return CPU(possible_cpus->cpus[index].cpu);
+}
+
+bool qemu_present_cpu(CPUState *cpu)
+{
+return cpu;
+}
+
+bool qemu_enabled_cpu(CPUState *cpu)
+{
+return cpu && !cpu->disabled;
+}
+
+uint64_t qemu_get_cpu_archid(int cpu_index)
+{
+MachineState *ms = MACHINE(qdev_get_machine());
+const CPUArchIdList *possible_cpus = ms->possible_cpus;
+
+assert((cpu_index >= 0) && (cpu_index < possible_cpus->len));
+
+return possible_cpus->cpus[cpu_index].arch_id;
+}
+
 CPUState *qemu_get_cpu(int index)
 {
 CPUState *cpu;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index fdcbe87352..e5af79950c 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -413,6 +413,17 @@ struct CPUState {
 SavedIOTLB saved_iotlb;
 #endif
 
+/*
+ * Some architectures do not allow *presence* of vCPUs to be changed
+ * after guest has booted using information specified by VMM/firmware
+ * via ACPI MADT at the boot time. Thus to enable vCPU hotplug on these
+ * architectures possible vCPU can have CPUState object in 'disabled'
+ * state or can also not have CPUState object at all. This is possible
+ * when vCPU Hotplug is supported and vCPUs are 'yet-to-be-plugged' in
+ * the QOM or have been hot-unplugged.
+ * By default every CPUState is enabled as of now across all archs.
+ */
+bool disabled;
 /* TODO Move common fields from CPUArchState here. */
 int cpu_index;
 int cluster_index;
@@ -770,6 +781,48 @@ static inline bool cpu_in_exclusive_context(const CPUState 
*cpu)
  */
 CPUState *qemu_get_cpu(int index);
 
+/**
+ * qemu_get_possible_cpu:
+ * @index: The CPUState@cpu_index value of the CPU to obtain.
+ * Input index MUST be in range [0, Max Possible CPUs)
+ *
+ * If CPUState object exists,then it gets a CPU matching
+ * @index in the possible CPU array.
+ *
+ * Returns: The possible CPU or %NULL if CPU does not exist.
+ */
+CPUState *qemu_get_possible_cpu(int index);
+
+/**
+ * qemu_present_cpu:
+ * @cpu: The vCPU to check
+ *
+ * Checks if the vCPU is amongst the present possible vcpus.
+ *
+ * Returns: True if it is present possible vCPU else false
+ */
+bool qemu_present_cpu(CPUState *cpu);
+
+/**
+ * qemu_enabled_cpu:
+ * @cpu: The vCPU to check
+ *
+ * Checks if the vCPU is enabled.
+ *
+ * Returns: True if it is 'enabled' else false
+ */
+bool qemu_enabled_cpu(CPUState *cpu);
+
+/**
+ * qemu_get_cpu_archid:
+ * @cpu_index: possible vCPU for which arch-id needs to be retrieved
+ *
+ * Fetches the vCPU arch-id from the present possible vCPUs.
+ *
+ * Returns: arch-id of the possible vCPU
+ */
+uint64_t qemu_get_cpu_archid(int cpu_index);
+
 /**
  * cpu_exists:
  * @id: Guest-exposed CPU ID to lookup.
-- 
2.34.1




[PATCH RFC V2 04/37] arm/virt, target/arm: Machine init time change common to vCPU {cold|hot}-plug

2023-09-26 Thread Salil Mehta via
Refactor and introduce the common logic required during the initialization of
both cold and hot plugged vCPUs. Also initialize the *disabled* state of the
vCPUs which shall be used further during init phases of various other components
like GIC, PMU, ACPI etc as part of the virt machine initialization.

KVM vCPUs corresponding to unplugged/yet-to-be-plugged QOM CPUs are kept in
powered-off state in the KVM Host and do not run the guest code. Plugged vCPUs
are also kept in powered-off state but their vCPU threads exist and are kept sleeping.

TBD:
For the cold booted vCPUs, this change also exists in the arm_load_kernel()
in boot.c but for the hotplugged CPUs this change should still remain part of
the pre-plug phase. We are duplicating the powering-off of the cold booted CPUs.
Shall we remove the duplicate change from boot.c?

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Reported-by: Gavin Shan 
[GS: pointed the assertion due to wrong range check]
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c  | 149 -
 target/arm/cpu.c   |   7 +++
 target/arm/cpu64.c |  14 +
 3 files changed, 156 insertions(+), 14 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 0eb6bf5a18..3668ad27ec 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -221,6 +221,7 @@ static const char *valid_cpus[] = {
 ARM_CPU_TYPE_NAME("max"),
 };
 
+static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid);
 static int virt_get_socket_id(const MachineState *ms, int cpu_index);
 static int virt_get_cluster_id(const MachineState *ms, int cpu_index);
 static int virt_get_core_id(const MachineState *ms, int cpu_index);
@@ -2154,6 +2155,14 @@ static void machvirt_init(MachineState *machine)
 exit(1);
 }
 
+finalize_gic_version(vms);
+if (tcg_enabled() || hvf_enabled() || qtest_enabled() ||
+(vms->gic_version < VIRT_GIC_VERSION_3)) {
+machine->smp.max_cpus = smp_cpus;
+mc->has_hotpluggable_cpus = false;
+warn_report("cpu hotplug feature has been disabled");
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(machine);
 
 /*
@@ -2180,11 +2189,6 @@ static void machvirt_init(MachineState *machine)
 virt_set_memmap(vms, pa_bits);
 }
 
-/* We can probe only here because during property set
- * KVM is not available yet
- */
-finalize_gic_version(vms);
-
 sysmem = vms->sysmem = get_system_memory();
 
 if (vms->secure) {
@@ -2289,17 +2293,9 @@ static void machvirt_init(MachineState *machine)
 assert(possible_cpus->len == max_cpus);
 for (n = 0; n < possible_cpus->len; n++) {
 Object *cpuobj;
-CPUState *cs;
-
-if (n >= smp_cpus) {
-break;
-}
 
 cpuobj = object_new(possible_cpus->cpus[n].type);
 
-cs = CPU(cpuobj);
-cs->cpu_index = n;
-
 aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
 object_property_set_int(cpuobj, "socket-id",
 virt_get_socket_id(machine, n), NULL);
@@ -2804,6 +2800,50 @@ static const CPUArchIdList 
*virt_possible_cpu_arch_ids(MachineState *ms)
 return ms->possible_cpus;
 }
 
+static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid)
+{
+VirtMachineState *vms = VIRT_MACHINE(ms);
+CPUArchId *found_cpu;
+uint64_t mp_affinity;
+
+assert(vcpuid >= 0 && vcpuid < ms->possible_cpus->len);
+
+/*
+ * RFC: Question:
+ * TBD: Should mp-affinity be treated as MPIDR?
+ */
+mp_affinity = virt_cpu_mp_affinity(vms, vcpuid);
+found_cpu = &ms->possible_cpus->cpus[vcpuid];
+
+assert(found_cpu->arch_id == mp_affinity);
+
+/*
+ * RFC: Question:
+ * Slot-id is the index where vCPU with certain arch-id(=mpidr/ap-affinity)
+ * is plugged. For Host KVM, MPIDR for vCPU is derived using vcpu-id.
+ * As I understand, MPIDR and vcpu-id are property of vCPU but slot-id is
+ * more related to machine? Current code assumes slot-id and vcpu-id are
+ * same i.e. meaning of slot is bit vague.
+ *
+ * Q1: Is there any requirement to clearly represent slot and dissociate it
+ * from vcpu-id?
+ * Q2: Should we make MPIDR within host KVM user configurable?
+ *
+ *  +++++++++
+ * MPIDR|||  Res  |   Aff2  |   Aff1  |  Aff0   |
+ *  +++++++++
+ * \ \ \   ||
+ *  \   8bit  \   8bit  \  |4bit|
+ *   \<--->\<--->\ |<-->|
+ *\ \ \||
+ *  +++++++++
+ * VCPU-ID  |  Byte4  |  Byte2  |  Byte1  |  Byte0  |
+ *  +++++++++
+ */
+
+return found_cpu;
+}
+
 static void virt_memory_pre_plug(H

[PATCH RFC V2 05/37] accel/kvm: Extract common KVM vCPU {creation, parking} code

2023-09-26 Thread Salil Mehta via
KVM vCPU creation is done once during the initialization of the VM when Qemu
threads are spawned. This is common to all the architectures. If the architecture
supports vCPU hot-{un}plug then this KVM vCPU creation could be deferred to
a later point as well. Some architectures might in any case create KVM vCPUs for
the yet-to-be plugged vCPUs (i.e. QOM object & thread do not exist) during VM
init time and park them.

Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
the KVM vCPU objects in the Host KVM are not destroyed and their representative
KVM vCPU objects in Qemu are parked.

Signed-off-by: Salil Mehta 
---
 accel/kvm/kvm-all.c  | 61 ++--
 include/sysemu/kvm.h |  2 ++
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 7b3da8dc3a..86e9c9ea60 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -137,6 +137,7 @@ static QemuMutex kml_slots_lock;
 #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
 
 static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
+static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
 
 static inline void kvm_resample_fd_remove(int gsi)
 {
@@ -320,11 +321,51 @@ err:
 return ret;
 }
 
+void kvm_park_vcpu(CPUState *cpu)
+{
+unsigned long vcpu_id = cpu->cpu_index;
+struct KVMParkedVcpu *vcpu;
+
+vcpu = g_malloc0(sizeof(*vcpu));
+vcpu->vcpu_id = vcpu_id;
+vcpu->kvm_fd = cpu->kvm_fd;
+QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+}
+
+int kvm_create_vcpu(CPUState *cpu)
+{
+unsigned long vcpu_id = cpu->cpu_index;
+KVMState *s = kvm_state;
+int ret;
+
+DPRINTF("kvm_create_vcpu\n");
+
+/* check if the KVM vCPU already exist but is parked */
+ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+if (ret > 0) {
+goto found;
+}
+
+/* create a new KVM vcpu */
+ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
+if (ret < 0) {
+return ret;
+}
+
+found:
+cpu->vcpu_dirty = true;
+cpu->kvm_fd = ret;
+cpu->kvm_state = s;
+cpu->dirty_pages = 0;
+cpu->throttle_us_per_full = 0;
+
+return 0;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
 KVMState *s = kvm_state;
 long mmap_size;
-struct KVMParkedVcpu *vcpu = NULL;
 int ret = 0;
 
 DPRINTF("kvm_destroy_vcpu\n");
@@ -353,10 +394,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
 }
 }
 
-vcpu = g_malloc0(sizeof(*vcpu));
-vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
-vcpu->kvm_fd = cpu->kvm_fd;
-QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+kvm_park_vcpu(cpu);
 err:
 return ret;
 }
@@ -384,7 +422,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
 }
 }
 
-return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
+return -1;
 }
 
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
@@ -395,19 +433,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
 
 trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
-ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+ret = kvm_create_vcpu(cpu);
 if (ret < 0) {
-error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed 
(%lu)",
+error_setg_errno(errp, -ret,
+ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
  kvm_arch_vcpu_id(cpu));
 goto err;
 }
 
-cpu->kvm_fd = ret;
-cpu->kvm_state = s;
-cpu->vcpu_dirty = true;
-cpu->dirty_pages = 0;
-cpu->throttle_us_per_full = 0;
-
 mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
 if (mmap_size < 0) {
 ret = mmap_size;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 115f0cca79..2c34889b01 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -473,6 +473,8 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int 
sigmask_len);
 
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
hwaddr *phys_addr);
+int kvm_create_vcpu(CPUState *cpu);
+void kvm_park_vcpu(CPUState *cpu);
 
 #endif /* NEED_CPU_H */
 
-- 
2.34.1




[PATCH RFC V2 03/37] hw/arm/virt: Move setting of common CPU properties in a function

2023-09-26 Thread Salil Mehta via
Factor out CPU properties code common for {hot,cold}-plugged CPUs. This allows
code reuse.

Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 220 ++
 include/hw/arm/virt.h |   4 +
 2 files changed, 140 insertions(+), 84 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 57fe97c242..0eb6bf5a18 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2018,16 +2018,130 @@ static void virt_cpu_post_init(VirtMachineState *vms, 
MemoryRegion *sysmem)
 }
 }
 
+static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot,
+Error **errp)
+{
+MachineState *ms = MACHINE(qdev_get_machine());
+VirtMachineState *vms = VIRT_MACHINE(ms);
+Error *local_err = NULL;
+VirtMachineClass *vmc;
+
+vmc = VIRT_MACHINE_GET_CLASS(ms);
+
+/* now, set the cpu object property values */
+numa_cpu_pre_plug(cpu_slot, DEVICE(cpuobj), &local_err);
+if (local_err) {
+goto out;
+}
+
+object_property_set_int(cpuobj, "mp-affinity", cpu_slot->arch_id, NULL);
+
+if (!vms->secure) {
+object_property_set_bool(cpuobj, "has_el3", false, NULL);
+}
+
+if (!vms->virt && object_property_find(cpuobj, "has_el2")) {
+object_property_set_bool(cpuobj, "has_el2", false, NULL);
+}
+
+if (vmc->kvm_no_adjvtime &&
+object_property_find(cpuobj, "kvm-no-adjvtime")) {
+object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL);
+}
+
+if (vmc->no_kvm_steal_time &&
+object_property_find(cpuobj, "kvm-steal-time")) {
+object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL);
+}
+
+if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) {
+object_property_set_bool(cpuobj, "pmu", false, NULL);
+}
+
+if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) {
+object_property_set_bool(cpuobj, "lpa2", false, NULL);
+}
+
+if (object_property_find(cpuobj, "reset-cbar")) {
+object_property_set_int(cpuobj, "reset-cbar",
+vms->memmap[VIRT_CPUPERIPHS].base,
+&local_err);
+if (local_err) {
+goto out;
+}
+}
+
+/* link already initialized {secure,tag}-memory regions to this cpu */
+object_property_set_link(cpuobj, "memory", OBJECT(vms->sysmem), 
&local_err);
+if (local_err) {
+goto out;
+}
+
+if (vms->secure) {
+object_property_set_link(cpuobj, "secure-memory",
+ OBJECT(vms->secure_sysmem), &local_err);
+if (local_err) {
+goto out;
+}
+}
+
+if (vms->mte) {
+if (!object_property_find(cpuobj, "tag-memory")) {
+error_setg(&local_err, "MTE requested, but not supported "
+   "by the guest CPU");
+if (local_err) {
+goto out;
+}
+}
+
+object_property_set_link(cpuobj, "tag-memory", OBJECT(vms->tag_sysmem),
+ &local_err);
+if (local_err) {
+goto out;
+}
+
+if (vms->secure) {
+object_property_set_link(cpuobj, "secure-tag-memory",
+ OBJECT(vms->secure_tag_sysmem),
+ &local_err);
+if (local_err) {
+goto out;
+}
+}
+}
+
+/*
+ * RFC: Question: this must only be called for the hotplugged cpus. For the
+ * cold booted secondary cpus this is being taken care in arm_load_kernel()
+ * in boot.c. Perhaps we should remove that code now?
+ */
+if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) {
+object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit,
+NULL);
+
+/* Secondary CPUs start in PSCI powered-down state */
+if (CPU(cpuobj)->cpu_index > 0) {
+object_property_set_bool(cpuobj, "start-powered-off", true, NULL);
+}
+}
+
+out:
+if (local_err) {
+error_propagate(errp, local_err);
+}
+return;
+}
+
 static void machvirt_init(MachineState *machine)
 {
 VirtMachineState *vms = VIRT_MACHINE(machine);
 VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
 MachineClass *mc = MACHINE_GET_CLASS(machine);
 const CPUArchIdList *possible_cpus;
-MemoryRegion *sysmem = get_system_memory();
+MemoryRegion *secure_tag_sysmem = NULL;
 MemoryRegion *secure_sysmem = NULL;
 MemoryRegion *tag_sysmem = NULL;
-MemoryRegion *secure_tag_sysmem = NULL;
+MemoryRegion *sysmem;
 int n, virt_max_cpus;
 bool firmware_loaded;
 bool aarch64 = true;
@@ -2071,6 +2185,8 @@ static void machvirt_init(MachineState *machine)
  */
 finalize_gic_version(vms);
 
+sysmem = vms->sysmem = get_system_memory();
+
 if (vms->secure) {
 /*
  * The Secu

[PATCH RFC V2 08/37] arm/virt: Init PMU at host for all possible vcpus

2023-09-26 Thread Salil Mehta via
PMU for all possible vCPUs must be initialized at the VM initialization time.
Refactor existing code to accommodate possible vCPUs. This also assumes that all
processors being used are identical.

Past discussion for reference:
Link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00131.html

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 12 
 include/hw/arm/virt.h |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a208b4e517..070c36054e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1960,12 +1960,14 @@ static void finalize_gic_version(VirtMachineState *vms)
  */
 static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
 {
+CPUArchIdList *possible_cpus = vms->parent.possible_cpus;
 int max_cpus = MACHINE(vms)->smp.max_cpus;
-bool aarch64, pmu, steal_time;
+bool aarch64, steal_time;
 CPUState *cpu;
+int n;
 
 aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL);
-pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
+vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
 steal_time = object_property_get_bool(OBJECT(first_cpu),
   "kvm-steal-time", NULL);
 
@@ -1992,8 +1994,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, 
MemoryRegion *sysmem)
 memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime);
 }
 
-CPU_FOREACH(cpu) {
-if (pmu) {
+for (n = 0; n < possible_cpus->len; n++) {
+cpu = qemu_get_possible_cpu(n);
+
+if (vms->pmu) {
 assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU));
 if (kvm_irqchip_in_kernel()) {
 kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ));
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 098c7917a4..fc0469c33f 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -164,6 +164,7 @@ struct VirtMachineState {
 bool ras;
 bool mte;
 bool dtb_randomness;
+bool pmu;
 OnOffAuto acpi;
 VirtGICType gic_version;
 VirtIOMMUType iommu;
-- 
2.34.1




[PATCH RFC V2 09/37] hw/acpi: Move CPU ctrl-dev MMIO region len macro to common header file

2023-09-26 Thread Salil Mehta via
CPU ctrl-dev MMIO region length could be used in ACPI GED (common ACPI code
across architectures) and various other architecture specific places. To make
these code places independent of compilation order, ACPI_CPU_HOTPLUG_REG_LEN
macro should be moved to a header file.

Signed-off-by: Salil Mehta 
---
 hw/acpi/cpu.c | 2 +-
 include/hw/acpi/cpu_hotplug.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 19c154d78f..45defdc0e2 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -1,12 +1,12 @@
 #include "qemu/osdep.h"
 #include "migration/vmstate.h"
 #include "hw/acpi/cpu.h"
+#include "hw/acpi/cpu_hotplug.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-acpi.h"
 #include "trace.h"
 #include "sysemu/numa.h"
 
-#define ACPI_CPU_HOTPLUG_REG_LEN 12
 #define ACPI_CPU_SELECTOR_OFFSET_WR 0
 #define ACPI_CPU_FLAGS_OFFSET_RW 4
 #define ACPI_CPU_CMD_OFFSET_WR 5
diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h
index 3b932a..48b291e45e 100644
--- a/include/hw/acpi/cpu_hotplug.h
+++ b/include/hw/acpi/cpu_hotplug.h
@@ -19,6 +19,8 @@
 #include "hw/hotplug.h"
 #include "hw/acpi/cpu.h"
 
+#define ACPI_CPU_HOTPLUG_REG_LEN 12
+
 typedef struct AcpiCpuHotplug {
 Object *device;
 MemoryRegion io;
-- 
2.34.1




[PATCH RFC V2 06/37] arm/virt, kvm: Pre-create disabled possible vCPUs @machine init

2023-09-26 Thread Salil Mehta via
In ARMv8 architecture, GIC needs all the vCPUs to be created and present when
it is initialized. This is because:
1. GICC and MPIDR association must be fixed at the VM initialization time.
   This is represented by register GIC_TYPER(mp_affinity, proc_num)
2. GICC(cpu interfaces), GICR(redistributors) etc all must be initialized
   at the boot time as well.
3. Memory regions associated with GICR etc. cannot be changed(add/del/mod)
   after VM has inited.

This patch adds the support to pre-create all such possible vCPUs within the
host using the KVM interface as part of the virt machine initialization. These
vCPUs could later be attached to QOM/ACPI while they are actually hot plugged
and made present.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Reported-by: Vishnu Pajjuri 
[VP: Identified CPU stall issue & suggested probable fix]
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 53 +--
 include/hw/core/cpu.h |  1 +
 target/arm/cpu64.c|  1 +
 target/arm/kvm.c  | 32 ++
 target/arm/kvm64.c|  9 +++-
 target/arm/kvm_arm.h  | 11 +
 6 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3668ad27ec..6ba131b799 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2293,8 +2293,10 @@ static void machvirt_init(MachineState *machine)
 assert(possible_cpus->len == max_cpus);
 for (n = 0; n < possible_cpus->len; n++) {
 Object *cpuobj;
+CPUState *cs;
 
 cpuobj = object_new(possible_cpus->cpus[n].type);
+cs = CPU(cpuobj);
 
 aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
 object_property_set_int(cpuobj, "socket-id",
@@ -2306,8 +2308,55 @@ static void machvirt_init(MachineState *machine)
 object_property_set_int(cpuobj, "thread-id",
 virt_get_thread_id(machine, n), NULL);
 
-qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);
-object_unref(cpuobj);
+if (n < smp_cpus) {
+qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);
+object_unref(cpuobj);
+} else {
+CPUArchId *cpu_slot;
+
+/* handling for vcpus which are yet to be hot-plugged */
+cs->cpu_index = n;
+cpu_slot = virt_find_cpu_slot(machine, cs->cpu_index);
+
+/*
+ * ARM host vCPU features need to be fixed at the boot time. But as
+ * per current approach this CPU object will be destroyed during
+ * cpu_post_init(). During hotplug of vCPUs these properties are
+ * initialized again.
+ */
+virt_cpu_set_properties(cpuobj, cpu_slot, &error_fatal);
+
+/*
+ * For KVM, we shall be pre-creating the now disabled/un-plugged
+ * possible host vcpus and park them till the time they are
+ * actually hot plugged. This is required to pre-size the host
+ * GICC and GICR with the all possible vcpus for this VM.
+ */
+if (kvm_enabled()) {
+kvm_arm_create_host_vcpu(ARM_CPU(cs));
+}
+/*
+ * Add disabled vCPU to CPU slot during the init phase of the virt
+ * machine
+ * 1. We need this ARMCPU object during the GIC init. This object
+ *will facilitate in pre-realizing the GIC. Any info like
+ *mp-affinity(required to derive gicr_type) etc. could still be
+ *fetched while preserving QOM abstraction akin to realized
+ *vCPUs.
+ * 2. Now, after initialization of the virt machine is complete we
+ *could use two approaches to deal with this ARMCPU object:
+ *(i) re-use this ARMCPU object during hotplug of this vCPU.
+ * OR
+ *(ii) defer release this ARMCPU object after gic has been
+ * initialized or during pre-plug phase when a vCPU is
+ * hotplugged.
+ *
+ *We will use the (ii) approach and release the ARMCPU objects
+ *after GIC and machine has been fully initialized during
+ *machine_init_done() phase.
+ */
+ cpu_slot->cpu = OBJECT(cs);
+}
 }
 fdt_add_timer_nodes(vms);
 fdt_add_cpu_nodes(vms);
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index e5af79950c..b2201a98ee 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -401,6 +401,7 @@ struct CPUState {
 uint32_t kvm_fetch_index;
 uint64_t dirty_pages;
 int kvm_vcpu_stats_fd;
+VMChangeStateEntry *vmcse;
 
 /* Use by accel-block: CPU is executing an ioctl() */
 QemuLockCnt in_ioctl_lock;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c

[PATCH RFC V2 07/37] arm/virt, gicv3: Changes to pre-size GIC with possible vcpus @machine init

2023-09-26 Thread Salil Mehta via
GIC needs to be pre-sized with possible vcpus at the initialization time. This
is necessary because Memory regions and resources associated with GICC/GICR
etc cannot be changed (add/del/modified) after VM has inited. Also, GIC_TYPER
needs to be initialized with mp_affinity and cpu interface number association.
This cannot be changed after GIC has initialized.

Once all the cpu interfaces of the GIC have been initialized, it must be ensured
that any updates to the GICC during reset only take place for the present
vcpus and not the disabled ones. Therefore, proper checks are required at
various places.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Jean-Philippe Brucker 
[changed the comment in arm_gicv3_icc_reset]
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c  | 15 ---
 hw/intc/arm_gicv3_common.c |  7 +--
 hw/intc/arm_gicv3_cpuif.c  |  8 
 hw/intc/arm_gicv3_kvm.c| 34 +++---
 include/hw/arm/virt.h  |  2 +-
 5 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6ba131b799..a208b4e517 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -718,6 +718,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
 const char *gictype;
 int i;
 unsigned int smp_cpus = ms->smp.cpus;
+unsigned int max_cpus = ms->smp.max_cpus;
 uint32_t nb_redist_regions = 0;
 int revision;
 
@@ -742,7 +743,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
 }
 vms->gic = qdev_new(gictype);
 qdev_prop_set_uint32(vms->gic, "revision", revision);
-qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus);
+qdev_prop_set_uint32(vms->gic, "num-cpu", max_cpus);
 /* Note that the num-irq property counts both internal and external
  * interrupts; there are always 32 of the former (mandated by GIC spec).
  */
@@ -753,7 +754,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
 
 if (vms->gic_version != VIRT_GIC_VERSION_2) {
 uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST);
-uint32_t redist0_count = MIN(smp_cpus, redist0_capacity);
+uint32_t redist0_count = MIN(max_cpus, redist0_capacity);
 
 nb_redist_regions = virt_gicv3_redist_region_count(vms);
 
@@ -774,7 +775,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
 virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
 
 qdev_prop_set_uint32(vms->gic, "redist-region-count[1]",
-MIN(smp_cpus - redist0_count, redist1_capacity));
+MIN(max_cpus - redist0_count, redist1_capacity));
 }
 } else {
 if (!kvm_irqchip_in_kernel()) {
@@ -831,7 +832,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
 } else if (vms->virt) {
 qemu_irq irq = qdev_get_gpio_in(vms->gic,
 ppibase + ARCH_GIC_MAINT_IRQ);
-sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq);
+sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq);
 }
 
 qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
@@ -839,11 +840,11 @@ static void create_gic(VirtMachineState *vms, 
MemoryRegion *mem)
  + VIRTUAL_PMU_IRQ));
 
 sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, 
ARM_CPU_IRQ));
-sysbus_connect_irq(gicbusdev, i + smp_cpus,
+sysbus_connect_irq(gicbusdev, i + max_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
-sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus,
+sysbus_connect_irq(gicbusdev, i + 2 * max_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
-sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus,
+sysbus_connect_irq(gicbusdev, i + 3 * max_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
 }
 
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 2ebf880ead..ebd99af610 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -392,10 +392,13 @@ static void arm_gicv3_common_realize(DeviceState *dev, 
Error **errp)
 s->cpu = g_new0(GICv3CPUState, s->num_cpu);
 
 for (i = 0; i < s->num_cpu; i++) {
-CPUState *cpu = qemu_get_cpu(i);
+CPUState *cpu = qemu_get_possible_cpu(i);
 uint64_t cpu_affid;
 
-s->cpu[i].cpu = cpu;
+if (qemu_enabled_cpu(cpu)) {
+s->cpu[i].cpu = cpu;
+}
+
 s->cpu[i].gic = s;
 /* Store GICv3CPUState in CPUARMState gicv3state pointer */
 gicv3_set_gicv3state(cpu, &s->cpu[i]);
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index d07b13eb27..7b7a0fdb9c 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -934

Re: [PATCH 05/12] device_tree: qmp_dumpdtb(): stronger assertion

2023-09-26 Thread Vladimir Sementsov-Ogievskiy

On 26.09.23 04:26, Alistair Francis wrote:

On Tue, Sep 26, 2023 at 6:42 AM Vladimir Sementsov-Ogievskiy
 wrote:


Coverity mark this size, got from the buffer as untrasted value, it's


s/untrasted/untrusted/g


will fix.




not good to use it as length when writing to file. Make the assertion
more strict to also check upper bound.

Signed-off-by: Vladimir Sementsov-Ogievskiy 


Reviewed-by: Alistair Francis 



Thanks!




---
  softmmu/device_tree.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c
index 30aa3aea9f..adc4236e21 100644
--- a/softmmu/device_tree.c
+++ b/softmmu/device_tree.c
@@ -660,7 +660,7 @@ void qmp_dumpdtb(const char *filename, Error **errp)

  size = fdt_totalsize(current_machine->fdt);

-g_assert(size > 0);
+g_assert(size > 0 && size <= FDT_MAX_SIZE);

  if (!g_file_set_contents(filename, current_machine->fdt, size, &err)) {
  error_setg(errp, "Error saving FDT to file %s: %s",
--
2.34.1




--
Best regards,
Vladimir




[PATCH RFC V2 10/37] arm/acpi: Enable ACPI support for vcpu hotplug

2023-09-26 Thread Salil Mehta via
ACPI is required to interface QEMU with the guest. Its role roughly falls into
the cases below:

1. Convey the possible vcpus config at the machine init time to the guest
   using various DSDT tables like MADT etc.
2. Convey vcpu hotplug events to guest(using GED)
3. Assist in evaluation of various ACPI methods(like _EVT, _STA, _OST, _EJ0,
   _MAT etc.)
4. Provides ACPI cpu hotplug state and 12 Byte memory mapped cpu hotplug
   control register interface to the OSPM/guest corresponding to each possible
   vcpu. The register interface consists of various R/W fields and their
   handling operations. These are called whenever register fields or memory
   regions are accessed (i.e. read or written) by OSPM whenever it evaluates
   various ACPI methods.

Note: A lot of this framework code is inherited from the changes already done
  for x86, but some minor changes are still required to make it compatible
  with ARM64.

This patch enables the ACPI support for virtual cpu hotplug. ACPI changes
required will follow in subsequent patches.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e68348440..dae06158cd 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -29,6 +29,7 @@ config ARM_VIRT
 select ACPI_HW_REDUCED
 select ACPI_APEI
 select ACPI_VIOT
+select ACPI_CPU_HOTPLUG
 select VIRTIO_MEM_SUPPORTED
 select ACPI_CXL
 select ACPI_HMAT
-- 
2.34.1




[PATCH RFC V2 13/37] hw/acpi: Init GED framework with cpu hotplug events

2023-09-26 Thread Salil Mehta via
ACPI GED(as described in the ACPI 6.2 spec) can be used to generate ACPI events
when OSPM/guest receives an interrupt listed in the _CRS object of GED. OSPM
then maps or demultiplexes the event by evaluating _EVT method.

This change adds the support of cpu hotplug event initialization in the
existing GED framework.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/acpi/generic_event_device.c | 8 
 include/hw/acpi/generic_event_device.h | 5 +
 2 files changed, 13 insertions(+)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index a3d31631fe..d2fa1d0e4a 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = {
 ACPI_GED_MEM_HOTPLUG_EVT,
 ACPI_GED_PWR_DOWN_EVT,
 ACPI_GED_NVDIMM_HOTPLUG_EVT,
+ACPI_GED_CPU_HOTPLUG_EVT,
 };
 
 /*
@@ -400,6 +401,13 @@ static void acpi_ged_initfn(Object *obj)
 memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st,
   TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT);
 sysbus_init_mmio(sbd, &ged_st->regs);
+
+s->cpuhp.device = OBJECT(s);
+memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container",
+   ACPI_CPU_HOTPLUG_REG_LEN);
+sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp);
+cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev),
+&s->cpuhp_state, 0);
 }
 
 static void acpi_ged_class_init(ObjectClass *class, void *data)
diff --git a/include/hw/acpi/generic_event_device.h 
b/include/hw/acpi/generic_event_device.h
index d831bbd889..d0a5a43abf 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -60,6 +60,7 @@
 #define HW_ACPI_GENERIC_EVENT_DEVICE_H
 
 #include "hw/sysbus.h"
+#include "hw/acpi/cpu_hotplug.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/ghes.h"
 #include "qom/object.h"
@@ -97,6 +98,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED)
 #define ACPI_GED_MEM_HOTPLUG_EVT   0x1
 #define ACPI_GED_PWR_DOWN_EVT  0x2
 #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4
+#define ACPI_GED_CPU_HOTPLUG_EVT0x8
 
 typedef struct GEDState {
 MemoryRegion evt;
@@ -108,6 +110,9 @@ struct AcpiGedState {
 SysBusDevice parent_obj;
 MemHotplugState memhp_state;
 MemoryRegion container_memhp;
+CPUHotplugState cpuhp_state;
+MemoryRegion container_cpuhp;
+AcpiCpuHotplug cpuhp;
 GEDState ged_state;
 uint32_t ged_event_bitmap;
 qemu_irq irq;
-- 
2.34.1




[PATCH RFC V2 11/37] hw/acpi: Add ACPI CPU hotplug init stub

2023-09-26 Thread Salil Mehta via
ACPI CPU hotplug related initialization should only happen if ACPI_CPU_HOTPLUG
support has been enabled for a particular architecture. Add a
cpu_hotplug_hw_init() stub to avoid a compilation break.

Signed-off-by: Salil Mehta 
---
 hw/acpi/acpi-cpu-hotplug-stub.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c
index 3fc4b14c26..c6c61bb9cd 100644
--- a/hw/acpi/acpi-cpu-hotplug-stub.c
+++ b/hw/acpi/acpi-cpu-hotplug-stub.c
@@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, 
Object *owner,
 return;
 }
 
+void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
+ CPUHotplugState *state, hwaddr base_addr)
+{
+return;
+}
+
 void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list)
 {
 return;
-- 
2.34.1




[PATCH RFC V2 12/37] hw/acpi: Use qemu_present_cpu() API in ACPI CPU hotplug init

2023-09-26 Thread Salil Mehta via
ACPI CPU Hotplug code assumes a virtual CPU is unplugged if the CPUState object
is absent in the list of the possible CPUs(CPUArchIdList *possible_cpus)
maintained on per-machine basis. Use the earlier introduced qemu_present_cpu()
API to check this state.

This change should have no bearing on the functionality of any architecture and
is merely a representational change.

Signed-off-by: Salil Mehta 
---
 hw/acpi/cpu.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 45defdc0e2..d5ba37b209 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -225,7 +225,10 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
 state->dev_count = id_list->len;
 state->devs = g_new0(typeof(*state->devs), state->dev_count);
 for (i = 0; i < id_list->len; i++) {
-state->devs[i].cpu =  CPU(id_list->cpus[i].cpu);
+struct CPUState *cpu = CPU(id_list->cpus[i].cpu);
+if (qemu_present_cpu(cpu)) {
+state->devs[i].cpu = cpu;
+}
 state->devs[i].arch_id = id_list->cpus[i].arch_id;
 }
 memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state,
-- 
2.34.1




[PATCH RFC V2 15/37] arm/virt: Create GED dev before *disabled* CPU Objs are destroyed

2023-09-26 Thread Salil Mehta via
ACPI CPU hotplug state (is_present=_STA.PRESENT, is_enabled=_STA.ENABLED) for
all the possible vCPUs MUST be initialized during machine init. This is done
during the creation of the GED device. VMM/Qemu MUST expose/fake the ACPI state
of the disabled vCPUs to the Guest kernel as 'present' (_STA.PRESENT) always
i.e. ACPI persistent. If the 'disabled' vCPU objects are destroyed before the
GED device has been created then their ACPI hotplug state might not get
initialized correctly as acpi_persistent flag is part of the CPUState. This will
expose wrong status of the unplugged vCPUs to the Guest kernel.

Hence, moving the GED device creation before disabled vCPU objects get destroyed
as part of the post CPU init routine.

Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5c8a0672dc..cbb6199ec6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2376,6 +2376,12 @@ static void machvirt_init(MachineState *machine)
 
 create_gic(vms, sysmem);
 
+has_ged = has_ged && aarch64 && firmware_loaded &&
+  virt_is_acpi_enabled(vms);
+if (has_ged) {
+vms->acpi_dev = create_acpi_ged(vms);
+}
+
 virt_cpu_post_init(vms, sysmem);
 
 fdt_add_pmu_nodes(vms);
@@ -2398,9 +2404,7 @@ static void machvirt_init(MachineState *machine)
 
 create_pcie(vms);
 
-if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
-vms->acpi_dev = create_acpi_ged(vms);
-} else {
+if (!has_ged) {
 create_gpio_devices(vms, VIRT_GPIO, sysmem);
 }
 
-- 
2.34.1




[PATCH RFC V2 14/37] arm/virt: Add cpu hotplug events to GED during creation

2023-09-26 Thread Salil Mehta via
Add CPU Hotplug event to the set of supported ged-events during the creation of
GED device during VM init. Also initialize the memory map for CPU Hotplug
control device used in event exchanges between Qemu/VMM and the guest.

Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 5 -
 include/hw/arm/virt.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 070c36054e..5c8a0672dc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -76,6 +76,7 @@
 #include "hw/mem/pc-dimm.h"
 #include "hw/mem/nvdimm.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/acpi/cpu_hotplug.h"
 #include "hw/virtio/virtio-md-pci.h"
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
@@ -155,6 +156,7 @@ static const MemMapEntry base_memmap[] = {
 [VIRT_NVDIMM_ACPI] ={ 0x0909, NVDIMM_ACPI_IO_LEN},
 [VIRT_PVTIME] = { 0x090a, 0x0001 },
 [VIRT_SECURE_GPIO] ={ 0x090b, 0x1000 },
+[VIRT_CPUHP_ACPI] = { 0x090c, ACPI_CPU_HOTPLUG_REG_LEN},
 [VIRT_MMIO] =   { 0x0a00, 0x0200 },
 /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
 [VIRT_PLATFORM_BUS] =   { 0x0c00, 0x0200 },
@@ -640,7 +642,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState 
*vms)
 DeviceState *dev;
 MachineState *ms = MACHINE(vms);
 int irq = vms->irqmap[VIRT_ACPI_GED];
-uint32_t event = ACPI_GED_PWR_DOWN_EVT;
+uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT;
 
 if (ms->ram_slots) {
 event |= ACPI_GED_MEM_HOTPLUG_EVT;
@@ -655,6 +657,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState 
*vms)
 
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base);
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, 
vms->memmap[VIRT_PCDIMM_ACPI].base);
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, vms->memmap[VIRT_CPUHP_ACPI].base);
 sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, 
irq));
 
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index fc0469c33f..09a0b2d4f0 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -85,6 +85,7 @@ enum {
 VIRT_PCDIMM_ACPI,
 VIRT_ACPI_GED,
 VIRT_NVDIMM_ACPI,
+VIRT_CPUHP_ACPI,
 VIRT_PVTIME,
 VIRT_LOWMEMMAP_LAST,
 };
-- 
2.34.1




[PATCH RFC V2 16/37] hw/acpi: Update CPUs AML with cpu-(ctrl)dev change

2023-09-26 Thread Salil Mehta via
CPUs Control device(\\_SB.PCI0) register interface for the x86 arch is based on
PCI and is IO port based and hence existing cpus AML code assumes _CRS objects
would evaluate to a system resource which describes IO Port address. But on ARM
arch CPUs control device(\\_SB.PRES) register interface is memory-mapped hence
_CRS object should evaluate to system resource which describes memory-mapped
base address.

This cpus AML code change updates the existing interface of the build cpus AML
function to accept both IO/MEMORY type regions and update the _CRS object
correspondingly.

NOTE: Besides the above, a CPU scan shall be triggered when OSPM evaluates the
  _EVT method of the GED framework, which is covered in a subsequent patch.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/acpi/cpu.c | 23 ---
 hw/i386/acpi-build.c  |  2 +-
 include/hw/acpi/cpu.h |  5 +++--
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index d5ba37b209..232720992d 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -341,9 +341,10 @@ const VMStateDescription vmstate_cpu_hotplug = {
 #define CPU_FW_EJECT_EVENT "CEJF"
 
 void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
-hwaddr io_base,
+hwaddr base_addr,
 const char *res_root,
-const char *event_handler_method)
+const char *event_handler_method,
+AmlRegionSpace rs)
 {
 Aml *ifctx;
 Aml *field;
@@ -370,13 +371,19 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0));
 
 crs = aml_resource_template();
-aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1,
+if (rs == AML_SYSTEM_IO) {
+aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1,
ACPI_CPU_HOTPLUG_REG_LEN));
+} else {
+aml_append(crs, aml_memory32_fixed(base_addr,
+   ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE));
+}
+
 aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs));
 
 /* declare CPU hotplug MMIO region with related access fields */
 aml_append(cpu_ctrl_dev,
-aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base),
+aml_operation_region("PRST", rs, aml_int(base_addr),
  ACPI_CPU_HOTPLUG_REG_LEN));
 
 field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK,
@@ -702,9 +709,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 aml_append(sb_scope, cpus_dev);
 aml_append(table, sb_scope);
 
-method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED);
-aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD));
-aml_append(table, method);
+if (event_handler_method) {
+method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED);
+aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD));
+aml_append(table, method);
+}
 
 g_free(cphp_res_path);
 }
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index bb12b0ad43..560f108d38 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1550,7 +1550,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 .fw_unplugs_cpu = pm->smi_on_cpu_unplug,
 };
 build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base,
-   "\\_SB.PCI0", "\\_GPE._E02");
+   "\\_SB.PCI0", "\\_GPE._E02", AML_SYSTEM_IO);
 }
 
 if (pcms->memhp_io_base && nr_mem) {
diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h
index 999caaf510..b87ebfdf4b 100644
--- a/include/hw/acpi/cpu.h
+++ b/include/hw/acpi/cpu.h
@@ -56,9 +56,10 @@ typedef struct CPUHotplugFeatures {
 } CPUHotplugFeatures;
 
 void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
-hwaddr io_base,
+hwaddr base_addr,
 const char *res_root,
-const char *event_handler_method);
+const char *event_handler_method,
+AmlRegionSpace rs);
 
 void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list);
 
-- 
2.34.1




[PATCH RFC V2 17/37] arm/virt/acpi: Build CPUs AML with CPU Hotplug support

2023-09-26 Thread Salil Mehta via
Support of vCPU Hotplug requires sequence of ACPI handshakes between Qemu and
Guest kernel when a vCPU is plugged or unplugged. Most of the AML code to
support these handshakes already exists. This AML needs to be built during VM
init for ARM architecture as well if the GED support exists.

Signed-off-by: Salil Mehta 
---
 hw/arm/virt-acpi-build.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..d27df5030e 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -858,7 +858,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
  * the RTC ACPI device at all when using UEFI.
  */
 scope = aml_scope("\\_SB");
-acpi_dsdt_add_cpus(scope, vms);
+/* if GED is enabled then cpus AML shall be added as part build_cpus_aml */
+if (vms->acpi_dev) {
+CPUHotplugFeatures opts = {
+ .acpi_1_compatible = false,
+ .has_legacy_cphp = false
+};
+
+build_cpus_aml(scope, ms, opts, memmap[VIRT_CPUHP_ACPI].base,
+   "\\_SB", NULL, AML_SYSTEM_MEMORY);
+} else {
+acpi_dsdt_add_cpus(scope, vms);
+}
 acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
 if (vmc->acpi_expose_flash) {
-- 
2.34.1




[PATCH RFC V2 18/37] arm/virt: Make ARM vCPU *present* status ACPI *persistent*

2023-09-26 Thread Salil Mehta via
ARM arch does not allow CPUs presence to be changed [1] after kernel has booted.
Hence, firmware/ACPI/Qemu must ensure persistent view of the vCPUs to the Guest
kernel even when they are not present in the QoM i.e. are unplugged or are
yet-to-be-plugged

References:
[1] Check comment 5 in the bugzilla entry
   Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5

Signed-off-by: Salil Mehta 
---
 cpus-common.c |  6 ++
 hw/arm/virt.c |  7 +++
 include/hw/core/cpu.h | 20 
 3 files changed, 33 insertions(+)

diff --git a/cpus-common.c b/cpus-common.c
index 24c04199a1..d64aa63b19 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -128,6 +128,12 @@ bool qemu_enabled_cpu(CPUState *cpu)
 return cpu && !cpu->disabled;
 }
 
+bool qemu_persistent_cpu(CPUState *cpu)
+{
+/* cpu state can be faked to the guest via acpi */
+return cpu->acpi_persistent;
+}
+
 uint64_t qemu_get_cpu_archid(int cpu_index)
 {
 MachineState *ms = MACHINE(qdev_get_machine());
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index cbb6199ec6..f1bee569d5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3006,6 +3006,13 @@ static void virt_cpu_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 return;
 }
 virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp);
+
+/*
+ * To give persistent presence view of vCPUs to the guest, ACPI might need
+ * to fake the presence of the vCPUs to the guest but keep them disabled.
+ * This shall be used during the init of ACPI Hotplug state and hot-unplug
+ */
+ cs->acpi_persistent = true;
 }
 
 static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index b2201a98ee..dab572c9bd 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -425,6 +425,13 @@ struct CPUState {
  * By default every CPUState is enabled as of now across all archs.
  */
 bool disabled;
+/*
+ * On certain architectures, to give persistent view of the 'presence' of
+ * vCPUs to the guest, ACPI might need to fake the 'presence' of the vCPUs
+ * but keep them ACPI disabled to the guest. This is done by returning
+ * _STA.PRES=True and _STA.Ena=False for the unplugged vCPUs in QEMU QoM.
+ */
+bool acpi_persistent;
 /* TODO Move common fields from CPUArchState here. */
 int cpu_index;
 int cluster_index;
@@ -814,6 +821,19 @@ bool qemu_present_cpu(CPUState *cpu);
  */
 bool qemu_enabled_cpu(CPUState *cpu);
 
+/**
+ * qemu_persistent_cpu:
+ * @cpu: The vCPU to check
+ *
+ * Checks if the vCPU state should always be reflected as *present* via ACPI
+ * to the Guest. By default, this is False on all architectures and has to be
+ * explicity set during initialization.
+ *
+ * Returns: True if it is ACPI 'persistent' CPU
+ *
+ */
+bool qemu_persistent_cpu(CPUState *cpu);
+
 /**
  * qemu_get_cpu_archid:
  * @cpu_index: possible vCPU for which arch-id needs to be retreived
-- 
2.34.1




[PATCH RFC V2 19/37] hw/acpi: ACPI/AML Changes to reflect the correct _STA.{PRES, ENA} Bits to Guest

2023-09-26 Thread Salil Mehta via
ACPI AML changes to properly reflect the _STA.PRES and _STA.ENA Bits to the
guest during initialization, when CPUs are hotplugged and after CPUs are
hot-unplugged.

Signed-off-by: Salil Mehta 
---
 hw/acpi/cpu.c  | 49 +++---
 hw/acpi/generic_event_device.c | 11 
 include/hw/acpi/cpu.h  |  2 ++
 3 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 232720992d..e1299696d3 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -63,10 +63,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, 
unsigned size)
 cdev = &cpu_st->devs[cpu_st->selector];
 switch (addr) {
 case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */
-val |= cdev->cpu ? 1 : 0;
+val |= cdev->is_enabled ? 1 : 0;
 val |= cdev->is_inserting ? 2 : 0;
 val |= cdev->is_removing  ? 4 : 0;
 val |= cdev->fw_remove  ? 16 : 0;
+val |= cdev->is_present ? 32 : 0;
 trace_cpuhp_acpi_read_flags(cpu_st->selector, val);
 break;
 case ACPI_CPU_CMD_DATA_OFFSET_RW:
@@ -228,7 +229,21 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
 struct CPUState *cpu = CPU(id_list->cpus[i].cpu);
 if (qemu_present_cpu(cpu)) {
 state->devs[i].cpu = cpu;
+state->devs[i].is_present = true;
+} else {
+if (qemu_persistent_cpu(cpu)) {
+state->devs[i].is_present = true;
+} else {
+state->devs[i].is_present = false;
+}
 }
+
+if (qemu_enabled_cpu(cpu)) {
+state->devs[i].is_enabled = true;
+} else {
+state->devs[i].is_enabled = false;
+}
+
 state->devs[i].arch_id = id_list->cpus[i].arch_id;
 }
 memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state,
@@ -261,6 +276,8 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev,
 }
 
 cdev->cpu = CPU(dev);
+cdev->is_present = true;
+cdev->is_enabled = true;
 if (dev->hotplugged) {
 cdev->is_inserting = true;
 acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS);
@@ -292,6 +309,11 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st,
 return;
 }
 
+cdev->is_enabled = false;
+if (!qemu_persistent_cpu(CPU(dev))) {
+cdev->is_present = false;
+}
+
 cdev->cpu = NULL;
 }
 
@@ -302,6 +324,8 @@ static const VMStateDescription vmstate_cpuhp_sts = {
 .fields  = (VMStateField[]) {
 VMSTATE_BOOL(is_inserting, AcpiCpuStatus),
 VMSTATE_BOOL(is_removing, AcpiCpuStatus),
+VMSTATE_BOOL(is_present, AcpiCpuStatus),
+VMSTATE_BOOL(is_enabled, AcpiCpuStatus),
 VMSTATE_UINT32(ost_event, AcpiCpuStatus),
 VMSTATE_UINT32(ost_status, AcpiCpuStatus),
 VMSTATE_END_OF_LIST()
@@ -339,6 +363,7 @@ const VMStateDescription vmstate_cpu_hotplug = {
 #define CPU_REMOVE_EVENT  "CRMV"
 #define CPU_EJECT_EVENT   "CEJ0"
 #define CPU_FW_EJECT_EVENT "CEJF"
+#define CPU_PRESENT   "CPRS"
 
 void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
 hwaddr base_addr,
@@ -399,7 +424,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1));
 /* tell firmware to do device eject, write only */
 aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1));
-aml_append(field, aml_reserved_field(3));
+/* 1 if present, read only */
+aml_append(field, aml_named_field(CPU_PRESENT, 1));
+aml_append(field, aml_reserved_field(2));
 aml_append(field, aml_named_field(CPU_COMMAND, 8));
 aml_append(cpu_ctrl_dev, field);
 
@@ -429,6 +456,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK);
 Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR);
 Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED);
+Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT);
 Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND);
 Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA);
 Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT);
@@ -457,13 +485,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 {
 Aml *idx = aml_arg(0);
 Aml *sta = aml_local(0);
+Aml *ifctx2;
+Aml *else_ctx;
 
 aml_append(method, aml_acquire(ctrl_lock, 0x));
 aml_append(method, aml_store(idx, cpu_selector));
 aml_append(method, aml_store(zero, sta));
-ifctx = aml_if(aml_equal(is_enabled, one));
+ifctx = aml_if(aml_equal(is_present, one));
 {

[PATCH RFC V2 20/37] hw/acpi: Update GED _EVT method AML with cpu scan

2023-09-26 Thread Salil Mehta via
OSPM evaluates _EVT method to map the event. The cpu hotplug event eventually
results in start of the cpu scan. Scan figures out the cpu and the kind of
event(plug/unplug) and notifies it back to the guest.

The change in this patch updates the GED AML _EVT method with the call to
\\_SB.CPUS.CSCN which will do above.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/acpi/generic_event_device.c | 4 
 include/hw/acpi/cpu_hotplug.h  | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index b84602b238..ad252e6a91 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -108,6 +108,10 @@ void build_ged_aml(Aml *table, const char *name, 
HotplugHandler *hotplug_dev,
 aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "."
  MEMORY_SLOT_SCAN_METHOD));
 break;
+case ACPI_GED_CPU_HOTPLUG_EVT:
+aml_append(if_ctx, aml_call0(ACPI_CPU_CONTAINER "."
+ ACPI_CPU_SCAN_METHOD));
+break;
 case ACPI_GED_PWR_DOWN_EVT:
 aml_append(if_ctx,
aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE),
diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h
index 48b291e45e..ef631750b4 100644
--- a/include/hw/acpi/cpu_hotplug.h
+++ b/include/hw/acpi/cpu_hotplug.h
@@ -20,6 +20,8 @@
 #include "hw/acpi/cpu.h"
 
 #define ACPI_CPU_HOTPLUG_REG_LEN 12
+#define ACPI_CPU_SCAN_METHOD "CSCN"
+#define ACPI_CPU_CONTAINER "\\_SB.CPUS"
 
 typedef struct AcpiCpuHotplug {
 Object *device;
-- 
2.34.1




[PATCH RFC V2 21/37] hw/arm: MADT Tbl change to size the guest with possible vCPUs

2023-09-26 Thread Salil Mehta via
Changes required during building of MADT Table by QEMU to accommodate disabled
possible vCPUs. This info shall be used by the guest kernel to size up its
resources during boot time. This pre-sizing of the guest kernel done on
possible vCPUs will facilitate hotplug of the disabled vCPUs.

This change also caters for the ACPI MADT GIC CPU Interface flag related changes
recently introduced in the UEFI ACPI 6.5 Specification which allows deferred
virtual CPU online'ing in the Guest Kernel.

Link: 
https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#gic-cpu-interface-gicc-structure

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt-acpi-build.c | 36 ++--
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index d27df5030e..cbccd2ca2d 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -700,6 +700,29 @@ static void build_append_gicr(GArray *table_data, uint64_t 
base, uint32_t size)
 build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length 
*/
 }
 
+static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu)
+{
+MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+
+/* can only exist in 'enabled' state */
+if (!mc->has_hotpluggable_cpus) {
+return 1;
+}
+
+/*
+ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot
+ * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the
+ * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged.
+ * Though as-of-now this is only used as a debugging feature.
+ *
+ *   UEFI ACPI Specification 6.5
+ *   Section: 5.2.12.14. GIC CPU Interface (GICC) Structure
+ *   Table:   5.37 GICC CPU Interface Flags
+ *   Link: https://uefi.org/specs/ACPI/6.5
+ */
+return cpu && !cpu->cpu_index ? 1 : (1 << 3);
+}
+
 static void
 build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
@@ -726,12 +749,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 build_append_int_noprefix(table_data, vms->gic_version, 1);
 build_append_int_noprefix(table_data, 0, 3);   /* Reserved */
 
-for (i = 0; i < MACHINE(vms)->smp.cpus; i++) {
-ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
+for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) {
+CPUState *cpu = qemu_get_possible_cpu(i);
 uint64_t physical_base_address = 0, gich = 0, gicv = 0;
 uint32_t vgic_interrupt = vms->virt ? PPI(ARCH_GIC_MAINT_IRQ) : 0;
-uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ?
- PPI(VIRTUAL_PMU_IRQ) : 0;
+uint32_t pmu_interrupt = vms->pmu ? PPI(VIRTUAL_PMU_IRQ) : 0;
+uint32_t flags = virt_acpi_get_gicc_flags(cpu);
+uint64_t mpidr = qemu_get_cpu_archid(i);
 
 if (vms->gic_version == VIRT_GIC_VERSION_2) {
 physical_base_address = memmap[VIRT_GIC_CPU].base;
@@ -746,7 +770,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 build_append_int_noprefix(table_data, i, 4);/* GIC ID */
 build_append_int_noprefix(table_data, i, 4);/* ACPI Processor UID 
*/
 /* Flags */
-build_append_int_noprefix(table_data, 1, 4);/* Enabled */
+build_append_int_noprefix(table_data, flags, 4);
 /* Parking Protocol Version */
 build_append_int_noprefix(table_data, 0, 4);
 /* Performance Interrupt GSIV */
@@ -760,7 +784,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 build_append_int_noprefix(table_data, vgic_interrupt, 4);
 build_append_int_noprefix(table_data, 0, 8);/* GICR Base Address*/
 /* MPIDR */
-build_append_int_noprefix(table_data, armcpu->mp_affinity, 8);
+build_append_int_noprefix(table_data, mpidr, 8);
 /* Processor Power Efficiency Class */
 build_append_int_noprefix(table_data, 0, 1);
 /* Reserved */
-- 
2.34.1




[PATCH RFC V2 23/37] arm/virt: Release objects for *disabled* possible vCPUs after init

2023-09-26 Thread Salil Mehta via
During machvirt_init(), QOM ARMCPU objects are also pre-created along with the
corresponding KVM vCPUs in the host for all possible vCPUs. This is necessary
because of an architectural constraint: KVM restricts the deferred creation of
the KVM vCPUs and VGIC initialization/sizing after VM init. Hence, VGIC is
pre-sized with possible vCPUs.

After initialization of the machine is complete disabled possible KVM vCPUs are
then parked at the per-virt-machine list "kvm_parked_vcpus" and we release the
QOM ARMCPU objects for the disabled vCPUs. These shall be re-created at the time
when vCPU is hotplugged again. QOM ARMCPU object is then re-attached with
corresponding parked KVM vCPU.

Alternatively, we could've never released the QOM CPU objects and kept on
reusing. This approach might require some modifications of qdevice_add()
interface to get old ARMCPU object instead of creating a new one for the hotplug
request.

Each of the above approaches comes with its own pros and cons. This prototype
uses the 1st approach. (Suggestions are welcome!)

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f1bee569d5..3b068534a8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1965,6 +1965,7 @@ static void virt_cpu_post_init(VirtMachineState *vms, 
MemoryRegion *sysmem)
 {
 CPUArchIdList *possible_cpus = vms->parent.possible_cpus;
 int max_cpus = MACHINE(vms)->smp.max_cpus;
+MachineState *ms = MACHINE(vms);
 bool aarch64, steal_time;
 CPUState *cpu;
 int n;
@@ -2025,6 +2026,37 @@ static void virt_cpu_post_init(VirtMachineState *vms, 
MemoryRegion *sysmem)
 }
 }
 }
+
+if (kvm_enabled() || tcg_enabled()) {
+for (n = 0; n < possible_cpus->len; n++) {
+cpu = qemu_get_possible_cpu(n);
+
+/*
+ * Now, GIC has been sized with possible CPUs and we dont require
+ * disabled vCPU objects to be represented in the QOM. Release the
+ * disabled ARMCPU objects earlier used during init for pre-sizing.
+ *
+ * We fake to the guest through ACPI about the 
presence(_STA.PRES=1)
+ * of these non-existent vCPUs at VMM/qemu and present these as
+ * disabled vCPUs(_STA.ENA=0) so that they cant be used. These 
vCPUs
+ * can be later added to the guest through hotplug exchanges when
+ * ARMCPU objects are created back again using 'device_add' QMP
+ * command.
+ */
+/*
+ * RFC: Question: Other approach could've been to keep them forever
+ * and release it only once when qemu exits as part of finalize or
+ * when new vCPU is hotplugged. In the later old could be released
+ * for the newly created object for the same vCPU?
+ */
+if (!qemu_enabled_cpu(cpu)) {
+CPUArchId *cpu_slot;
+cpu_slot = virt_find_cpu_slot(ms, cpu->cpu_index);
+cpu_slot->cpu = NULL;
+object_unref(OBJECT(cpu));
+}
+}
+}
 }
 
 static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot,
-- 
2.34.1




[PATCH RFC V2 22/37] hw/acpi: Make _MAT method optional

2023-09-26 Thread Salil Mehta via
From: Jean-Philippe Brucker 

The GICC interface on arm64 vCPUs is statically defined in the MADT, and
doesn't require a _MAT entry. Although the GICC is indicated as present
by the MADT entry, it can only be used from vCPU sysregs, which aren't
accessible until hot-add.

Co-developed-by: Jean-Philippe Brucker 
Signed-off-by: Jean-Philippe Brucker 
Co-developed-by: Jonathan Cameron 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Salil Mehta 
---
 hw/acpi/cpu.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index e1299696d3..217db99538 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -715,11 +715,13 @@ void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts,
 aml_append(dev, method);
 
 /* build _MAT object */
-assert(adevc && adevc->madt_cpu);
-adevc->madt_cpu(i, arch_ids, madt_buf,
-true); /* set enabled flag */
-aml_append(dev, aml_name_decl("_MAT",
-aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data)));
+if (adevc && adevc->madt_cpu) {
+assert(adevc && adevc->madt_cpu);
+adevc->madt_cpu(i, arch_ids, madt_buf,
+true); /* set enabled flag */
+aml_append(dev, aml_name_decl("_MAT",
+aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data)));
+}
 g_array_free(madt_buf, true);
 
 if (CPU(arch_ids->cpus[i].cpu) != first_cpu) {
-- 
2.34.1




[PATCH RFC V2 24/37] hw/acpi: Update ACPI GED framework to support vCPU Hotplug

2023-09-26 Thread Salil Mehta via
ACPI GED shall be used to notify the guest kernel about any CPU hot-(un)plug
events. Therefore, the existing ACPI GED framework inside QEMU needs to be
enhanced to support CPU hotplug state and events.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/acpi/generic_event_device.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index ad252e6a91..0266733a54 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/cpu.h"
 #include "hw/acpi/generic_event_device.h"
 #include "hw/irq.h"
 #include "hw/mem/pc-dimm.h"
@@ -239,6 +240,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler 
*hotplug_dev,
 } else {
 acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp);
 }
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
 } else {
 error_setg(errp, "virt: device plug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -253,6 +256,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
!(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM {
 acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp);
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
 } else {
 error_setg(errp, "acpi: device unplug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -266,6 +271,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
 
 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
 acpi_memory_unplug_cb(&s->memhp_state, dev, errp);
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
 } else {
 error_setg(errp, "acpi: device unplug for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -277,6 +284,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, 
ACPIOSTInfoList ***list)
 AcpiGedState *s = ACPI_GED(adev);
 
 acpi_memory_ospm_status(&s->memhp_state, list);
+acpi_cpu_ospm_status(&s->cpuhp_state, list);
 }
 
 static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
@@ -291,6 +299,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, 
AcpiEventStatusBits ev)
 sel = ACPI_GED_PWR_DOWN_EVT;
 } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) {
 sel = ACPI_GED_NVDIMM_HOTPLUG_EVT;
+} else if (ev & ACPI_CPU_HOTPLUG_STATUS) {
+sel = ACPI_GED_CPU_HOTPLUG_EVT;
 } else {
 /* Unknown event. Return without generating interrupt. */
 warn_report("GED: Unsupported event %d. No irq injected", ev);
-- 
2.34.1




[PATCH RFC V2 25/37] arm/virt: Add/update basic hot-(un)plug framework

2023-09-26 Thread Salil Mehta via
Add CPU hot-unplug hooks and update hotplug hooks with additional sanity checks
for use in hotplug paths.

Note: the functional contents of the hooks (currently left as TODO comments)
shall be gradually filled in by the subsequent patches, building up the logic
incrementally, roughly as follows:
1. (Un-)wiring of interrupts between vCPU<->GIC
2. Sending events to Guest for hot-(un)plug so that guest can take appropriate
   actions.
3. Notifying GIC about hot-(un)plug action so that vCPU could be (un-)stitched
   to the GIC CPU interface.
4. Updating the Guest with Next boot info for this vCPU in the firmware.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 104 ++
 1 file changed, 104 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3b068534a8..dce02136cb 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -81,6 +81,7 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
 #include "qemu/guest-random.h"
+#include "qapi/qmp/qdict.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -2985,12 +2986,23 @@ static void virt_cpu_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 {
 VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
 MachineState *ms = MACHINE(hotplug_dev);
+MachineClass *mc = MACHINE_GET_CLASS(ms);
 ARMCPU *cpu = ARM_CPU(dev);
 CPUState *cs = CPU(dev);
 CPUArchId *cpu_slot;
 int32_t min_cpuid = 0;
 int32_t max_cpuid;
 
+if (dev->hotplugged && !vms->acpi_dev) {
+error_setg(errp, "GED acpi device does not exists");
+return;
+}
+
+if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
+error_setg(errp, "CPU hotplug not supported on this machine");
+return;
+}
+
 /* sanity check the cpu */
 if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
 error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
@@ -3039,6 +3051,22 @@ static void virt_cpu_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 }
 virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp);
 
+/*
+ * Fix the GIC for this new vCPU being plugged. The QOM CPU object for the
+ * new vCPU need to be updated in the corresponding QOM GICv3CPUState 
object
+ * We also need to re-wire the IRQs for this new CPU object. This update
+ * is limited to the QOM only and does not affects the KVM. Later has
+ * already been pre-sized with possible CPU at VM init time. This is a
+ * workaround to the constraints posed by ARM architecture w.r.t supporting
+ * CPU Hotplug. Specification does not exist for the later.
+ * This patch-up is required both for {cold,hot}-plugged vCPUs. Cold-inited
+ * vCPUs have their GIC state initialized during machvit_init().
+ */
+if (vms->acpi_dev) {
+/* TODO: update GIC about this hotplug change here */
+/* TODO: wire the GIC<->CPU irqs */
+}
+
 /*
  * To give persistent presence view of vCPUs to the guest, ACPI might need
  * to fake the presence of the vCPUs to the guest but keep them disabled.
@@ -3050,6 +3078,7 @@ static void virt_cpu_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
   Error **errp)
 {
+VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
 MachineState *ms = MACHINE(hotplug_dev);
 CPUState *cs = CPU(dev);
 CPUArchId *cpu_slot;
@@ -3058,10 +3087,81 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index);
 cpu_slot->cpu = OBJECT(dev);
 
+/*
+ * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged.
+ * vCPUs can be cold-plugged using '-device' option. For vCPUs being hot
+ * plugged, guest is also notified.
+ */
+if (vms->acpi_dev) {
+/* TODO: update acpi hotplug state. Send cpu hotplug event to guest */
+/* TODO: register cpu for reset & update F/W info for the next boot */
+}
+
 cs->disabled = false;
 return;
 }
 
+static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
+DeviceState *dev, Error **errp)
+{
+MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+ARMCPU *cpu = ARM_CPU(dev);
+CPUState *cs = CPU(dev);
+
+if (!vms->acpi_dev || !dev->realized) {
+error_setg(errp, "GED does not exists or device is not realized!");
+return;
+}
+
+if (!mc->has_hotpluggable_cpus) {
+error_setg(errp, "CPU hot(un)plug not supported on this machine");
+return;
+}
+
+if (cs->cpu_index == first_cpu->cpu_index

Re: [PATCH 50/52] migration/rdma: Silence qemu_rdma_cleanup()

2023-09-26 Thread Zhijian Li (Fujitsu)


On 18/09/2023 22:42, Markus Armbruster wrote:
> Functions that use an Error **errp parameter to return errors should
> not also report them to the user, because reporting is the caller's
> job.  When the caller does, the error is reported twice.  When it
> doesn't (because it recovered from the error), there is no error to
> report, i.e. the report is bogus.
> 
> qemu_rdma_source_init(), qemu_rdma_connect(),
> rdma_start_incoming_migration(), and rdma_start_outgoing_migration()
> violate this principle: they call error_report() via
> qemu_rdma_cleanup().
> 
> Moreover, qemu_rdma_cleanup() can't fail.  It is called on error
> paths, and QIOChannel close and finalization.  Are the conditions it
> reports really errors?  I doubt it.

I'm not very sure. It's fine if it's called from the error path, but when
the caller is migration_cancel from HMP/QMP, shall we report something more,
even though we know QEMU can recover?

Maybe change it to a warning, etc.




> 
> Clean this up: silence qemu_rdma_cleanup().  I believe that's fine for
> all these callers.  If it isn't, we need to convert to Error instead.
> 
> Signed-off-by: Markus Armbruster 
> ---
>   migration/rdma.c | 6 +-
>   1 file changed, 1 insertion(+), 5 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index d9f80ef390..be2db7946d 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2330,7 +2330,6 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext 
> *rdma,
>   
>   static void qemu_rdma_cleanup(RDMAContext *rdma)
>   {
> -Error *err = NULL;
>   int idx;
>   
>   if (rdma->cm_id && rdma->connected) {
> @@ -2341,10 +2340,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>  .type = RDMA_CONTROL_ERROR,
>  .repeat = 1,
>};
> -error_report("Early error. Sending error.");
> -if (qemu_rdma_post_send_control(rdma, NULL, &head, &err) < 0) {
> -error_report_err(err);
> -}
> +qemu_rdma_post_send_control(rdma, NULL, &head, NULL);
>   }
>   
>   rdma_disconnect(rdma->cm_id);

[PATCH RFC V2 26/37] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during hot-(un)plug

2023-09-26 Thread Salil Mehta via
Refactors the existing GIC create code to extract the common code that wires the
vcpu<->gic interrupts. This function can be used in the cold-plug case and also
when a vCPU is hot-plugged. It also introduces a new function to unwire the
vcpu<->gic interrupts for the vCPU hot-unplug cases.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c  | 139 -
 hw/core/gpio.c |   2 +-
 include/hw/qdev-core.h |   2 +
 3 files changed, 99 insertions(+), 44 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index dce02136cb..5b829e47b7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -714,6 +714,99 @@ static void create_v2m(VirtMachineState *vms)
 vms->msi_controller = VIRT_MSI_CTRL_GICV2M;
 }
 
+/*
+ * Mapping from the output timer irq lines from the CPU to the GIC PPI inputs
+ * we use for the virt board.
+ */
+const int timer_irq[] = {
+[GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
+[GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
+[GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
+[GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
+};
+
+static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs)
+{
+MachineState *ms = MACHINE(vms);
+unsigned int max_cpus = ms->smp.max_cpus;
+DeviceState *cpudev = DEVICE(cs);
+DeviceState *gicdev = vms->gic;
+int cpu = CPU(cs)->cpu_index;
+int type = vms->gic_version;
+int irq;
+
+for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
+qdev_disconnect_gpio_out_named(cpudev, NULL, irq);
+}
+
+if (type != VIRT_GIC_VERSION_2) {
+qdev_disconnect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+   0);
+} else if (vms->virt) {
+qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ,
+   cpu + 4 * max_cpus);
+}
+
+/*
+ * RFC: Question: This currently does not takes care of intimating the
+ * devices which might be sitting on system bus. Do we need a
+ * sysbus_disconnect_irq() which also does the job of notification beside
+ * disconnection?
+ */
+qdev_disconnect_gpio_out_named(cpudev, "pmu-interrupt", 0);
+qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, cpu);
+qdev_disconnect_gpio_out_named(gicdev,
+   SYSBUS_DEVICE_GPIO_IRQ, cpu + max_cpus);
+qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ,
+   cpu + 2 * max_cpus);
+qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ,
+   cpu + 3 * max_cpus);
+}
+
+static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs)
+{
+MachineState *ms = MACHINE(vms);
+unsigned int max_cpus = ms->smp.max_cpus;
+DeviceState *cpudev = DEVICE(cs);
+DeviceState *gicdev = vms->gic;
+int cpu = CPU(cs)->cpu_index;
+int type = vms->gic_version;
+SysBusDevice *gicbusdev;
+int ppibase;
+int irq;
+
+ppibase = NUM_IRQS + cpu * GIC_INTERNAL + GIC_NR_SGIS;
+
+for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
+qdev_connect_gpio_out(cpudev, irq,
+  qdev_get_gpio_in(gicdev,
+   ppibase + timer_irq[irq]));
+}
+
+gicbusdev = SYS_BUS_DEVICE(gicdev);
+if (type != VIRT_GIC_VERSION_2) {
+qemu_irq irq = qdev_get_gpio_in(gicdev,
+ppibase + ARCH_GIC_MAINT_IRQ);
+qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+0, irq);
+} else if (vms->virt) {
+qemu_irq irq = qdev_get_gpio_in(gicdev,
+ppibase + ARCH_GIC_MAINT_IRQ);
+sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus, irq);
+}
+
+qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
+qdev_get_gpio_in(gicdev,
+ ppibase + VIRTUAL_PMU_IRQ));
+sysbus_connect_irq(gicbusdev, cpu, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+sysbus_connect_irq(gicbusdev, cpu + max_cpus,
+   qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+sysbus_connect_irq(gicbusdev, cpu + 2 * max_cpus,
+   qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
+sysbus_connect_irq(gicbusdev, cpu + 3 * max_cpus,
+   qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
+}
+
 static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
 {
 MachineState *ms = MACHINE(vms);
@@ -809,47 +902,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
  * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs.
  */
 for (i = 0; i < smp_cpus; i++) {
-DeviceState *cpudev = DEVICE(qemu_get_cpu(i));
-int ppibase = NUM

[PATCH RFC V2 27/37] hw/arm, gicv3: Changes to update GIC with vCPU hot-plug notification

2023-09-26 Thread Salil Mehta via
vCPU hot-(un)plug events MUST be notified to the GIC. Introduce a notification
mechanism to report any such events to the GIC so that it can update its vCPU
to GIC CPU interface association.

This is required to implement a workaround for the limitations posed by the ARM
architecture. For details about the constraints and workarounds, please check
the slides below:

Link: https://kvm-forum.qemu.org/2023/talk/9SMPDQ/

Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c  | 27 +--
 hw/intc/arm_gicv3_common.c | 54 +-
 hw/intc/arm_gicv3_cpuif_common.c   |  5 +++
 hw/intc/gicv3_internal.h   |  1 +
 include/hw/arm/virt.h  |  1 +
 include/hw/intc/arm_gicv3_common.h | 22 
 6 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5b829e47b7..b447e86fb6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -666,6 +666,16 @@ static inline DeviceState 
*create_acpi_ged(VirtMachineState *vms)
 return dev;
 }
 
+static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms)
+{
+MachineClass *mc = MACHINE_GET_CLASS(vms);
+
+if (mc->has_hotpluggable_cpus) {
+Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic);
+notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier);
+}
+}
+
 static void create_its(VirtMachineState *vms)
 {
 const char *itsclass = its_class_name();
@@ -912,6 +922,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion 
*mem)
 } else if (vms->gic_version == VIRT_GIC_VERSION_2) {
 create_v2m(vms);
 }
+
+/* add GIC CPU hot(un)plug update notifier */
+virt_add_gic_cpuhp_notifier(vms);
 }
 
 static void create_uart(const VirtMachineState *vms, int uart,
@@ -2384,6 +2397,8 @@ static void machvirt_init(MachineState *machine)
 
 create_fdt(vms);
 
+notifier_list_init(&vms->cpuhp_notifiers);
+possible_cpus = mc->possible_cpu_arch_ids(machine);
 assert(possible_cpus->len == max_cpus);
 for (n = 0; n < possible_cpus->len; n++) {
 Object *cpuobj;
@@ -3034,6 +3049,14 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
  dev, &error_abort);
 }
 
+static void virt_update_gic(VirtMachineState *vms, CPUState *cs)
+{
+GICv3CPUHotplugInfo gic_info = { .gic = vms->gic, .cpu = cs };
+
+/* notify gic to stitch GICC to this new cpu */
+notifier_list_notify(&vms->cpuhp_notifiers, &gic_info);
+}
+
 static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
   Error **errp)
 {
@@ -3116,7 +3139,7 @@ static void virt_cpu_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
  * vCPUs have their GIC state initialized during machvit_init().
  */
 if (vms->acpi_dev) {
-/* TODO: update GIC about this hotplug change here */
+virt_update_gic(vms, cs);
 wire_gic_cpu_irqs(vms, cs);
 }
 
@@ -3202,7 +3225,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */
 
 unwire_gic_cpu_irqs(vms, cs);
-/* TODO: update the GIC about this hot unplug change */
+virt_update_gic(vms, cs);
 
 /* TODO: unregister cpu for reset & update F/W info for the next boot */
 
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index ebd99af610..fc87fa9369 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -33,7 +33,6 @@
 #include "hw/arm/linux-boot-if.h"
 #include "sysemu/kvm.h"
 
-
 static void gicv3_gicd_no_migration_shift_bug_post_load(GICv3State *cs)
 {
 if (cs->gicd_no_migration_shift_bug) {
@@ -322,6 +321,56 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, 
qemu_irq_handler handler,
 }
 }
 
+static int arm_gicv3_get_proc_num(GICv3State *s, CPUState *cpu)
+{
+uint64_t mp_affinity;
+uint64_t gicr_typer;
+uint64_t cpu_affid;
+int i;
+
+mp_affinity = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL);
+/* match the cpu mp-affinity to get the gic cpuif number */
+for (i = 0; i < s->num_cpu; i++) {
+gicr_typer = s->cpu[i].gicr_typer;
+cpu_affid = (gicr_typer >> 32) & 0xFF;
+if (cpu_affid == mp_affinity) {
+return i;
+}
+}
+
+return -1;
+}
+
+static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data)
+{
+GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data;
+CPUState *cpu = gic_info->cpu;
+int gic_cpuif_num;
+GICv3State *s;
+
+s = ARM_GICV3_COMMON(gic_info->gic);
+
+/* this shall get us mapped gicv3 cpuif corresponding to mpidr */
+gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu);
+if (gic_cpuif_num < 0) {
+error_report("Failed to associate cpu %d with any GIC cpuif",
+ cpu->cpu_index);
+abort();
+

[PATCH RFC V2 30/37] hw/arm: Changes required for reset and to support next boot

2023-09-26 Thread Salil Mehta via
Updates the firmware config with the next boot cpus information and also
registers the reset callback to be called when guest reboots to reset the cpu.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/boot.c |  2 +-
 hw/arm/virt.c | 18 +++---
 include/hw/arm/boot.h |  2 ++
 include/hw/arm/virt.h |  1 +
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 720f22531a..2a2d27c20a 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -682,7 +682,7 @@ fail:
 return -1;
 }
 
-static void do_cpu_reset(void *opaque)
+void do_cpu_reset(void *opaque)
 {
 ARMCPU *cpu = opaque;
 CPUState *cs = CPU(cpu);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6f5ee4a1c6..e46f529801 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -45,6 +45,8 @@
 #include "sysemu/device_tree.h"
 #include "sysemu/numa.h"
 #include "sysemu/runstate.h"
+#include "sysemu/reset.h"
+#include "sysemu/sysemu.h"
 #include "sysemu/tpm.h"
 #include "sysemu/tcg.h"
 #include "sysemu/kvm.h"
@@ -1357,7 +1359,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState 
*vms, AddressSpace *as)
 char *nodename;
 
 fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as);
-fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus);
+fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus);
 
 nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
 qemu_fdt_add_subnode(ms->fdt, nodename);
@@ -3177,7 +3179,13 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 if (local_err) {
 goto fail;
 }
-/* TODO: register cpu for reset & update F/W info for the next boot */
+/* register this cpu for reset & update F/W info for the next boot */
+qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
+}
+
+vms->boot_cpus++;
+if (vms->fw_cfg) {
+fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus);
 }
 
 cs->disabled = false;
@@ -3252,7 +3260,11 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 unwire_gic_cpu_irqs(vms, cs);
 virt_update_gic(vms, cs);
 
-/* TODO: unregister cpu for reset & update F/W info for the next boot */
+qemu_unregister_reset(do_cpu_reset, ARM_CPU(cs));
+vms->boot_cpus--;
+if (vms->fw_cfg) {
+fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus);
+}
 
 qobject_unref(dev->opts);
 dev->opts = NULL;
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
index 80c492d742..f81326a1dc 100644
--- a/include/hw/arm/boot.h
+++ b/include/hw/arm/boot.h
@@ -178,6 +178,8 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu,
 int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
  hwaddr addr_limit, AddressSpace *as, MachineState *ms);
 
+void do_cpu_reset(void *opaque);
+
 /* Write a secure board setup routine with a dummy handler for SMCs */
 void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu,
 const struct arm_boot_info *info,
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index f9a748a5a9..a130fdad52 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -176,6 +176,7 @@ struct VirtMachineState {
 MemMapEntry *memmap;
 char *pciehb_nodename;
 const int *irqmap;
+uint16_t boot_cpus;
 int fdt_size;
 uint32_t clock_phandle;
 uint32_t gic_phandle;
-- 
2.34.1




Re: [PATCH 11/12] hw/core/loader: read_targphys(): add upper bound

2023-09-26 Thread Vladimir Sementsov-Ogievskiy

On 25.09.23 23:12, Michael Tokarev wrote:

25.09.2023 22:40, Vladimir Sementsov-Ogievskiy wrote:

Coverity doesn't like using "untrusted" values, coming from buffers and
fd-s as length to do IO and allocations. And that's make sense. The


"And that makes sense".  Just a nitpick in commit comment.


function is used three times with "untrusted" nbytes parameter. Let's
introduce at least empirical limit of 1G for it.

While being here make the function static, as it's used only here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  hw/core/loader.c    | 13 ++---
  include/hw/loader.h |  2 --
  2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index aa02b27089..48cff6f59e 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -101,17 +101,24 @@ ssize_t load_image_size(const char *filename, void *addr, 
size_t size)
  return actsize < 0 ? -1 : l;
  }
+#define READ_TARGPHYS_MAX_BYTES (1024 * 1024 * 1024)
  /* read()-like version */
-ssize_t read_targphys(const char *name,
-  int fd, hwaddr dst_addr, size_t nbytes)
+static ssize_t read_targphys(const char *name,
+ int fd, hwaddr dst_addr, size_t nbytes)
  {
  uint8_t *buf;
  ssize_t did;
+    if (nbytes > READ_TARGPHYS_MAX_BYTES) {
+    return -1;


Right now this is not important, since the only user of this
function, load_aout(), ignores errno value and reports general
failure instead.  Original read_targphys() returned errno which
corresponds to failed read().


Agree, will fix to -EINVAL



FWIW, at least load_aout() assumes we've read whole struct exec
from the file in question, which might not be the case.



Hmm, right. Will fix too.

Thanks for reviewing!

--
Best regards,
Vladimir




[PATCH RFC V2 28/37] hw/intc/arm-gicv3*: Changes required to (re)init the vCPU register info

2023-09-26 Thread Salil Mehta via
vCPU register info needs to be re-initialized each time a vCPU is hot-plugged.
This has to be done for both the emulation/TCG and KVM cases. This is done in
the context of the GIC update notification for any vCPU hot-(un)plug events.
This change adds that support and refactors the existing code to maximize reuse.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/intc/arm_gicv3.c|   1 +
 hw/intc/arm_gicv3_common.c |   7 +-
 hw/intc/arm_gicv3_cpuif.c  | 257 +++--
 hw/intc/arm_gicv3_kvm.c|   7 +-
 hw/intc/gicv3_internal.h   |   1 +
 include/hw/intc/arm_gicv3_common.h |   1 +
 6 files changed, 150 insertions(+), 124 deletions(-)

diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index 0b8f79a122..e1c7c8c4bc 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -410,6 +410,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void 
*data)
 ARMGICv3Class *agc = ARM_GICV3_CLASS(klass);
 
 agcc->post_load = arm_gicv3_post_load;
+agcc->init_cpu_reginfo = gicv3_init_cpu_reginfo;
 device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize);
 }
 
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index fc87fa9369..d051024a30 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -345,10 +345,12 @@ static void arm_gicv3_cpu_update_notifier(Notifier 
*notifier, void * data)
 {
 GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data;
 CPUState *cpu = gic_info->cpu;
+ARMGICv3CommonClass *c;
 int gic_cpuif_num;
 GICv3State *s;
 
 s = ARM_GICV3_COMMON(gic_info->gic);
+c = ARM_GICV3_COMMON_GET_CLASS(s);
 
 /* this shall get us mapped gicv3 cpuif corresponding to mpidr */
 gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu);
@@ -368,7 +370,10 @@ static void arm_gicv3_cpu_update_notifier(Notifier 
*notifier, void * data)
 gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]);
 gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu);
 
-/* TODO: initialize the registers info for this newly added cpu */
+/* initialize the registers info for this newly added cpu */
+if (c->init_cpu_reginfo) {
+c->init_cpu_reginfo(cpu);
+}
 }
 
 static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 7b7a0fdb9c..70fc2cc858 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -2782,6 +2782,127 @@ static const ARMCPRegInfo 
gicv3_cpuif_ich_apxr23_reginfo[] = {
 },
 };
 
+void gicv3_init_cpu_reginfo(CPUState *cs)
+{
+ARMCPU *cpu = ARM_CPU(cs);
+GICv3CPUState *gcs = icc_cs_from_env(&cpu->env);
+
+/*
+ * If the CPU doesn't define a GICv3 configuration, probably because
+ * in real hardware it doesn't have one, then we use default values
+ * matching the one used by most Arm CPUs. This applies to:
+ *  cpu->gic_num_lrs
+ *  cpu->gic_vpribits
+ *  cpu->gic_vprebits
+ *  cpu->gic_pribits
+ */
+
+/*
+ * Note that we can't just use the GICv3CPUState as an opaque pointer
+ * in define_arm_cp_regs_with_opaque(), because when we're called back
+ * it might be with code translated by CPU 0 but run by CPU 1, in
+ * which case we'd get the wrong value.
+ * So instead we define the regs with no ri->opaque info, and
+ * get back to the GICv3CPUState from the CPUARMState.
+ */
+define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+
+/*
+ * The CPU implementation specifies the number of supported
+ * bits of physical priority. For backwards compatibility
+ * of migration, we have a compat property that forces use
+ * of 8 priority bits regardless of what the CPU really has.
+ */
+if (gcs->gic->force_8bit_prio) {
+gcs->pribits = 8;
+} else {
+gcs->pribits = cpu->gic_pribits ?: 5;
+}
+
+/*
+ * The GICv3 has separate ID register fields for virtual priority
+ * and preemption bit values, but only a single ID register field
+ * for the physical priority bits. The preemption bit count is
+ * always the same as the priority bit count, except that 8 bits
+ * of priority means 7 preemption bits. We precalculate the
+ * preemption bits because it simplifies the code and makes the
+ * parallels between the virtual and physical bits of the GIC
+ * a bit clearer.
+ */
+gcs->prebits = gcs->pribits;
+if (gcs->prebits == 8) {
+gcs->prebits--;
+}
+/*
+ * Check that CPU code defining pribits didn't violate
+ * architectural constraints our implementation relies on.
+ */
+g_assert(gcs->pribits >= 4 && gcs->pribits <= 8);
+
+/*
+ * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions
+ * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them.
+

[PATCH RFC V2 29/37] arm/virt: Update the guest(via GED) about CPU hot-(un)plug events

2023-09-26 Thread Salil Mehta via
During any vCPU hot-(un)plug, the running guest VM needs to be informed about the
new vCPU being added, or asked to remove a vCPU which is already part
of the guest VM. This is done using the ACPI GED event which eventually gets
demultiplexed to a CPU hotplug event and further to the specific hot-(un)plug
event of a particular vCPU.

This change adds the ACPI calls to the existing hot-(un)plug hooks to trigger
ACPI GED events from QEMU to guest VM.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Signed-off-by: Salil Mehta 
---
 hw/arm/virt.c | 33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index b447e86fb6..6f5ee4a1c6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3157,6 +3157,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
 MachineState *ms = MACHINE(hotplug_dev);
 CPUState *cs = CPU(dev);
+Error *local_err = NULL;
 CPUArchId *cpu_slot;
 
 /* insert the cold/hot-plugged vcpu in the slot */
@@ -3169,12 +3170,20 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
  * plugged, guest is also notified.
  */
 if (vms->acpi_dev) {
-/* TODO: update acpi hotplug state. Send cpu hotplug event to guest */
+HotplugHandlerClass *hhc;
+/* update acpi hotplug state and send cpu hotplug event to guest */
+hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev);
+hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err);
+if (local_err) {
+goto fail;
+}
 /* TODO: register cpu for reset & update F/W info for the next boot */
 }
 
 cs->disabled = false;
 return;
+fail:
+error_propagate(errp, local_err);
 }
 
 static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
@@ -3182,8 +3191,10 @@ static void virt_cpu_unplug_request(HotplugHandler 
*hotplug_dev,
 {
 MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
 VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+HotplugHandlerClass *hhc;
 ARMCPU *cpu = ARM_CPU(dev);
 CPUState *cs = CPU(dev);
+Error *local_err = NULL;
 
 if (!vms->acpi_dev || !dev->realized) {
 error_setg(errp, "GED does not exists or device is not realized!");
@@ -3202,9 +3213,16 @@ static void virt_cpu_unplug_request(HotplugHandler 
*hotplug_dev,
 return;
 }
 
-/* TODO: request cpu hotplug from guest */
+/* request cpu hot-unplug from guest */
+hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev);
+hhc->unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err);
+if (local_err) {
+goto fail;
+}
 
 return;
+fail:
+error_propagate(errp, local_err);
 }
 
 static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
@@ -3212,7 +3230,9 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 {
 VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
 MachineState *ms = MACHINE(hotplug_dev);
+HotplugHandlerClass *hhc;
 CPUState *cs = CPU(dev);
+Error *local_err = NULL;
 CPUArchId *cpu_slot;
 
 if (!vms->acpi_dev || !dev->realized) {
@@ -3222,7 +3242,12 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 
 cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index);
 
-/* TODO: update the acpi cpu hotplug state for cpu hot-unplug */
+/* update the acpi cpu hotplug state for cpu hot-unplug */
+hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev);
+hhc->unplug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err);
+if (local_err) {
+goto fail;
+}
 
 unwire_gic_cpu_irqs(vms, cs);
 virt_update_gic(vms, cs);
@@ -3236,6 +3261,8 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 cs->disabled = true;
 
 return;
+fail:
+error_propagate(errp, local_err);
 }
 
 static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
-- 
2.34.1




[PATCH RFC V2 31/37] physmem, gdbstub: Common helping funcs/changes to *unrealize* vCPU

2023-09-26 Thread Salil Mehta via
Supporting vCPU hotplug for the ARM architecture also means introducing the new
functionality of unrealizing an ARMCPU. This requires some new common functions.

Define them as part of an architecture-independent change so that this code can
be reused by other interested parties.

Signed-off-by: Salil Mehta 
---
 gdbstub/gdbstub.c | 13 +
 include/exec/cpu-common.h |  8 
 include/exec/gdbstub.h|  1 +
 include/hw/core/cpu.h |  1 +
 softmmu/physmem.c | 25 +
 5 files changed, 48 insertions(+)

diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 5f28d5cf57..ddbcb4f115 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -491,6 +491,19 @@ void gdb_register_coprocessor(CPUState *cpu,
 }
 }
 
+void gdb_unregister_coprocessor_all(CPUState *cpu)
+{
+GDBRegisterState *s, *p;
+
+p = cpu->gdb_regs;
+while (p) {
+s = p;
+p = p->next;
+g_free(s);
+}
+cpu->gdb_regs = NULL;
+}
+
 static void gdb_process_breakpoint_remove_all(GDBProcess *p)
 {
 CPUState *cpu = gdb_get_first_cpu_in_process(p);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 87dc9a752c..27cd4d32b1 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -120,6 +120,14 @@ size_t qemu_ram_pagesize_largest(void);
  */
 void cpu_address_space_init(CPUState *cpu, int asidx,
 const char *prefix, MemoryRegion *mr);
+/**
+ * cpu_address_space_destroy:
+ * @cpu: CPU for which address space needs to be destroyed
+ * @asidx: integer index of this address space
+ *
+ * Note that with KVM only one address space is supported.
+ */
+void cpu_address_space_destroy(CPUState *cpu, int asidx);
 
 void cpu_physical_memory_rw(hwaddr addr, void *buf,
 hwaddr len, bool is_write);
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 7d743fe1e9..a22f0875e2 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -17,6 +17,7 @@ typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t 
*buf, int reg);
 void gdb_register_coprocessor(CPUState *cpu,
   gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg,
   int num_regs, const char *xml, int g_pos);
+void gdb_unregister_coprocessor_all(CPUState *cpu);
 
 /**
  * gdbserver_start: start the gdb server
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index dab572c9bd..ffd815a0d8 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -366,6 +366,7 @@ struct CPUState {
 QSIMPLEQ_HEAD(, qemu_work_item) work_list;
 
 CPUAddressSpace *cpu_ases;
+int cpu_ases_ref_count;
 int num_ases;
 AddressSpace *as;
 MemoryRegion *memory;
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3df73542e1..a93ae783af 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -762,6 +762,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
 
 if (!cpu->cpu_ases) {
 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
+cpu->cpu_ases_ref_count = cpu->num_ases;
 }
 
 newas = &cpu->cpu_ases[asidx];
@@ -775,6 +776,30 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
 }
 }
 
+void cpu_address_space_destroy(CPUState *cpu, int asidx)
+{
+CPUAddressSpace *cpuas;
+
+assert(asidx < cpu->num_ases);
+assert(asidx == 0 || !kvm_enabled());
+assert(cpu->cpu_ases);
+
+cpuas = &cpu->cpu_ases[asidx];
+if (tcg_enabled()) {
+memory_listener_unregister(&cpuas->tcg_as_listener);
+}
+
+address_space_destroy(cpuas->as);
+g_free_rcu(cpuas->as, rcu);
+
+if (cpu->cpu_ases_ref_count == 1) {
+g_free(cpu->cpu_ases);
+cpu->cpu_ases = NULL;
+}
+
+cpu->cpu_ases_ref_count--;
+}
+
 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 {
 /* Return the AddressSpace corresponding to the specified index */
-- 
2.34.1




Re: [PATCH 24/52] migration/rdma: Return -1 instead of negative errno code

2023-09-26 Thread Zhijian Li (Fujitsu)


On 18/09/2023 22:41, Markus Armbruster wrote:
> Several functions return negative errno codes on failure.  Callers
> check for specific codes exactly never.  For some of the functions,
> callers couldn't check even if they wanted to, because the functions
> also return negative values that aren't errno codes, leaving readers
> confused on what the function actually returns.
> 
> Clean up and simplify: return -1 instead of negative errno code.
> 
> Signed-off-by: Markus Armbruster 


Reviewed-by: Li Zhijian 


> ---
>   migration/rdma.c | 44 ++--
>   1 file changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index efbb3c7754..d0af258468 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -857,14 +857,14 @@ static int qemu_rdma_broken_ipv6_kernel(struct 
> ibv_context *verbs, Error **errp)
>   } else {
>   error_setg_errno(errp, errno,
>"could not open RDMA device context");
> -return -EINVAL;
> +return -1;
>   }
>   }
>   
>   if (ibv_query_port(verbs, 1, &port_attr)) {
>   ibv_close_device(verbs);
>   ERROR(errp, "Could not query initial IB port");
> -return -EINVAL;
> +return -1;
>   }
>   
>   if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
> @@ -889,7 +889,7 @@ static int qemu_rdma_broken_ipv6_kernel(struct 
> ibv_context *verbs, Error **errp)
>   ERROR(errp, "You only have RoCE / iWARP devices in your 
> systems"
>   " and your management software has specified 
> '[::]'"
>   ", but IPv6 over RoCE / iWARP is not supported 
> in Linux.");
> -return -ENONET;
> +return -1;
>   }
>   }
>   
> @@ -905,13 +905,13 @@ static int qemu_rdma_broken_ipv6_kernel(struct 
> ibv_context *verbs, Error **errp)
>   /* IB ports start with 1, not 0 */
>   if (ibv_query_port(verbs, 1, &port_attr)) {
>   ERROR(errp, "Could not query initial IB port");
> -return -EINVAL;
> +return -1;
>   }
>   
>   if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
>   ERROR(errp, "Linux kernel's RoCE / iWARP does not support IPv6 "
>   "(but patches on linux-rdma in progress)");
> -return -ENONET;
> +return -1;
>   }
>   
>   #endif
> @@ -1409,7 +1409,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext 
> *rdma)
>   
>   if (ret != 0) {
>   perror("unregistration chunk failed");
> -return -ret;
> +return -1;
>   }
>   rdma->total_registrations--;
>   
> @@ -1554,7 +1554,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext 
> *rdma,
>   if (ret) {
>   error_report("failed to get cm event while wait "
>"completion channel");
> -return -EPIPE;
> +return -1;
>   }
>   
>   error_report("receive cm event while wait comp channel,"
> @@ -1562,7 +1562,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext 
> *rdma,
>   if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
>   cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
>   rdma_ack_cm_event(cm_event);
> -return -EPIPE;
> +return -1;
>   }
>   rdma_ack_cm_event(cm_event);
>   }
> @@ -1575,18 +1575,18 @@ static int qemu_rdma_wait_comp_channel(RDMAContext 
> *rdma,
> * I don't trust errno from qemu_poll_ns
>*/
>   error_report("%s: poll failed", __func__);
> -return -EPIPE;
> +return -1;
>   }
>   
>   if (migrate_get_current()->state == 
> MIGRATION_STATUS_CANCELLING) {
>   /* Bail out and let the cancellation happen */
> -return -EPIPE;
> +return -1;
>   }
>   }
>   }
>   
>   if (rdma->received_error) {
> -return -EPIPE;
> +return -1;
>   }
>   return -!!rdma->error_state;
>   }
> @@ -1751,7 +1751,7 @@ static int qemu_rdma_post_send_control(RDMAContext 
> *rdma, uint8_t *buf,
>   
>   if (ret > 0) {
>   error_report("Failed to use post IB SEND for control");
> -return -ret;
> +return -1;
>   }
>   
>   ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_SEND_CONTROL, NULL);
> @@ -1820,15 +1820,15 @@ static int 
> qemu_rdma_exchange_get_response(RDMAContext *rdma,
>   if (head-

Re: [PATCH 25/52] migration/rdma: Dumb down remaining int error values to -1

2023-09-26 Thread Zhijian Li (Fujitsu)


On 18/09/2023 22:41, Markus Armbruster wrote:
> This is just to make the error value more obvious.  Callers don't
> mind.
> 
> Signed-off-by: Markus Armbruster 

Reviewed-by: Li Zhijian 

Re: [PATCH 26/52] migration/rdma: Replace int error_state by bool errored

2023-09-26 Thread Zhijian Li (Fujitsu)


On 25/09/2023 15:09, Markus Armbruster wrote:
> "Zhijian Li (Fujitsu)"  writes:
> 
>> On 18/09/2023 22:41, Markus Armbruster wrote:
>>> All we do with the value of RDMAContext member @error_state is test
>>> whether it's zero.  Change to bool and rename to @errored.
>>>
>>
>> make sense!
>>
>> Reviewed-by: Li Zhijian 
>>
>> Can we move this patch ahead "[PATCH 23/52] migration/rdma: Clean up 
>> qemu_rdma_wait_comp_channel()'s error value",
>> so that [23/52] [24/52] [25/52] will be more easy to review.
> 
> I think I could squash PATCH 23 into "[PATCH 25/52] migration/rdma: Dumb
> down remaining int error values to -1".  Would that work for you?

Yeah~, thank you


> 

Re: [PATCH 12/12] io/channel-socket: qio_channel_socket_flush(): improve msg validation

2023-09-26 Thread Vladimir Sementsov-Ogievskiy

On 26.09.23 12:04, Maksim Davydov wrote:

Could you add a comment into the commit message why ee_data must be
bigger than ee_info?


As I understand, in this case ee_info is the lower bound and ee_data is the upper bound 
of the notification range:

https://docs.kernel.org/networking/msg_zerocopy.html#notification-parsing

and the next line "sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;" 
actually depends on it.

So, I'll add:

For SO_EE_ORIGIN_ZEROCOPY the 32-bit notification range is encoded
as [ee_info, ee_data] inclusively, so ee_info should be less or
equal to ee_data.



On 9/25/23 22:40, Vladimir Sementsov-Ogievskiy wrote:

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  io/channel-socket.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/io/channel-socket.c b/io/channel-socket.c
index 02ffb51e99..3a899b0608 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -782,6 +782,11 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
   "Error not from zero copy");
  return -1;
  }
+    if (serr->ee_data < serr->ee_info) {
+    error_setg_errno(errp, serr->ee_origin,
+ "Wrong notification bounds");
+    return -1;
+    }
  /* No errors, count successfully finished sendmsg()*/
  sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;




--
Best regards,
Vladimir




Re: [PATCH v4 13/14] MAINTAINERS: add maintainer of simpletrace.py

2023-09-26 Thread Mads Ynddal


> You're welcome to be the maintainer. I haven't touched it in years.
> 
> When you have reviewed future simpletrace.py patches you can either post
> your Reviewed-by and I'll include them in my tracing pull requests, or
> you could send pull requests to the qemu.git maintainer yourself
> (requires publishing a GPG key and signing pull request tags).
> 
> Please let me know which option you prefer.

For future patches, I would like to send pull requests myself with a GPG key to 
get the practice. I can have Klaus Jensen  sign my key to
get started.

—
Mads Ynddal




[PATCH] mailmap: Fix Andrey Drobyshev author email

2023-09-26 Thread andrey . drobyshev--- via
From: Andrey Drobyshev 

This fixes authorship of commits 2848289168, 52b10c9c0c as the mailing
list rewrote the "From:" field in the corresponding patches.  See commit
3bd2608db7 ("maint: Add .mailmap entries for patches claiming list
authorship") for explanation.

Signed-off-by: Andrey Drobyshev 
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 64ef9f4de6..04a7feb005 100644
--- a/.mailmap
+++ b/.mailmap
@@ -46,6 +46,7 @@ Ian McKellar  Ian McKellar via Qemu-devel 
 Julia Suvorova via Qemu-devel 

 Justin Terry (VM)  Justin Terry (VM) via Qemu-devel 

 Stefan Weil  Stefan Weil via 
+Andrey Drobyshev  Andrey Drobyshev via 

 
 # Next, replace old addresses by a more recent one.
 Aleksandar Markovic  

-- 
2.41.0




Re: [PATCH 01/12] hw/core/loader: load_at(): check size

2023-09-26 Thread Peter Maydell
On Mon, 25 Sept 2023 at 20:41, Vladimir Sementsov-Ogievskiy
 wrote:
>
> This @size parameter often comes from fd. We'd better check it before
> doing read and allocation.
>
> Chose 1G as high enough empiric bound.

Empirical for who?

> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  hw/core/loader.c | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/hw/core/loader.c b/hw/core/loader.c
> index 4dd5a71fb7..4b67543046 100644
> --- a/hw/core/loader.c
> +++ b/hw/core/loader.c
> @@ -281,11 +281,26 @@ ssize_t load_aout(const char *filename, hwaddr addr, 
> int max_sz,
>
>  /* ELF loader */
>
> +#define ELF_LOAD_MAX (1024 * 1024 * 1024)
> +
>  static void *load_at(int fd, off_t offset, size_t size)
>  {
>  void *ptr;
> -if (lseek(fd, offset, SEEK_SET) < 0)
> +
> +/*
> + * We often come here with @size, which was previously read from file
> + * descriptor too. That's not good to read and allocate for unchecked
> + * number of bytes. Coverity also doesn't like it and generate problems.
> + * So, let's limit all load_at() calls to ELF_LOAD_MAX at least.
> + */
> +if (size > ELF_LOAD_MAX) {
>  return NULL;
> +}
> +
> +if (lseek(fd, offset, SEEK_SET) < 0) {
> +return NULL;
> +}
> +
>  ptr = g_malloc(size);
>  if (read(fd, ptr, size) != size) {
>  g_free(ptr);

This doesn't really help anything:
 (1) if the value is really big, it doesn't cause any terrible
consequences -- QEMU will just exit because the allocation
fails, which is fine because this will be at QEMU startup
and only happens if the user running QEMU gives us a silly file
 (2) we do a lot of other "allocate and abort on failure"
elsewhere in the ELF loader, for instance the allocations of
the symbol table and relocs in the load_symbols and
elf_reloc functions, and then on a bigger scale when we
work with the actual data in the ELF file

thanks
-- PMM



[PATCH v5 04/14] simpletrace: changed naming of edict and idtoname to improve readability

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

Readability is subjective, but I've expanded the naming of the variables
and arguments, to help with understanding for new eyes on the code.

Signed-off-by: Mads Ynddal 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
---
 scripts/simpletrace.py | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 283b5918a1..09511f624d 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -40,17 +40,17 @@ def read_header(fobj, hfmt):
 return None
 return struct.unpack(hfmt, hdr)
 
-def get_record(edict, idtoname, rechdr, fobj):
+def get_record(event_mapping, event_id_to_name, rechdr, fobj):
 """Deserialize a trace record from a file into a tuple
(name, timestamp, pid, arg1, ..., arg6)."""
 if rechdr is None:
 return None
 if rechdr[0] != dropped_event_id:
 event_id = rechdr[0]
-name = idtoname[event_id]
+name = event_id_to_name[event_id]
 rec = (name, rechdr[1], rechdr[3])
 try:
-event = edict[name]
+event = event_mapping[name]
 except KeyError as e:
 sys.stderr.write('%s event is logged but is not declared ' \
  'in the trace events file, try using ' \
@@ -79,10 +79,10 @@ def get_mapping(fobj):
 
 return (event_id, name)
 
-def read_record(edict, idtoname, fobj):
+def read_record(event_mapping, event_id_to_name, fobj):
 """Deserialize a trace record from a file into a tuple (event_num, 
timestamp, pid, arg1, ..., arg6)."""
 rechdr = read_header(fobj, rec_header_fmt)
-return get_record(edict, idtoname, rechdr, fobj)
+return get_record(event_mapping, event_id_to_name, rechdr, fobj)
 
 def read_trace_header(fobj):
 """Read and verify trace file header"""
@@ -103,14 +103,14 @@ def read_trace_header(fobj):
 raise ValueError('Log format %d not supported with this QEMU release!'
  % log_version)
 
-def read_trace_records(edict, idtoname, fobj):
+def read_trace_records(event_mapping, event_id_to_name, fobj):
 """Deserialize trace records from a file, yielding record tuples 
(event_num, timestamp, pid, arg1, ..., arg6).
 
-Note that `idtoname` is modified if the file contains mapping records.
+Note that `event_id_to_name` is modified if the file contains mapping 
records.
 
 Args:
-edict (str -> Event): events dict, indexed by name
-idtoname (int -> str): event names dict, indexed by event ID
+event_mapping (str -> Event): events dict, indexed by name
+event_id_to_name (int -> str): event names dict, indexed by event ID
 fobj (file): input file
 
 """
@@ -122,9 +122,9 @@ def read_trace_records(edict, idtoname, fobj):
 (rectype, ) = struct.unpack('=Q', t)
 if rectype == record_type_mapping:
 event_id, name = get_mapping(fobj)
-idtoname[event_id] = name
+event_id_to_name[event_id] = name
 else:
-rec = read_record(edict, idtoname, fobj)
+rec = read_record(event_mapping, event_id_to_name, fobj)
 
 yield rec
 
@@ -201,16 +201,16 @@ def process(events, log, analyzer, read_header=True):
 frameinfo = inspect.getframeinfo(inspect.currentframe())
 dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)",
 frameinfo.lineno + 1, frameinfo.filename)
-edict = {"dropped": dropped_event}
-idtoname = {dropped_event_id: "dropped"}
+event_mapping = {"dropped": dropped_event}
+event_id_to_name = {dropped_event_id: "dropped"}
 
 for event in events_list:
-edict[event.name] = event
+event_mapping[event.name] = event
 
 # If there is no header assume event ID mapping matches events list
 if not read_header:
 for event_id, event in enumerate(events_list):
-idtoname[event_id] = event.name
+event_id_to_name[event_id] = event.name
 
 def build_fn(analyzer, event):
 if isinstance(event, str):
@@ -234,9 +234,9 @@ def build_fn(analyzer, event):
 
 analyzer.begin()
 fn_cache = {}
-for rec in read_trace_records(edict, idtoname, log):
+for rec in read_trace_records(event_mapping, event_id_to_name, log):
 event_num = rec[0]
-event = edict[event_num]
+event = event_mapping[event_num]
 if event_num not in fn_cache:
 fn_cache[event_num] = build_fn(analyzer, event)
 fn_cache[event_num](event, rec)
-- 
2.38.1




[PATCH v5 07/14] simpletrace: define exception and add handling

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

Define `SimpleException` to differentiate our exceptions from generic
exceptions (IOError, etc.). Adapted simpletrace to support this and
output to stderr.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 8aea0d169b..229b10aa99 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -32,12 +32,15 @@
 log_header_fmt = '=QQQ'
 rec_header_fmt = '=QQII'
 
+class SimpleException(Exception):
+pass
+
 def read_header(fobj, hfmt):
 '''Read a trace record header'''
 hlen = struct.calcsize(hfmt)
 hdr = fobj.read(hlen)
 if len(hdr) != hlen:
-raise ValueError('Error reading header. Wrong filetype provided?')
+raise SimpleException('Error reading header. Wrong filetype provided?')
 return struct.unpack(hfmt, hdr)
 
 def get_record(event_mapping, event_id_to_name, rechdr, fobj):
@@ -49,10 +52,10 @@ def get_record(event_mapping, event_id_to_name, rechdr, 
fobj):
 try:
 event = event_mapping[name]
 except KeyError as e:
-sys.stderr.write(f'{e} event is logged but is not declared ' \
- 'in the trace events file, try using ' \
- 'trace-events-all instead.\n')
-sys.exit(1)
+raise SimpleException(
+f'{e} event is logged but is not declared in the trace events'
+'file, try using trace-events-all instead.'
+)
 
 rec = (name, timestamp_ns, pid)
 for type, name in event.args:
@@ -247,8 +250,7 @@ def run(analyzer):
 *no_header, trace_event_path, trace_file_path = sys.argv[1:]
 assert no_header == [] or no_header == ['--no-header'], 'Invalid 
no-header argument'
 except (AssertionError, ValueError):
-sys.stderr.write(f'usage: {sys.argv[0]} [--no-header] <trace-events> 
<trace-file>\n')
-sys.exit(1)
+raise SimpleException(f'usage: {sys.argv[0]} [--no-header] 
<trace-events> <trace-file>\n')
 
 with open(trace_event_path, 'r') as events_fobj, open(trace_file_path, 
'rb') as log_fobj:
 process(events_fobj, log_fobj, analyzer, read_header=not no_header)
@@ -276,4 +278,8 @@ def catchall(self, event, rec):
 i += 1
 print(' '.join(fields))
 
-run(Formatter())
+try:
+run(Formatter())
+except SimpleException as e:
+sys.stderr.write(str(e) + "\n")
+sys.exit(1)
-- 
2.38.1




[PATCH v5 02/14] simpletrace: annotate magic constants from QEMU code

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

It wasn't clear where the constants and structs came from, so I added
comments to help.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 5 +
 1 file changed, 5 insertions(+)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index b221d9a241..5c230a1b74 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -16,6 +16,11 @@
 
 __all__ = ['Analyzer', 'process', 'run']
 
+# This is the binary format that the QEMU "simple" trace backend
+# emits. There is no specification documentation because the format is
+# not guaranteed to be stable. Trace files must be parsed with the
+# same trace-events-all file and the same simpletrace.py file that
+# QEMU was built with.
 header_event_id = 0xffffffffffffffff
 header_magic= 0xf2b177cb0aa429b4
 dropped_event_id = 0xfffffffffffffffe
-- 
2.38.1




[PATCH v5 00/14] simpletrace: refactor and general improvements

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

I wanted to use simpletrace.py for an internal project, so I tried to update
and polish the code. Some of the commits resolve specific issues, while some
are more subjective.

I've tried to divide it into commits so we can discuss the
individual changes, and I'm ready to pull things out, if it isn't needed.

v5:
 * Picked Formatter2 over Formatter, so as not to use the deprecated class
 * simpletrace-benchmark.zip deleted
 * Rebased with master
v4:
 * Added missing Analyzer2 to __all__
 * Rebased with master
v3:
 * Added __all__ with public interface
 * Added comment about magic numbers and structs from Stefan Hajnoczi
 * Reintroduced old interface for process, run and Analyzer
 * Added comment about Python 3.6 in ref. to getfullargspec
 * process now accepts events as file-like objects
 * Updated context-manager code for Analyzer
 * Moved logic of event processing to Analyzer class
 * Moved logic of process into _process function
 * Added new Analyzer2 class with kwarg event-processing
 * Reverted changes to process-call in scripts/analyse-locks-simpletrace.py
v2:
 * Added myself as maintainer of simpletrace.py
 * Improve docstring on `process`
 * Changed call to `process` in scripts/analyse-locks-simpletrace.py to reflect 
new argument types
 * Replaced `iteritems()` with `items()` in 
scripts/analyse-locks-simpletrace.py to support Python 3

Mads Ynddal (14):
  simpletrace: add __all__ to define public interface
  simpletrace: annotate magic constants from QEMU code
  simpletrace: improve parsing of sys.argv; fix files never closed.
  simpletrace: changed naming of edict and idtoname to improve
readability
  simpletrace: update code for Python 3.11
  simpletrace: improved error handling on struct unpack
  simpletrace: define exception and add handling
  simpletrace: made Analyzer into context-manager
  simpletrace: refactor to separate responsibilities
  simpletrace: move logic of process into internal function
  simpletrace: move event processing to Analyzer class
  simpletrace: added simplified Analyzer2 class
  MAINTAINERS: add maintainer of simpletrace.py
  scripts/analyse-locks-simpletrace.py: changed iteritems() to items()

 MAINTAINERS  |   6 +
 scripts/analyse-locks-simpletrace.py |   2 +-
 scripts/simpletrace.py   | 382 +--
 3 files changed, 246 insertions(+), 144 deletions(-)

-- 
2.38.1




[PATCH v5 14/14] scripts/analyse-locks-simpletrace.py: changed iteritems() to items()

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

Python 3 removed `dict.iteritems()` in favor of `dict.items()`. This
means the script currently doesn't work on Python 3.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/analyse-locks-simpletrace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/analyse-locks-simpletrace.py 
b/scripts/analyse-locks-simpletrace.py
index 63c11f4fce..d650dd7140 100755
--- a/scripts/analyse-locks-simpletrace.py
+++ b/scripts/analyse-locks-simpletrace.py
@@ -75,7 +75,7 @@ def get_args():
(analyser.locks, analyser.locked, analyser.unlocks))
 
 # Now dump the individual lock stats
-for key, val in sorted(analyser.mutex_records.iteritems(),
+for key, val in sorted(analyser.mutex_records.items(),
key=lambda k_v: k_v[1]["locks"]):
 print ("Lock: %#x locks: %d, locked: %d, unlocked: %d" %
(key, val["locks"], val["locked"], val["unlocked"]))
-- 
2.38.1




[PATCH v5 13/14] MAINTAINERS: add maintainer of simpletrace.py

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

In my work to refactor simpletrace.py, I noticed that it has no
maintainer and has the status of "odd fixes". I'm using it from
time to time, so I'd like to maintain the script.

I've added myself as reviewer under "Tracing" to be informed of changes
that might affect simpletrace.py.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Mads Ynddal 
---
 MAINTAINERS | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 355b1960ce..81625f036b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3170,6 +3170,7 @@ F: stubs/
 
 Tracing
 M: Stefan Hajnoczi 
+R: Mads Ynddal 
 S: Maintained
 F: trace/
 F: trace-events
@@ -3182,6 +3183,11 @@ F: docs/tools/qemu-trace-stap.rst
 F: docs/devel/tracing.rst
 T: git https://github.com/stefanha/qemu.git tracing
 
+Simpletrace
+M: Mads Ynddal 
+S: Maintained
+F: scripts/simpletrace.py
+
 TPM
 M: Stefan Berger 
 S: Maintained
-- 
2.38.1




[PATCH v5 11/14] simpletrace: move event processing to Analyzer class

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

Moved event processing to the Analyzer class to separate specific analyzer
logic (like caching and function signatures) from the _process function.
This allows for new types of Analyzer-based subclasses without changing
the core code.

Note, that the fn_cache is important for performance in cases where the
analyzer is branching away from the catch-all a lot. The cache has no
measurable performance penalty.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 60 +-
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 6969fdd59a..4136d00600 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -169,6 +169,35 @@ def catchall(self, event, rec):
 """Called if no specific method for processing a trace event has been 
found."""
 pass
 
+def _build_fn(self, event):
+fn = getattr(self, event.name, None)
+if fn is None:
+# Return early to avoid costly call to inspect.getfullargspec
+return self.catchall
+
+event_argcount = len(event.args)
+fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1
+if fn_argcount == event_argcount + 1:
+# Include timestamp as first argument
+return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount]))
+elif fn_argcount == event_argcount + 2:
+# Include timestamp and pid
+return lambda _, rec: fn(*rec[1:3 + event_argcount])
+else:
+# Just arguments, no timestamp or pid
+return lambda _, rec: fn(*rec[3:3 + event_argcount])
+
+def _process_event(self, rec_args, *, event, event_id, timestamp_ns, pid, 
**kwargs):
+if not hasattr(self, '_fn_cache'):
+# NOTE: Cannot depend on downstream subclasses to have
+# super().__init__() because of legacy.
+self._fn_cache = {}
+
+rec = (event_id, timestamp_ns, pid, *rec_args)
+if event_id not in self._fn_cache:
+self._fn_cache[event_id] = self._build_fn(event)
+self._fn_cache[event_id](event, rec)
+
 def end(self):
 """Called at the end of the trace."""
 pass
@@ -222,32 +251,15 @@ def _process(events, log_fobj, analyzer, 
read_header=True):
 if read_header:
 read_trace_header(log_fobj)
 
-def build_fn(analyzer, event):
-if isinstance(event, str):
-return analyzer.catchall
-
-fn = getattr(analyzer, event.name, None)
-if fn is None:
-return analyzer.catchall
-
-event_argcount = len(event.args)
-fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1
-if fn_argcount == event_argcount + 1:
-# Include timestamp as first argument
-return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount]))
-elif fn_argcount == event_argcount + 2:
-# Include timestamp and pid
-return lambda _, rec: fn(*rec[1:3 + event_argcount])
-else:
-# Just arguments, no timestamp or pid
-return lambda _, rec: fn(*rec[3:3 + event_argcount])
-
 with analyzer:
-fn_cache = {}
 for event, event_id, timestamp_ns, record_pid, *rec_args in 
read_trace_records(events, log_fobj, read_header):
-if event_id not in fn_cache:
-fn_cache[event_id] = build_fn(analyzer, event)
-fn_cache[event_id](event, (event_id, timestamp_ns, record_pid, 
*rec_args))
+analyzer._process_event(
+rec_args,
+event=event,
+event_id=event_id,
+timestamp_ns=timestamp_ns,
+pid=record_pid,
+)
 
 def run(analyzer):
 """Execute an analyzer on a trace file given on the command-line.
-- 
2.38.1




[PATCH v5 03/14] simpletrace: improve parsing of sys.argv; fix files never closed.

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

The arguments extracted from `sys.argv` are now named and unpacked to make it
clear what the arguments are and what they're used for.

The two input files were opened, but never explicitly closed. File usage is
changed to use the `with` statement to take care of this. At the same time,
ownership of the file-object is moved up to the `run` function. Added an option
to `process` to support file-like objects.
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 50 --
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 5c230a1b74..283b5918a1 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -9,6 +9,7 @@
 #
 # For help see docs/devel/tracing.rst
 
+import sys
 import struct
 import inspect
 from tracetool import read_events, Event
@@ -51,7 +52,6 @@ def get_record(edict, idtoname, rechdr, fobj):
 try:
 event = edict[name]
 except KeyError as e:
-import sys
 sys.stderr.write('%s event is logged but is not declared ' \
  'in the trace events file, try using ' \
  'trace-events-all instead.\n' % str(e))
@@ -172,11 +172,28 @@ def end(self):
 pass
 
 def process(events, log, analyzer, read_header=True):
-"""Invoke an analyzer on each event in a log."""
+"""Invoke an analyzer on each event in a log.
+Args:
+events (file-object or list or str): events list or file-like object 
or file path as str to read event data from
+log (file-object or str): file-like object or file path as str to read 
log data from
+analyzer (Analyzer): Instance of Analyzer to interpret the event data
+read_header (bool, optional): Whether to read header data from the log 
data. Defaults to True.
+"""
+
 if isinstance(events, str):
-events = read_events(open(events, 'r'), events)
+with open(events, 'r') as f:
+events_list = read_events(f, events)
+elif isinstance(events, list):
+# Treat as a list of events already produced by tracetool.read_events
+events_list = events
+else:
+# Treat as an already opened file-object
+events_list = read_events(events, events.name)
+
+close_log = False
 if isinstance(log, str):
 log = open(log, 'rb')
+close_log = True
 
 if read_header:
 read_trace_header(log)
@@ -187,12 +204,12 @@ def process(events, log, analyzer, read_header=True):
 edict = {"dropped": dropped_event}
 idtoname = {dropped_event_id: "dropped"}
 
-for event in events:
+for event in events_list:
 edict[event.name] = event
 
 # If there is no header assume event ID mapping matches events list
 if not read_header:
-for event_id, event in enumerate(events):
+for event_id, event in enumerate(events_list):
 idtoname[event_id] = event.name
 
 def build_fn(analyzer, event):
@@ -225,24 +242,25 @@ def build_fn(analyzer, event):
 fn_cache[event_num](event, rec)
 analyzer.end()
 
+if close_log:
+log.close()
+
 def run(analyzer):
 """Execute an analyzer on a trace file given on the command-line.
 
 This function is useful as a driver for simple analysis scripts.  More
 advanced scripts will want to call process() instead."""
-import sys
-
-read_header = True
-if len(sys.argv) == 4 and sys.argv[1] == '--no-header':
-read_header = False
-del sys.argv[1]
-elif len(sys.argv) != 3:
-sys.stderr.write('usage: %s [--no-header]  ' \
- '\n' % sys.argv[0])
+
+try:
+# NOTE: See built-in `argparse` module for a more robust cli interface
+*no_header, trace_event_path, trace_file_path = sys.argv[1:]
+assert no_header == [] or no_header == ['--no-header'], 'Invalid 
no-header argument'
+except (AssertionError, ValueError):
+sys.stderr.write(f'usage: {sys.argv[0]} [--no-header]  
\n')
 sys.exit(1)
 
-events = read_events(open(sys.argv[1], 'r'), sys.argv[1])
-process(events, sys.argv[2], analyzer, read_header=read_header)
+with open(trace_event_path, 'r') as events_fobj, open(trace_file_path, 
'rb') as log_fobj:
+process(events_fobj, log_fobj, analyzer, read_header=not no_header)
 
 if __name__ == '__main__':
 class Formatter(Analyzer):
-- 
2.38.1




[PATCH v5 09/14] simpletrace: refactor to separate responsibilities

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

Moved event_mapping and event_id_to_name down one level in the function
call-stack to keep variable instantiation and usage closer (`process`
and `run` have no use for the variables; `read_trace_records` does).

Instead of passing event_mapping and event_id_to_name to the bottom of
the call-stack, we move their use to `read_trace_records`. This
separates responsibility and ownership of the information.

`read_record` now just reads the arguments from the file-object by
knowing the total number of bytes. Parsing it to specific arguments is
moved up to `read_trace_records`.

Special handling of dropped events removed, as they can be handled
by the general code.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 115 +++--
 1 file changed, 53 insertions(+), 62 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 7f514d1577..0826aef283 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -31,6 +31,7 @@
 
 log_header_fmt = '=QQQ'
 rec_header_fmt = '=QQII'
+rec_header_fmt_len = struct.calcsize(rec_header_fmt)
 
 class SimpleException(Exception):
 pass
@@ -43,35 +44,6 @@ def read_header(fobj, hfmt):
 raise SimpleException('Error reading header. Wrong filetype provided?')
 return struct.unpack(hfmt, hdr)
 
-def get_record(event_mapping, event_id_to_name, rechdr, fobj):
-"""Deserialize a trace record from a file into a tuple
-   (name, timestamp, pid, arg1, ..., arg6)."""
-event_id, timestamp_ns, length, pid = rechdr
-if event_id != dropped_event_id:
-name = event_id_to_name[event_id]
-try:
-event = event_mapping[name]
-except KeyError as e:
-raise SimpleException(
-f'{e} event is logged but is not declared in the trace events'
-'file, try using trace-events-all instead.'
-)
-
-rec = (name, timestamp_ns, pid)
-for type, name in event.args:
-if is_string(type):
-l = fobj.read(4)
-(len,) = struct.unpack('=L', l)
-s = fobj.read(len)
-rec = rec + (s,)
-else:
-(value,) = struct.unpack('=Q', fobj.read(8))
-rec = rec + (value,)
-else:
-(dropped_count,) = struct.unpack('=Q', fobj.read(8))
-rec = ("dropped", timestamp_ns, pid, dropped_count)
-return rec
-
 def get_mapping(fobj):
 (event_id, ) = struct.unpack('=Q', fobj.read(8))
 (len, ) = struct.unpack('=L', fobj.read(4))
@@ -79,10 +51,11 @@ def get_mapping(fobj):
 
 return (event_id, name)
 
-def read_record(event_mapping, event_id_to_name, fobj):
-"""Deserialize a trace record from a file into a tuple (event_num, 
timestamp, pid, arg1, ..., arg6)."""
-rechdr = read_header(fobj, rec_header_fmt)
-return get_record(event_mapping, event_id_to_name, rechdr, fobj)
+def read_record(fobj):
+"""Deserialize a trace record from a file into a tuple (event_num, 
timestamp, pid, args)."""
+event_id, timestamp_ns, record_length, record_pid = read_header(fobj, 
rec_header_fmt)
+args_payload = fobj.read(record_length - rec_header_fmt_len)
+return (event_id, timestamp_ns, record_pid, args_payload)
 
 def read_trace_header(fobj):
 """Read and verify trace file header"""
@@ -97,17 +70,28 @@ def read_trace_header(fobj):
 if log_version != 4:
 raise ValueError(f'Log format {log_version} not supported with this 
QEMU release!')
 
-def read_trace_records(event_mapping, event_id_to_name, fobj):
-"""Deserialize trace records from a file, yielding record tuples 
(event_num, timestamp, pid, arg1, ..., arg6).
-
-Note that `event_id_to_name` is modified if the file contains mapping 
records.
+def read_trace_records(events, fobj, read_header):
+"""Deserialize trace records from a file, yielding record tuples (event, 
event_num, timestamp, pid, arg1, ..., arg6).
 
 Args:
 event_mapping (str -> Event): events dict, indexed by name
-event_id_to_name (int -> str): event names dict, indexed by event ID
 fobj (file): input file
+read_header (bool): whether headers were read from fobj
 
 """
+frameinfo = inspect.getframeinfo(inspect.currentframe())
+dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)",
+frameinfo.lineno + 1, frameinfo.filename)
+
+event_mapping = {e.name: e for e in events}
+event_mapping["dropped"] = dropped_event
+event_id_to_name = {dropped_event_id: "dropped"}
+
+# If there is no header assume event ID mapping matches events list
+if not read_header:
+for event_id, event in enumerate(events):
+event_id_to_name[event_id] = event.name
+
 while True:
 t = fobj.read(8)
 if len(t) == 0:
@@ -115,12 +99,35 @@ def read_trace_records(event_mapping, event_id_to_na

[PATCH v5 08/14] simpletrace: made Analyzer into context-manager

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

Instead of explicitly calling `begin` and `end`, we can change the class
to use the context-manager paradigm. This is mostly a styling choice,
used in modern Python code. But it also allows for more advanced analyzers
to handle exceptions gracefully in the `__exit__` method (not
demonstrated here).

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 31 ---
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 229b10aa99..7f514d1577 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -122,12 +122,13 @@ def read_trace_records(event_mapping, event_id_to_name, 
fobj):
 
 yield rec
 
-class Analyzer(object):
+class Analyzer:
 """A trace file analyzer which processes trace records.
 
 An analyzer can be passed to run() or process().  The begin() method is
 invoked, then each trace record is processed, and finally the end() method
-is invoked.
+is invoked. When Analyzer is used as a context-manager (using the `with`
+statement), begin() and end() are called automatically.
 
 If a method matching a trace event name exists, it is invoked to process
 that trace record.  Otherwise the catchall() method is invoked.
@@ -165,6 +166,15 @@ def end(self):
 """Called at the end of the trace."""
 pass
 
+def __enter__(self):
+self.begin()
+return self
+
+def __exit__(self, exc_type, exc_val, exc_tb):
+if exc_type is None:
+self.end()
+return False
+
 def process(events, log, analyzer, read_header=True):
 """Invoke an analyzer on each event in a log.
 Args:
@@ -226,15 +236,14 @@ def build_fn(analyzer, event):
 # Just arguments, no timestamp or pid
 return lambda _, rec: fn(*rec[3:3 + event_argcount])
 
-analyzer.begin()
-fn_cache = {}
-for rec in read_trace_records(event_mapping, event_id_to_name, log):
-event_num = rec[0]
-event = event_mapping[event_num]
-if event_num not in fn_cache:
-fn_cache[event_num] = build_fn(analyzer, event)
-fn_cache[event_num](event, rec)
-analyzer.end()
+with analyzer:
+fn_cache = {}
+for rec in read_trace_records(event_mapping, event_id_to_name, log):
+event_num = rec[0]
+event = event_mapping[event_num]
+if event_num not in fn_cache:
+fn_cache[event_num] = build_fn(analyzer, event)
+fn_cache[event_num](event, rec)
 
 if close_log:
 log.close()
-- 
2.38.1




[PATCH v5 01/14] simpletrace: add __all__ to define public interface

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

It was unclear what the supported public interface was, i.e. which
functions/classes are important to retain when refactoring the code.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 1f6d1ae1f3..b221d9a241 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -14,6 +14,8 @@
 from tracetool import read_events, Event
 from tracetool.backend.simple import is_string
 
+__all__ = ['Analyzer', 'process', 'run']
+
 header_event_id = 0x
 header_magic= 0xf2b177cb0aa429b4
 dropped_event_id = 0xfffe
-- 
2.38.1




[PATCH v5 10/14] simpletrace: move logic of process into internal function

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

To avoid duplicate code depending on input types and to better handle
open/close of log with a context-manager, we move the logic of process into
_process.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 0826aef283..6969fdd59a 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -201,13 +201,26 @@ def process(events, log, analyzer, read_header=True):
 # Treat as an already opened file-object
 events_list = read_events(events, events.name)
 
-close_log = False
 if isinstance(log, str):
-log = open(log, 'rb')
-close_log = True
+with open(log, 'rb') as log_fobj:
+_process(events_list, log_fobj, analyzer, read_header)
+else:
+# Treat `log` as an already opened file-object. We will not close it,
+# as we do not own it.
+_process(events_list, log, analyzer, read_header)
+
+def _process(events, log_fobj, analyzer, read_header=True):
+"""Internal function for processing
+
+Args:
+events (list): list of events already produced by tracetool.read_events
+log_fobj (file): file-object to read log data from
+analyzer (Analyzer): the Analyzer to interpret the event data
+read_header (bool, optional): Whether to read header data from the log 
data. Defaults to True.
+"""
 
 if read_header:
-read_trace_header(log)
+read_trace_header(log_fobj)
 
 def build_fn(analyzer, event):
 if isinstance(event, str):
@@ -231,14 +244,11 @@ def build_fn(analyzer, event):
 
 with analyzer:
 fn_cache = {}
-for event, event_id, timestamp_ns, record_pid, *rec_args in 
read_trace_records(events, log, read_header):
+for event, event_id, timestamp_ns, record_pid, *rec_args in 
read_trace_records(events, log_fobj, read_header):
 if event_id not in fn_cache:
 fn_cache[event_id] = build_fn(analyzer, event)
 fn_cache[event_id](event, (event_id, timestamp_ns, record_pid, 
*rec_args))
 
-if close_log:
-log.close()
-
 def run(analyzer):
 """Execute an analyzer on a trace file given on the command-line.
 
-- 
2.38.1




[PATCH v5 12/14] simpletrace: added simplified Analyzer2 class

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

By moving the dynamic argument construction to keyword-arguments,
we can remove all of the specialized handling, and streamline it.
If a tracing method wants to access these, it can define the
kwargs, or ignore them by placing `**kwargs` at the end of the
function's argument list.

Added deprecation warning to Analyzer class to make users aware
of the Analyzer2 class. No removal date is planned.

Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 98 --
 1 file changed, 75 insertions(+), 23 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 4136d00600..cef81b0707 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -12,10 +12,11 @@
 import sys
 import struct
 import inspect
+import warnings
 from tracetool import read_events, Event
 from tracetool.backend.simple import is_string
 
-__all__ = ['Analyzer', 'process', 'run']
+__all__ = ['Analyzer', 'Analyzer2', 'process', 'run']
 
 # This is the binary format that the QEMU "simple" trace backend
 # emits. There is no specification documentation because the format is
@@ -130,7 +131,9 @@ def read_trace_records(events, fobj, read_header):
 yield (event_mapping[event_name], event_name, timestamp_ns, pid) + 
tuple(args)
 
 class Analyzer:
-"""A trace file analyzer which processes trace records.
+"""[Deprecated. Refer to Analyzer2 instead.]
+
+A trace file analyzer which processes trace records.
 
 An analyzer can be passed to run() or process().  The begin() method is
 invoked, then each trace record is processed, and finally the end() method
@@ -188,6 +191,11 @@ def _build_fn(self, event):
 return lambda _, rec: fn(*rec[3:3 + event_argcount])
 
 def _process_event(self, rec_args, *, event, event_id, timestamp_ns, pid, 
**kwargs):
+warnings.warn(
+"Use of deprecated Analyzer class. Refer to Analyzer2 instead.",
+DeprecationWarning,
+)
+
 if not hasattr(self, '_fn_cache'):
 # NOTE: Cannot depend on downstream subclasses to have
 # super().__init__() because of legacy.
@@ -211,6 +219,56 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 self.end()
 return False
 
+class Analyzer2(Analyzer):
+"""A trace file analyzer which processes trace records.
+
+An analyzer can be passed to run() or process().  The begin() method is
+invoked, then each trace record is processed, and finally the end() method
+is invoked. When Analyzer is used as a context-manager (using the `with`
+statement), begin() and end() are called automatically.
+
+If a method matching a trace event name exists, it is invoked to process
+that trace record.  Otherwise the catchall() method is invoked.
+
+The methods are called with a set of keyword-arguments. These can be 
ignored
+using `**kwargs` or defined like any keyword-argument.
+
+The following keyword-arguments are available, but make sure to have an
+**kwargs to allow for unmatched arguments in the future:
+event: Event object of current trace
+event_id: The id of the event in the current trace file
+timestamp_ns: The timestamp in nanoseconds of the trace
+pid: The process id recorded for the given trace
+
+Example:
+The following method handles the runstate_set(int new_state) trace event::
+
+  def runstate_set(self, new_state, **kwargs):
+  ...
+
+The method can also explicitly take a timestamp keyword-argument with the
+trace event arguments::
+
+  def runstate_set(self, new_state, *, timestamp_ns, **kwargs):
+  ...
+
+Timestamps have the uint64_t type and are in nanoseconds.
+
+The pid can be included in addition to the timestamp and is useful when
+dealing with traces from multiple processes:
+
+  def runstate_set(self, new_state, *, timestamp_ns, pid, **kwargs):
+  ...
+"""
+
+def catchall(self, *rec_args, event, timestamp_ns, pid, event_id, 
**kwargs):
+"""Called if no specific method for processing a trace event has been 
found."""
+pass
+
+def _process_event(self, rec_args, *, event, **kwargs):
+fn = getattr(self, event.name, self.catchall)
+fn(*rec_args, event=event, **kwargs)
+
 def process(events, log, analyzer, read_header=True):
 """Invoke an analyzer on each event in a log.
 Args:
@@ -278,30 +336,24 @@ def run(analyzer):
 process(events_fobj, log_fobj, analyzer, read_header=not no_header)
 
 if __name__ == '__main__':
-class Formatter(Analyzer):
+class Formatter2(Analyzer2):
 def __init__(self):
-self.last_timestamp = None
-
-def catchall(self, event, rec):
-timestamp = rec[1]
-if self.last_timestamp is None:
-self.last_timestamp = timestamp
-delta_ns = timestamp - self.last_timestamp
-self

[PATCH v5 06/14] simpletrace: improved error handling on struct unpack

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

A failed call to `read_header` wouldn't be handled the same for the two
different code paths (one path would try to use `None` as a list).
Changed to raise exception to be handled centrally. This also allows for
easier unpacking, as errors have been filtered out.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 41 -
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 971b2a0f6a..8aea0d169b 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -37,26 +37,24 @@ def read_header(fobj, hfmt):
 hlen = struct.calcsize(hfmt)
 hdr = fobj.read(hlen)
 if len(hdr) != hlen:
-return None
+raise ValueError('Error reading header. Wrong filetype provided?')
 return struct.unpack(hfmt, hdr)
 
 def get_record(event_mapping, event_id_to_name, rechdr, fobj):
 """Deserialize a trace record from a file into a tuple
(name, timestamp, pid, arg1, ..., arg6)."""
-if rechdr is None:
-return None
-if rechdr[0] != dropped_event_id:
-event_id = rechdr[0]
+event_id, timestamp_ns, length, pid = rechdr
+if event_id != dropped_event_id:
 name = event_id_to_name[event_id]
-rec = (name, rechdr[1], rechdr[3])
 try:
 event = event_mapping[name]
 except KeyError as e:
-sys.stderr.write('%s event is logged but is not declared ' \
+sys.stderr.write(f'{e} event is logged but is not declared ' \
  'in the trace events file, try using ' \
- 'trace-events-all instead.\n' % str(e))
+ 'trace-events-all instead.\n')
 sys.exit(1)
 
+rec = (name, timestamp_ns, pid)
 for type, name in event.args:
 if is_string(type):
 l = fobj.read(4)
@@ -67,9 +65,8 @@ def get_record(event_mapping, event_id_to_name, rechdr, fobj):
 (value,) = struct.unpack('=Q', fobj.read(8))
 rec = rec + (value,)
 else:
-rec = ("dropped", rechdr[1], rechdr[3])
-(value,) = struct.unpack('=Q', fobj.read(8))
-rec = rec + (value,)
+(dropped_count,) = struct.unpack('=Q', fobj.read(8))
+rec = ("dropped", timestamp_ns, pid, dropped_count)
 return rec
 
 def get_mapping(fobj):
@@ -86,22 +83,16 @@ def read_record(event_mapping, event_id_to_name, fobj):
 
 def read_trace_header(fobj):
 """Read and verify trace file header"""
-header = read_header(fobj, log_header_fmt)
-if header is None:
-raise ValueError('Not a valid trace file!')
-if header[0] != header_event_id:
-raise ValueError('Not a valid trace file, header id %d != %d' %
- (header[0], header_event_id))
-if header[1] != header_magic:
-raise ValueError('Not a valid trace file, header magic %d != %d' %
- (header[1], header_magic))
-
-log_version = header[2]
+_header_event_id, _header_magic, log_version = read_header(fobj, 
log_header_fmt)
+if _header_event_id != header_event_id:
+raise ValueError(f'Not a valid trace file, header id 
{_header_event_id} != {header_event_id}')
+if _header_magic != header_magic:
+raise ValueError(f'Not a valid trace file, header magic 
{_header_magic} != {header_magic}')
+
 if log_version not in [0, 2, 3, 4]:
-raise ValueError('Unknown version of tracelog format!')
+raise ValueError(f'Unknown version {log_version} of tracelog format!')
 if log_version != 4:
-raise ValueError('Log format %d not supported with this QEMU release!'
- % log_version)
+raise ValueError(f'Log format {log_version} not supported with this 
QEMU release!')
 
 def read_trace_records(event_mapping, event_id_to_name, fobj):
 """Deserialize trace records from a file, yielding record tuples 
(event_num, timestamp, pid, arg1, ..., arg6).
-- 
2.38.1




[PATCH v5 05/14] simpletrace: update code for Python 3.11

2023-09-26 Thread Mads Ynddal
From: Mads Ynddal 

The call to `getargspec` was deprecated and in Python 3.11 it has been
removed in favor of `getfullargspec`. `getfullargspec` is compatible
with QEMU's requirement of at least Python version 3.6.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Mads Ynddal 
---
 scripts/simpletrace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 09511f624d..971b2a0f6a 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -221,7 +221,7 @@ def build_fn(analyzer, event):
 return analyzer.catchall
 
 event_argcount = len(event.args)
-fn_argcount = len(inspect.getargspec(fn)[0]) - 1
+fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1
 if fn_argcount == event_argcount + 1:
 # Include timestamp as first argument
 return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount]))
-- 
2.38.1




[PATCH RFC V2 32/37] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug

2023-09-26 Thread Salil Mehta via
vCPU Hot-unplug will result in QOM CPU object unrealization which will do away
with all the vCPU thread creations, allocations, registrations that happened
as part of the realization process. This change introduces the ARM CPU unrealize
function taking care of exactly that.

Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context
is parked at the QEMU KVM layer.

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Keqian Zhu 
Signed-off-by: Keqian Zhu 
Reported-by: Vishnu Pajjuri 
[VP: Identified CPU stall issue & suggested probable fix]
Signed-off-by: Salil Mehta 
---
 target/arm/cpu-qom.h   |   3 ++
 target/arm/cpu.c   | 101 +
 target/arm/cpu.h   |  13 ++
 target/arm/gdbstub.c   |   6 +++
 target/arm/helper.c|  25 ++
 target/arm/internals.h |   1 +
 target/arm/kvm64.c |   4 ++
 7 files changed, 153 insertions(+)

diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index 514c22ced9..2503493710 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -54,6 +54,7 @@ struct ARMCPUClass {
 
 const ARMCPUInfo *info;
 DeviceRealize parent_realize;
+DeviceUnrealize parent_unrealize;
 ResettablePhases parent_phases;
 };
 
@@ -70,7 +71,9 @@ struct AArch64CPUClass {
 };
 
 void register_cp_regs_for_features(ARMCPU *cpu);
+void unregister_cp_regs_for_features(ARMCPU *cpu);
 void init_cpreg_list(ARMCPU *cpu);
+void destroy_cpreg_list(ARMCPU *cpu);
 
 /* Callback functions for the generic timer's timers. */
 void arm_gt_ptimer_cb(void *opaque);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 3a2e7e64ee..93b00835bf 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -141,6 +141,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, 
ARMELChangeHookFn *hook,
 QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);
 }
 
+void arm_unregister_pre_el_change_hooks(ARMCPU *cpu)
+{
+ARMELChangeHook *entry, *next;
+
+QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) {
+QLIST_REMOVE(entry, node);
+g_free(entry);
+}
+}
+
 void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
  void *opaque)
 {
@@ -152,6 +162,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, 
ARMELChangeHookFn *hook,
 QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
 }
 
+void arm_unregister_el_change_hooks(ARMCPU *cpu)
+{
+ARMELChangeHook *entry, *next;
+
+QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
+QLIST_REMOVE(entry, node);
+g_free(entry);
+}
+}
+
 static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
 {
 /* Reset a single ARMCPRegInfo register */
@@ -2244,6 +2264,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 acc->parent_realize(dev, errp);
 }
 
+static void arm_cpu_unrealizefn(DeviceState *dev)
+{
+ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
+ARMCPU *cpu = ARM_CPU(dev);
+CPUARMState *env = &cpu->env;
+CPUState *cs = CPU(dev);
+bool has_secure;
+
+has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
+
+/* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */
+cpu_address_space_destroy(cs, ARMASIdx_NS);
+
+if (cpu->tag_memory != NULL) {
+cpu_address_space_destroy(cs, ARMASIdx_TagNS);
+if (has_secure) {
+cpu_address_space_destroy(cs, ARMASIdx_TagS);
+}
+}
+
+if (has_secure) {
+cpu_address_space_destroy(cs, ARMASIdx_S);
+}
+
+destroy_cpreg_list(cpu);
+arm_cpu_unregister_gdb_regs(cpu);
+unregister_cp_regs_for_features(cpu);
+
+if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+g_free(env->sau.rbar);
+g_free(env->sau.rlar);
+}
+
+if (arm_feature(env, ARM_FEATURE_PMSA) &&
+arm_feature(env, ARM_FEATURE_V7) &&
+cpu->pmsav7_dregion) {
+if (arm_feature(env, ARM_FEATURE_V8)) {
+g_free(env->pmsav8.rbar[M_REG_NS]);
+g_free(env->pmsav8.rlar[M_REG_NS]);
+if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+g_free(env->pmsav8.rbar[M_REG_S]);
+g_free(env->pmsav8.rlar[M_REG_S]);
+}
+} else {
+g_free(env->pmsav7.drbar);
+g_free(env->pmsav7.drsr);
+g_free(env->pmsav7.dracr);
+}
+if (cpu->pmsav8r_hdregion) {
+g_free(env->pmsav8.hprbar);
+g_free(env->pmsav8.hprlar);
+}
+}
+
+if (arm_feature(env, ARM_FEATURE_PMU)) {
+if (!kvm_enabled()) {
+arm_unregister_pre_el_change_hooks(cpu);
+arm_unregister_el_change_hooks(cpu);
+}
+
+#ifndef CONFIG_USER_ONLY
+if (cpu->pmu_timer) {
+timer_del(cpu->pmu_timer);
+}
+#endif
+}
+
+cpu_remove_sync(CPU(dev));
+acc->parent_unrealize(dev);

[PATCH RFC V2 33/37] target/arm/kvm: Write CPU state back to KVM on reset

2023-09-26 Thread Salil Mehta via
From: Jean-Philippe Brucker 

When a KVM vCPU is reset following a PSCI CPU_ON call, its power state
is not synchronized with KVM at the moment. Because the vCPU is not
marked dirty, we miss the call to kvm_arch_put_registers() that writes
to KVM's MP_STATE. Force mp_state synchronization.

Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Salil Mehta 
---
 target/arm/kvm.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 0e1d0692b1..8e7c68af6a 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -614,11 +614,12 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu)
 void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
 int ret;
+CPUState *cs = CPU(cpu);
 
 /* Re-init VCPU so that all registers are set to
  * their respective reset values.
  */
-ret = kvm_arm_vcpu_init(CPU(cpu));
+ret = kvm_arm_vcpu_init(cs);
 if (ret < 0) {
 fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
 abort();
@@ -635,6 +636,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
  * for the same reason we do so in kvm_arch_get_registers().
  */
 write_list_to_cpustate(cpu);
+
+/*
+ * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty if
+ * it was parked in KVM and is now booting from a PSCI CPU_ON call.
+ */
+cs->vcpu_dirty = true;
 }
 
 void kvm_arm_create_host_vcpu(ARMCPU *cpu)
-- 
2.34.1




Re: [PATCH 02/12] hw/i386/intel_iommu: vtd_slpte_nonzero_rsvd(): reduce magic numbers

2023-09-26 Thread Peter Maydell
On Mon, 25 Sept 2023 at 20:41, Vladimir Sementsov-Ogievskiy
 wrote:
>
> Add a constant and clear assertion. The assertion also tells Coverity
> that we are not going to overflow the array.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  hw/i386/intel_iommu.c | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index c0ce896668..2233dbe13a 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1028,12 +1028,17 @@ static dma_addr_t 
> vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
>   * vtd_spte_rsvd 4k pages
>   * vtd_spte_rsvd_large large pages
>   */
> -static uint64_t vtd_spte_rsvd[5];
> -static uint64_t vtd_spte_rsvd_large[5];
> +#define VTD_SPTE_RSVD_LEN 5
> +static uint64_t vtd_spte_rsvd[VTD_SPTE_RSVD_LEN];
> +static uint64_t vtd_spte_rsvd_large[VTD_SPTE_RSVD_LEN];
>
>  static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
>  {
> -uint64_t rsvd_mask = vtd_spte_rsvd[level];
> +uint64_t rsvd_mask;
> +
> +assert(level < VTD_SPTE_RSVD_LEN);
> +
> +rsvd_mask = vtd_spte_rsvd[level];


Looking at the code it is not clear to me why this assertion is
valid. It looks like we are picking up fields from guest-set
configuration (probably in-memory data structures). So we can't
assert() here -- we need to do whatever the real hardware does
if these fields are set to an incorrect value, or at least something
sensible that doesn't crash QEMU.

thanks
-- PMM



[PATCH RFC V2 34/37] target/arm/kvm, tcg: Register/Handle SMCCC hypercall exits to VMM/Qemu

2023-09-26 Thread Salil Mehta via
From: Author Salil Mehta 

Add registration and Handling of HVC/SMC hypercall exits to VMM

Co-developed-by: Salil Mehta 
Signed-off-by: Salil Mehta 
Co-developed-by: Jean-Philippe Brucker 
Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Salil Mehta 
---
 target/arm/arm-powerctl.c   | 51 +---
 target/arm/helper.c |  2 +-
 target/arm/internals.h  | 11 
 target/arm/kvm.c| 52 +
 target/arm/kvm64.c  | 46 +---
 target/arm/kvm_arm.h| 13 ++
 target/arm/meson.build  |  1 +
 target/arm/{tcg => }/psci.c |  8 ++
 target/arm/tcg/meson.build  |  4 ---
 9 files changed, 160 insertions(+), 28 deletions(-)
 rename target/arm/{tcg => }/psci.c (97%)

diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 326a03153d..0184c7fb09 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -16,6 +16,7 @@
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "sysemu/tcg.h"
+#include "hw/boards.h"
 
 #ifndef DEBUG_ARM_POWERCTL
 #define DEBUG_ARM_POWERCTL 0
@@ -28,18 +29,37 @@
 } \
 } while (0)
 
+static CPUArchId *arm_get_archid_by_id(uint64_t id)
+{
+int n;
+CPUArchId *arch_id;
+MachineState *ms = MACHINE(qdev_get_machine());
+
+/*
+ * At this point disabled CPUs don't have a CPUState, but their CPUArchId
+ * exists.
+ *
+ * TODO: Is arch_id == mp_affinity? This needs work.
+ */
+for (n = 0; n < ms->possible_cpus->len; n++) {
+arch_id = &ms->possible_cpus->cpus[n];
+
+if (arch_id->arch_id == id) {
+return arch_id;
+}
+}
+return NULL;
+}
+
 CPUState *arm_get_cpu_by_id(uint64_t id)
 {
-CPUState *cpu;
+CPUArchId *arch_id;
 
 DPRINTF("cpu %" PRId64 "\n", id);
 
-CPU_FOREACH(cpu) {
-ARMCPU *armcpu = ARM_CPU(cpu);
-
-if (armcpu->mp_affinity == id) {
-return cpu;
-}
+arch_id = arm_get_archid_by_id(id);
+if (arch_id && arch_id->cpu) {
+return CPU(arch_id->cpu);
 }
 
 qemu_log_mask(LOG_GUEST_ERROR,
@@ -148,6 +168,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t 
context_id,
 {
 CPUState *target_cpu_state;
 ARMCPU *target_cpu;
+CPUArchId *arch_id;
 struct CpuOnInfo *info;
 
 assert(qemu_mutex_iothread_locked());
@@ -168,12 +189,24 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, 
uint64_t context_id,
 }
 
 /* Retrieve the cpu we are powering up */
-target_cpu_state = arm_get_cpu_by_id(cpuid);
-if (!target_cpu_state) {
+arch_id = arm_get_archid_by_id(cpuid);
+if (!arch_id) {
 /* The cpu was not found */
 return QEMU_ARM_POWERCTL_INVALID_PARAM;
 }
 
+target_cpu_state = CPU(arch_id->cpu);
+if (!qemu_enabled_cpu(target_cpu_state)) {
+/*
+ * The cpu is not plugged in or disabled. We should return appropriate
+ * value as introduced in DEN0022E PSCI 1.2 issue E
+ */
+qemu_log_mask(LOG_GUEST_ERROR,
+  "[ARM]%s: Denying attempt to online removed/disabled "
+  "CPU%" PRId64"\n", __func__, cpuid);
+return QEMU_ARM_POWERCTL_IS_OFF;
+}
+
 target_cpu = ARM_CPU(target_cpu_state);
 if (target_cpu->power_state == PSCI_ON) {
 qemu_log_mask(LOG_GUEST_ERROR,
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 272d6ba139..4d396426f2 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11187,7 +11187,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
   env->exception.syndrome);
 }
 
-if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) {
+if (arm_is_psci_call(cpu, cs->exception_index)) {
 arm_handle_psci_call(cpu);
 qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
 return;
diff --git a/target/arm/internals.h b/target/arm/internals.h
index fe330e89e7..7ffefc2d58 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -305,21 +305,10 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr 
addr, int len);
 /* Callback function for when a watchpoint or breakpoint triggers. */
 void arm_debug_excp_handler(CPUState *cs);
 
-#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG)
-static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
-{
-return false;
-}
-static inline void arm_handle_psci_call(ARMCPU *cpu)
-{
-g_assert_not_reached();
-}
-#else
 /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. 
*/
 bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
 /* Actually handle a PSCI call */
 void arm_handle_psci_call(ARMCPU *cpu);
-#endif
 
 /**
  * arm_clear_exclusive: clear the exclusive monitor
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 8e7c68af6a..6f3fd5aecd 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -250,6 +250,7 @@ int kvm_arm_g

[PATCH RFC V2 35/37] hw/arm: Support hotplug capability check using _OSC method

2023-09-26 Thread Salil Mehta via
Physical CPU hotplug results in (un)setting of ACPI _STA.Present bit. AARCH64
platforms do not support physical CPU hotplug. Virtual CPU hotplug support being
implemented toggles ACPI _STA.Enabled Bit to achieve hotplug functionality. This
is not the same as physical CPU hotplug support.

In future, if ARM architecture supports physical CPU hotplug then the current
design of virtual CPU hotplug can be used unchanged. Hence, there is a need for
firmware/VMM/Qemu to support evaluation of platform wide capability related to
the *type* of CPU hotplug support present on the platform. OSPM might need this
during boot time to correctly initialize the CPUs and other related components
in the kernel.

NOTE: This implementation will be improved to add the support of *query* in the
subsequent versions. This is very minimal support to assist kernel.

ASL for the implemented _OSC method:

Method (_OSC, 4, NotSerialized)  // _OSC: Operating System Capabilities
{
CreateDWordField (Arg3, Zero, CDW1)
If ((Arg0 == ToUUID ("0811b06e-4a27-44f9-8d60-3cbbc22e7b48") /* 
Platform-wide Capabilities */))
{
CreateDWordField (Arg3, 0x04, CDW2)
Local0 = CDW2 /* \_SB_._OSC.CDW2 */
If ((Arg1 != One))
{
CDW1 |= 0x08
}

Local0 &= 0x0080
If ((CDW2 != Local0))
{
CDW1 |= 0x10
}

CDW2 = Local0
}
Else
{
CDW1 |= 0x04
}

Return (Arg3)
}

Signed-off-by: Salil Mehta 
---
 hw/arm/virt-acpi-build.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index cbccd2ca2d..377450dd16 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -861,6 +861,55 @@ static void build_fadt_rev6(GArray *table_data, BIOSLinker 
*linker,
 build_fadt(table_data, linker, &fadt, vms->oem_id, vms->oem_table_id);
 }
 
+static void build_virt_osc_method(Aml *scope, VirtMachineState *vms)
+{
+Aml *if_uuid, *else_uuid, *if_rev, *if_caps_masked, *method;
+Aml *a_cdw1 = aml_name("CDW1");
+Aml *a_cdw2 = aml_local(0);
+
+method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
+aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
+
+/* match UUID */
+if_uuid = aml_if(aml_equal(
+aml_arg(0), aml_touuid("0811B06E-4A27-44F9-8D60-3CBBC22E7B48")));
+
+aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), 
"CDW2"));
+aml_append(if_uuid, aml_store(aml_name("CDW2"), a_cdw2));
+
+/* check unknown revision in arg(1) */
+if_rev = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
+/* set revision error bits,  DWORD1 Bit[3] */
+aml_append(if_rev, aml_or(a_cdw1, aml_int(0x08), a_cdw1));
+aml_append(if_uuid, if_rev);
+
+/*
+ * check support for vCPU hotplug type(=enabled) platform-wide capability
+ * in DWORD2 as specified in the below ACPI Specification ECR,
+ *  # https://bugzilla.tianocore.org/show_bug.cgi?id=4481
+ */
+if (vms->acpi_dev) {
+aml_append(if_uuid, aml_and(a_cdw2, aml_int(0x80), a_cdw2));
+/* check if OSPM specified hotplug capability bits were masked */
+if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW2"), a_cdw2)));
+aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1));
+aml_append(if_uuid, if_caps_masked);
+}
+aml_append(if_uuid, aml_store(a_cdw2, aml_name("CDW2")));
+
+aml_append(method, if_uuid);
+else_uuid = aml_else();
+
+/* set unrecognized UUID error bits, DWORD1 Bit[2] */
+aml_append(else_uuid, aml_or(a_cdw1, aml_int(4), a_cdw1));
+aml_append(method, else_uuid);
+
+aml_append(method, aml_return(aml_arg(3)));
+aml_append(scope, method);
+
+return;
+}
+
 /* DSDT */
 static void
 build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
@@ -894,6 +943,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 } else {
 acpi_dsdt_add_cpus(scope, vms);
 }
+
+build_virt_osc_method(scope, vms);
+
 acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
 if (vmc->acpi_expose_flash) {
-- 
2.34.1




[PATCH RFC V2 36/37] tcg/mttcg: enable threads to unregister in tcg_ctxs[]

2023-09-26 Thread Salil Mehta via
From: Miguel Luis 

[This patch is just for reference. It has problems as it does not take care of
the TranslationBlocks and their assigned regions during CPU unrealize]

When using TCG acceleration in a multi-threaded context, each vCPU has its own
thread registered in tcg_ctxs[] upon creation, and tcg_cur_ctxs stores the
current number of threads that have been created. However, the lack of a
mechanism to unregister these threads is a problem when exercising vCPU
hotplug/unplug: tcg_cur_ctxs gets incremented every time a vCPU is hotplugged
but is never decremented when a vCPU is unplugged, which eventually breaks the
assert stating tcg_cur_ctxs < tcg_max_ctxs after a certain number of vCPU
hotplugs.

Suggested-by: Salil Mehta 
[SM: Check Things To Do Section, 
https://lore.kernel.org/all/20200613213629.21984-1-salil.me...@huawei.com/]
Signed-off-by: Miguel Luis 
---
 accel/tcg/tcg-accel-ops-mttcg.c |  1 +
 include/tcg/tcg.h   |  1 +
 tcg/tcg.c   | 23 +++
 3 files changed, 25 insertions(+)

diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index b276262007..5cf9747ef2 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -127,6 +127,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
 qemu_mutex_unlock_iothread();
 rcu_remove_force_rcu_notifier(&force_rcu.notifier);
 rcu_unregister_thread();
+tcg_unregister_thread();
 return NULL;
 }
 
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 0875971719..6c1cd2a618 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -785,6 +785,7 @@ static inline void *tcg_malloc(int size)
 
 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
 void tcg_register_thread(void);
+void tcg_unregister_thread(void);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index ddfe9a96cb..6760f40823 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -742,6 +742,14 @@ static void alloc_tcg_plugin_context(TCGContext *s)
 #endif
 }
 
+static void free_tcg_plugin_context(TCGContext *s)
+{
+#ifdef CONFIG_PLUGIN
+g_ptr_array_unref(s->plugin_tb->insns);
+g_free(s->plugin_tb);
+#endif
+}
+
 /*
  * All TCG threads except the parent (i.e. the one that called tcg_context_init
  * and registered the target's TCG globals) must register with this function
@@ -791,6 +799,21 @@ void tcg_register_thread(void)
 
 tcg_ctx = s;
 }
+
+void tcg_unregister_thread(void)
+{
+TCGContext *s = tcg_ctx;
+unsigned int n;
+
+/* Unclaim an entry in tcg_ctxs */
+n = qatomic_fetch_dec(&tcg_cur_ctxs);
+g_assert(n > 1);
+qatomic_store_release(&tcg_ctxs[n - 1], 0);
+
+free_tcg_plugin_context(s);
+
+g_free(s);
+}
 #endif /* !CONFIG_USER_ONLY */
 
 /* pool based memory allocation */
-- 
2.34.1




[PATCH RFC V2 37/37] hw/arm/virt: Expose cold-booted CPUs as MADT GICC Enabled

2023-09-26 Thread Salil Mehta via
Hotpluggable CPUs MUST be exposed as 'online-capable' as per the new change. But
cold-booted CPUs, if made 'online-capable' during boot time, might not get
detected by a legacy OS and can hence cause compatibility problems.

Original Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706

The specification change might take time; hence, disable support for
unplugging any cold-booted CPUs to preserve compatibility with legacy OSes.

Signed-off-by: Salil Mehta 
---
 hw/arm/virt-acpi-build.c | 19 ++-
 hw/arm/virt.c| 16 
 include/hw/core/cpu.h|  2 ++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 377450dd16..879c83a337 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -710,17 +710,26 @@ static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu)
 }
 
 /*
- * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot
- * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the
- * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged.
- * Though as-of-now this is only used as a debugging feature.
+ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot. We
+ * MUST set 'online-capable' bit for all hotpluggable CPUs.
+ * Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706
  *
  *   UEFI ACPI Specification 6.5
  *   Section: 5.2.12.14. GIC CPU Interface (GICC) Structure
  *   Table:   5.37 GICC CPU Interface Flags
  *   Link: https://uefi.org/specs/ACPI/6.5
+ *
+ * Cold-booted CPUs, except for the first/boot CPU, SHOULD be allowed to be
+ * hot(un)plugged as well, but for this to happen they MUST have the
+ * 'online-capable' bit set. The latter creates a compatibility problem with
+ * legacy OSes, which might ignore the 'online-capable' bit at boot time, so
+ * some CPUs might not get detected. To fix this, the MADT GIC CPU interface
+ * flags should allow both the 'online-capable' and 'Enabled' bits to be set
+ * together. This requires a UEFI ACPI standard change; until that happens,
+ * all cold-booted CPUs are exposed as 'enabled' only.
+ *
  */
-return cpu && !cpu->cpu_index ? 1 : (1 << 3);
+return cpu && cpu->cold_booted ? 1 : (1 << 3);
 }
 
 static void
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e46f529801..3bfe9b9db3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3151,6 +3151,10 @@ static void virt_cpu_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
  * This shall be used during the init of ACPI Hotplug state and hot-unplug
  */
  cs->acpi_persistent = true;
+
+if (!dev->hotplugged) {
+cs->cold_booted = true;
+}
 }
 
 static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
@@ -3214,6 +3218,18 @@ static void virt_cpu_unplug_request(HotplugHandler 
*hotplug_dev,
 return;
 }
 
+/*
+ * UEFI ACPI standard change is required to make both 'enabled' and the
+ * 'online-capable' bit co-exist instead of being mutually exclusive.
+ * check virt_acpi_get_gicc_flags() for more details.
+ *
+ * Disable the unplugging of cold-booted vCPUs as a temporary mitigation.
+ */
+if (cs->cold_booted) {
+error_setg(errp, "Hot-unplug of cold-booted CPU not supported!");
+return;
+}
+
 if (cs->cpu_index == first_cpu->cpu_index) {
 error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported",
first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id,
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index ffd815a0d8..f6b92a3285 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -441,6 +441,8 @@ struct CPUState {
 uint32_t can_do_io;
 int32_t exception_index;
 
+bool cold_booted;
+
 AccelCPUState *accel;
 /* shared by kvm, hax and hvf */
 bool vcpu_dirty;
-- 
2.34.1




Re: [PATCH v4 12/14] simpletrace: added simplified Analyzer2 class

2023-09-26 Thread Mads Ynddal


>> +class Formatter2(Analyzer2):
> 
> Was this class part of the benchmark? It appears to be unused.
> 
>> +def __init__(self):
>> +self.last_timestamp_ns = None
>> +
>> +def catchall(self, *rec_args, event, timestamp_ns, pid, event_id):
>> +if self.last_timestamp_ns is None:
>> +self.last_timestamp_ns = timestamp_ns
>> +delta_ns = timestamp_ns - self.last_timestamp_ns
>> +self.last_timestamp_ns = timestamp_ns
>> +
>> +fields = [
>> +f'{name}={r}' if is_string(type) else f'{name}=0x{r:x}'
>> +for r, (type, name) in zip(rec_args, event.args)
>> +]
>> +print(f'{event.name} {delta_ns / 1000:0.3f} {pid=} ' + ' 
>> '.join(fields))
>> +
>> try:
>> run(Formatter())
>> except SimpleException as e:

It was indeed, but was left in as a usage example. I have decided to keep
Formatter2 in favor of Formatter, as Formatter is based on the Analyzer class
that this patchset is deprecating.


  1   2   3   4   >